diff scrape-gm.py @ 1:ac32969d1bec SCRAPEGM_1_0

Initial revision
author darius
date Sat, 25 Aug 2007 05:15:14 +0000
parents
children 294581b9c72f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scrape-gm.py	Sat Aug 25 05:15:14 2007 +0000
@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+
+############################################################################
+# Screen scraper for game-monitor.com
+#
+# Prints out matched player names aggregated by server
+#
+# $Id: scrape-gm.py,v 1.1.1.1 2007/08/25 05:15:14 darius Exp $
+############################################################################
+#
+# Copyright (C) 2007 Daniel O'Connor. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+############################################################################
+
+import re, time, datetime, urllib, sys, BeautifulSoup
+
+class Server:
+    alltags = re.compile('<[^>]*>')
+    vwhttags = re.compile('<(br|hr)>')
+    hwhttags = re.compile('\&nbsp;')
+
+    def __init__(self, description = "", ip = "", port = 0, mapname = "",
+                 updateage = 0, numplayers = 0, maxplayers = 0, players = []):
+        self.description = description
+        self.ip = ip
+        self.port = port
+        self.mapname = mapname
+        self.updateage = int(updateage)
+        self.players = []
+        self.numplayers = numplayers
+        self.maxplayers = maxplayers
+
+    def __init__(self, pcols, scols):
+        # pcols[2] = Player name
+        # pcols[3] = Server description
+        # scols[0] = Players in server / max players
+        # scols[2] = Server IP
+        # scols[3] = Server port 
+        # scols[4] = Map name
+        # scols[10] = Update age
+        self.tuplere = re.compile("\[?([0-9]+)/([0-9]+)\]?")
+        self.description = pcols[3]
+        self.ip = scols[2]
+        self.port = int(scols[3])
+        self.mapname = scols[4]
+        self.updateage = scols[10]
+        m = self.tuplere.match(scols[0])
+        if (m == None):
+            raise SyntaxError
+        
+        self.numplayers = int(m.group(1))
+        self.maxplayers = int(m.group(2))
+        self.players = []
+
+    def __str__(self):
+        plist = ""
+        for p in self.players:
+            plist = plist + " " + str(p)
+        
+        return "%s | Map: %s | Players: %d/%d : %s (%s old)" % \
+               (self.description, self.mapname, self.numplayers, self.maxplayers, \
+                plist, self.updateage)
+    
+    def GetTuple(scols):
+        return str(scols[2]) + ":" + str(scols[3])
+    GetTuple = staticmethod(GetTuple)
+
+    def FixTags(s):
+        s = re.sub(Server.vwhttags, '\n', s)
+        s = re.sub(Server.hwhttags, '', s)
+        s = str(BeautifulSoup.BeautifulStoneSoup( \
+                s, convertEntities = BeautifulSoup.BeautifulStoneSoup.XML_ENTITIES))
+        s = re.sub(Server.alltags, '', s)
+        return(s)
+    FixTags = staticmethod(FixTags)
+    
+    def Scrape(handle):
+        s = BeautifulSoup.BeautifulSoup(handle)
+
+        playertbl = s.find("table", "search_table")
+        if (playertbl == None):
+            print "Unable to find results"
+            return None
+        
+        servertbl = playertbl.findNext("table", "search_table")
+    
+        playerrows = playertbl.findAll("tr")
+        serverrows = servertbl.findAll("tr")
+
+        if (len(playerrows) != len(serverrows)):
+            print "Internal error 41223"
+            return
+
+        servers = {}
+        for i in range(len(playerrows[1:])):
+            pcols = playerrows[i].findAll('td')
+            scols = serverrows[i].findAll('td')
+            if (len(pcols) != 4):
+                continue
+        
+            pcols = map(lambda c : Server.FixTags(str(c)), pcols)
+            scols = map(lambda c : Server.FixTags(str(c)), scols)
+
+            stuple = Server.GetTuple(scols)
+
+            if (stuple not in servers):
+                s = Server(pcols, scols)
+                servers[stuple] = s
+            
+            servers[stuple].addplayer(pcols[2])
+
+        return servers
+    Scrape = staticmethod(Scrape)
+    
+    def addplayer(self, pname):
+        self.players.append(pname)
+    
+    
+if (1):
+    maxhits = 10
+    if (len(sys.argv) < 2):
+        print "Bad usage"
+        print sys.argv[0] + "search_string"
+        sys.exit(1)
+    
+    try:
+        #f = open("gm.html")
+        f = urllib.urlopen("http://www.game-monitor.com/search.php?search=" + urllib.quote(sys.argv[1]) + "&type=player")
+    except IOError, e:
+        print "Unable to fetch page - " + str(e)
+        sys.exit(0)
+    
+    servers = Server.Scrape(f)
+    del f
+    if (len(servers) == 0):
+        print "No players found"
+    else:
+        i = 0
+        for s in servers:
+            i = i + 1
+            print servers[s]
+            if (i >= maxhits):
+                print "*** Stopping after " + str(maxhits) + " hits"
+                break