Mercurial > ~darius > hgwebdir.cgi > scrape-gm
diff scrape-gm.py @ 15:789cf10ce4c9
Update for new format (for sure)
author | darius@Inchoate |
---|---|
date | Sun, 14 Dec 2008 18:55:39 +1030 |
parents | 5058c2695109 |
children | eeee17d2072c |
line wrap: on
line diff
--- a/scrape-gm.py Sun Dec 14 18:51:14 2008 +1030 +++ b/scrape-gm.py Sun Dec 14 18:55:39 2008 +1030 @@ -5,10 +5,9 @@ # # Prints out matched player names agreated by server # -# $Id: scrape-gm.py,v 1.3 2007/11/18 08:54:07 darius Exp $ ############################################################################ # -# Copyright (C) 2007 Daniel O'Connor. All rights reserved. +# Copyright (C) 2008 Daniel O'Connor. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -35,11 +34,14 @@ import re, time, datetime, urllib, sys, BeautifulSoup +debug = False + class Server: alltags = re.compile('<[^>]*>') vwhttags = re.compile('<(br|hr)>') hwhttags = re.compile('\ ') - + typetag = re.compile('<td><a href="/GameSearch/([^/]+)/.*</td>') + def __init__(self, description = "", ip = "", port = 0, mapname = "", updateage = 0, numplayers = 0, maxplayers = 0, players = []): self.description = description @@ -52,20 +54,36 @@ self.maxplayers = maxplayers def __init__(self, pcols, scols): - # pcols[2] = Player name - # pcols[3] = Server description + # pcols[1] = Player name + # pcols[2] = Server description # scols[0] = Players in server / max players - # scols[2] = Server IP - # scols[3] = Server port - # scols[4] = Map name - # scols[10] = Update age - self.tuplere = re.compile("\[?([0-9]+)/([0-9]+)\]?") - self.description = pcols[3] - self.ip = scols[2] - self.port = int(scols[3]) - self.mapname = scols[4] - self.updateage = scols[10] - m = self.tuplere.match(scols[0]) + # scols[1] = Server IP & port + # scols[2] = Map name + # scols[3] = Game type + # scols[8] = Update age + if debug: + print "pcols = " + str(pcols) + print "scols = " + str(scols) + + self.pcountre = re.compile("([0-9]+)/([0-9]+)") + self.ipportre = re.compile("([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+):([0-9]+)") + self.sdesc = re.compile(" +[0-9]+\. +(.*)") + + m = self.sdesc.match(pcols[2]) + if (m == None): + raise SyntaxError + self.description = m.group(1) + + m = self.ipportre.match(scols[1]) + if (m == None): + raise SyntaxError + + self.ip = m.group(1) + self.port = int(m.group(2)) + self.gametype = scols[3] + self.mapname = scols[2] + self.updateage = scols[8] + m = self.pcountre.match(scols[0]) if (m == None): raise SyntaxError @@ -78,15 +96,20 @@ for p in self.players: plist = plist + " " + str(p) - return "%s | Map: %s | Players: %d/%d : %s (%s old)" % \ - (self.description, self.mapname, self.numplayers, self.maxplayers, \ - plist, self.updateage) + return "%s: %s (%s:%d) | Map: %s | Players: %d/%d : %s (%s old)" % \ + (self.gametype, self.description, self.ip, self.port, self.mapname, + self.numplayers, self.maxplayers, plist, + self.updateage) def GetTuple(scols): return str(scols[2]) + ":" + str(scols[3]) GetTuple = staticmethod(GetTuple) def FixTags(s): + # Mangle game type + t = Server.typetag.match(s) + if t != None: + s = t.group(1) s = re.sub(Server.vwhttags, '\n', s) s = re.sub(Server.hwhttags, '', s) s = str(BeautifulSoup.BeautifulStoneSoup( \ @@ -98,12 +121,13 @@ def Scrape(handle): s = BeautifulSoup.BeautifulSoup(handle) - playertbl = s.find("table", "search_table") + playertbl = s.find("table", "results") if (playertbl == None): - #print "Unable to find results" + if True: + print "Unable to find results" return None - servertbl = playertbl.findNext("table", "search_table") + servertbl = playertbl.findNext("table") playerrows = playertbl.findAll("tr") serverrows = servertbl.findAll("tr") @@ -116,7 +140,9 @@ for i in range(len(playerrows[1:])): pcols = playerrows[i].findAll('td') scols = serverrows[i].findAll('td') - if (len(pcols) != 4): + if (len(pcols) != 3): + if debug: + print "pcols has length %d, expected 3" % len(pcols) continue pcols = map(lambda c : Server.FixTags(str(c)), pcols) @@ -128,7 +154,7 @@ s = Server(pcols, scols) servers[stuple] = s - servers[stuple].addplayer(pcols[2]) + servers[stuple].addplayer(pcols[1]) return servers Scrape = staticmethod(Scrape) @@ -137,7 +163,7 @@ self.players.append(pname) -if (1): +if True: maxhits = 10 if (len(sys.argv) < 2): print "Bad usage" @@ -146,7 +172,7 @@ try: #f = open("gm.html") - f = urllib.urlopen("http://www.game-monitor.com/search.php?search=" + urllib.quote(sys.argv[1]) + "&type=player&location=AU") + f = urllib.urlopen("http://www.game-monitor.com/search.php?location=AU&search=" + urllib.quote(sys.argv[1]) + "&type=player&location=AU") except IOError, e: print "Unable to fetch page - " + str(e) sys.exit(0) @@ -158,10 +184,14 @@ elif (len(servers) == 0): print "No players found" else: + tmp = [] + for i in servers: + tmp.append(servers[i]) + tmp.sort() i = 0 - for s in servers: + for s in tmp: i = i + 1 - print servers[s] + print s if (i >= maxhits): print "*** Stopping after " + str(maxhits) + " hits" break