comparison scrape-gm.py @ 13:e8550290e512

Update parser for new format.
author darius@Inchoate
date Sun, 14 Dec 2008 18:48:15 +1030
parents ae9e833e4447
children
comparison
equal deleted inserted replaced
12:ae9e833e4447 13:e8550290e512
32 # 32 #
33 ############################################################################ 33 ############################################################################
34 34
35 import re, time, datetime, urllib, sys, BeautifulSoup 35 import re, time, datetime, urllib, sys, BeautifulSoup
36 36
37 debug = False
38
37 class Server: 39 class Server:
38 alltags = re.compile('<[^>]*>') 40 alltags = re.compile('<[^>]*>')
39 vwhttags = re.compile('<(br|hr)>') 41 vwhttags = re.compile('<(br|hr)>')
40 hwhttags = re.compile('\&nbsp;') 42 hwhttags = re.compile('\&nbsp;')
41 typetag = re.compile('<td><a href="/GameSearch/([^/]+)/.*</td>') 43 typetag = re.compile('<td><a href="/GameSearch/([^/]+)/.*</td>')
50 self.players = [] 52 self.players = []
51 self.numplayers = numplayers 53 self.numplayers = numplayers
52 self.maxplayers = maxplayers 54 self.maxplayers = maxplayers
53 55
54 def __init__(self, pcols, scols): 56 def __init__(self, pcols, scols):
55 # pcols[2] = Player name 57 # pcols[1] = Player name
56 # pcols[3] = Server description 58 # pcols[2] = Server description
57 # scols[0] = Players in server / max players 59 # scols[0] = Players in server / max players
58 # scols[2] = Server IP 60 # scols[1] = Server IP & port
59 # scols[3] = Server port 61 # scols[2] = Map name
60 # scols[4] = Map name 62 # scols[3] = Game type
61 # scols[5] = Game type 63 # scols[8] = Update age
62 # scols[10] = Update age 64 if debug:
63 self.tuplere = re.compile("\[?([0-9]+)/([0-9]+)\]?") 65 print "pcols = " + str(pcols)
64 self.description = pcols[3] 66 print "scols = " + str(scols)
65 self.ip = scols[2] 67
66 self.port = int(scols[3]) 68 self.pcountre = re.compile("([0-9]+)/([0-9]+)")
67 self.mapname = scols[4] 69 self.ipportre = re.compile("([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+):([0-9]+)")
68 self.gametype = scols[5] 70 self.sdesc = re.compile(" +[0-9]+\. +(.*)")
69 self.updateage = scols[10] 71
70 m = self.tuplere.match(scols[0]) 72 m = self.sdesc.match(pcols[2])
73 if (m == None):
74 raise SyntaxError
75 self.description = m.group(1)
76
77 m = self.ipportre.match(scols[1])
78 if (m == None):
79 raise SyntaxError
80
81 self.ip = m.group(1)
82 self.port = int(m.group(2))
83 self.gametype = scols[3]
84 self.mapname = scols[2]
85 self.updateage = scols[8]
86 m = self.pcountre.match(scols[0])
71 if (m == None): 87 if (m == None):
72 raise SyntaxError 88 raise SyntaxError
73 89
74 self.numplayers = int(m.group(1)) 90 self.numplayers = int(m.group(1))
75 self.maxplayers = int(m.group(2)) 91 self.maxplayers = int(m.group(2))
105 def Scrape(handle): 121 def Scrape(handle):
106 s = BeautifulSoup.BeautifulSoup(handle) 122 s = BeautifulSoup.BeautifulSoup(handle)
107 123
108 playertbl = s.find("table", "results") 124 playertbl = s.find("table", "results")
109 if (playertbl == None): 125 if (playertbl == None):
110 #print "Unable to find results" 126 if True:
127 print "Unable to find results"
111 return None 128 return None
112 129
113 servertbl = playertbl.findNext("table") 130 servertbl = playertbl.findNext("table")
114 131
115 playerrows = playertbl.findAll("tr") 132 playerrows = playertbl.findAll("tr")
121 138
122 servers = {} 139 servers = {}
123 for i in range(len(playerrows[1:])): 140 for i in range(len(playerrows[1:])):
124 pcols = playerrows[i].findAll('td') 141 pcols = playerrows[i].findAll('td')
125 scols = serverrows[i].findAll('td') 142 scols = serverrows[i].findAll('td')
126 if (len(pcols) != 4): 143 if (len(pcols) != 3):
144 if debug:
145 print "pcols has length %d, expected 3" % len(pcols)
127 continue 146 continue
128 147
129 pcols = map(lambda c : Server.FixTags(str(c)), pcols) 148 pcols = map(lambda c : Server.FixTags(str(c)), pcols)
130 scols = map(lambda c : Server.FixTags(str(c)), scols) 149 scols = map(lambda c : Server.FixTags(str(c)), scols)
131 150
133 152
134 if (stuple not in servers): 153 if (stuple not in servers):
135 s = Server(pcols, scols) 154 s = Server(pcols, scols)
136 servers[stuple] = s 155 servers[stuple] = s
137 156
138 servers[stuple].addplayer(pcols[2]) 157 servers[stuple].addplayer(pcols[1])
139 158
140 return servers 159 return servers
141 Scrape = staticmethod(Scrape) 160 Scrape = staticmethod(Scrape)
142 161
143 def addplayer(self, pname): 162 def addplayer(self, pname):
144 self.players.append(pname) 163 self.players.append(pname)
145 164
146 165
147 if (1): 166 if True:
148 maxhits = 10 167 maxhits = 10
149 if (len(sys.argv) < 2): 168 if (len(sys.argv) < 2):
150 print "Bad usage" 169 print "Bad usage"
151 print sys.argv[0] + "search_string" 170 print sys.argv[0] + "search_string"
152 sys.exit(1) 171 sys.exit(1)