Mercurial > ~darius > hgwebdir.cgi > scrape-gm
comparison scrape-gm.py @ 13:e8550290e512
Update parser for new format.
author | darius@Inchoate |
---|---|
date | Sun, 14 Dec 2008 18:48:15 +1030 |
parents | ae9e833e4447 |
children |
comparison
legend: equal | deleted | inserted | replaced
12:ae9e833e4447 | 13:e8550290e512 |
---|---|
32 # | 32 # |
33 ############################################################################ | 33 ############################################################################ |
34 | 34 |
35 import re, time, datetime, urllib, sys, BeautifulSoup | 35 import re, time, datetime, urllib, sys, BeautifulSoup |
36 | 36 |
37 debug = False | |
38 | |
37 class Server: | 39 class Server: |
38 alltags = re.compile('<[^>]*>') | 40 alltags = re.compile('<[^>]*>') |
39 vwhttags = re.compile('<(br|hr)>') | 41 vwhttags = re.compile('<(br|hr)>') |
40 hwhttags = re.compile('\&nbsp;') | 42 hwhttags = re.compile('\&nbsp;') |
41 typetag = re.compile('<td><a href="/GameSearch/([^/]+)/.*</td>') | 43 typetag = re.compile('<td><a href="/GameSearch/([^/]+)/.*</td>') |
50 self.players = [] | 52 self.players = [] |
51 self.numplayers = numplayers | 53 self.numplayers = numplayers |
52 self.maxplayers = maxplayers | 54 self.maxplayers = maxplayers |
53 | 55 |
54 def __init__(self, pcols, scols): | 56 def __init__(self, pcols, scols): |
55 # pcols[2] = Player name | 57 # pcols[1] = Player name |
56 # pcols[3] = Server description | 58 # pcols[2] = Server description |
57 # scols[0] = Players in server / max players | 59 # scols[0] = Players in server / max players |
58 # scols[2] = Server IP | 60 # scols[1] = Server IP & port |
59 # scols[3] = Server port | 61 # scols[2] = Map name |
60 # scols[4] = Map name | 62 # scols[3] = Game type |
61 # scols[5] = Game type | 63 # scols[8] = Update age |
62 # scols[10] = Update age | 64 if debug: |
63 self.tuplere = re.compile("\[?([0-9]+)/([0-9]+)\]?") | 65 print "pcols = " + str(pcols) |
64 self.description = pcols[3] | 66 print "scols = " + str(scols) |
65 self.ip = scols[2] | 67 |
66 self.port = int(scols[3]) | 68 self.pcountre = re.compile("([0-9]+)/([0-9]+)") |
67 self.mapname = scols[4] | 69 self.ipportre = re.compile("([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+):([0-9]+)") |
68 self.gametype = scols[5] | 70 self.sdesc = re.compile(" +[0-9]+\. +(.*)") |
69 self.updateage = scols[10] | 71 |
70 m = self.tuplere.match(scols[0]) | 72 m = self.sdesc.match(pcols[2]) |
73 if (m == None): | |
74 raise SyntaxError | |
75 self.description = m.group(1) | |
76 | |
77 m = self.ipportre.match(scols[1]) | |
78 if (m == None): | |
79 raise SyntaxError | |
80 | |
81 self.ip = m.group(1) | |
82 self.port = int(m.group(2)) | |
83 self.gametype = scols[3] | |
84 self.mapname = scols[2] | |
85 self.updateage = scols[8] | |
86 m = self.pcountre.match(scols[0]) | |
71 if (m == None): | 87 if (m == None): |
72 raise SyntaxError | 88 raise SyntaxError |
73 | 89 |
74 self.numplayers = int(m.group(1)) | 90 self.numplayers = int(m.group(1)) |
75 self.maxplayers = int(m.group(2)) | 91 self.maxplayers = int(m.group(2)) |
105 def Scrape(handle): | 121 def Scrape(handle): |
106 s = BeautifulSoup.BeautifulSoup(handle) | 122 s = BeautifulSoup.BeautifulSoup(handle) |
107 | 123 |
108 playertbl = s.find("table", "results") | 124 playertbl = s.find("table", "results") |
109 if (playertbl == None): | 125 if (playertbl == None): |
110 #print "Unable to find results" | 126 if True: |
127 print "Unable to find results" | |
111 return None | 128 return None |
112 | 129 |
113 servertbl = playertbl.findNext("table") | 130 servertbl = playertbl.findNext("table") |
114 | 131 |
115 playerrows = playertbl.findAll("tr") | 132 playerrows = playertbl.findAll("tr") |
121 | 138 |
122 servers = {} | 139 servers = {} |
123 for i in range(len(playerrows[1:])): | 140 for i in range(len(playerrows[1:])): |
124 pcols = playerrows[i].findAll('td') | 141 pcols = playerrows[i].findAll('td') |
125 scols = serverrows[i].findAll('td') | 142 scols = serverrows[i].findAll('td') |
126 if (len(pcols) != 4): | 143 if (len(pcols) != 3): |
144 if debug: | |
145 print "pcols has length %d, expected 3" % len(pcols) | |
127 continue | 146 continue |
128 | 147 |
129 pcols = map(lambda c : Server.FixTags(str(c)), pcols) | 148 pcols = map(lambda c : Server.FixTags(str(c)), pcols) |
130 scols = map(lambda c : Server.FixTags(str(c)), scols) | 149 scols = map(lambda c : Server.FixTags(str(c)), scols) |
131 | 150 |
133 | 152 |
134 if (stuple not in servers): | 153 if (stuple not in servers): |
135 s = Server(pcols, scols) | 154 s = Server(pcols, scols) |
136 servers[stuple] = s | 155 servers[stuple] = s |
137 | 156 |
138 servers[stuple].addplayer(pcols[2]) | 157 servers[stuple].addplayer(pcols[1]) |
139 | 158 |
140 return servers | 159 return servers |
141 Scrape = staticmethod(Scrape) | 160 Scrape = staticmethod(Scrape) |
142 | 161 |
143 def addplayer(self, pname): | 162 def addplayer(self, pname): |
144 self.players.append(pname) | 163 self.players.append(pname) |
145 | 164 |
146 | 165 |
147 if (1): | 166 if True: |
148 maxhits = 10 | 167 maxhits = 10 |
149 if (len(sys.argv) < 2): | 168 if (len(sys.argv) < 2): |
150 print "Bad usage" | 169 print "Bad usage" |
151 print sys.argv[0] + "search_string" | 170 print sys.argv[0] + "search_string" |
152 sys.exit(1) | 171 sys.exit(1) |