Mercurial > ~darius > hgwebdir.cgi > scrape-gm
annotate scrape-gm.py @ 16:eeee17d2072c default tip
Modify to search for a given server (IP:port).
Player names are not returned in this case.
author | Daniel O'Connor <darius@dons.net.au> |
---|---|
date | Fri, 16 Oct 2009 23:41:42 +1030 |
parents | 789cf10ce4c9 |
children |
rev | line source |
---|---|
1 | 1 #!/usr/bin/env python |
2 | |
3 ############################################################################ | |
4 # Screen scraper for game-monitor.com | |
5 # | |
6 # Prints out matched player names aggregated by server | |
7 # | |
8 ############################################################################ | |
9 # | |
15 | 10 # Copyright (C) 2008 Daniel O'Connor. All rights reserved. |
1 | 11 # |
12 # Redistribution and use in source and binary forms, with or without | |
13 # modification, are permitted provided that the following conditions | |
14 # are met: | |
15 # 1. Redistributions of source code must retain the above copyright | |
16 # notice, this list of conditions and the following disclaimer. | |
17 # 2. Redistributions in binary form must reproduce the above copyright | |
18 # notice, this list of conditions and the following disclaimer in the | |
19 # documentation and/or other materials provided with the distribution. | |
20 # | |
21 # THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
22 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 # ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE | |
25 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 # SUCH DAMAGE. | |
32 # | |
33 ############################################################################ | |
34 | |
35 import re, time, datetime, urllib, sys, BeautifulSoup | |
36 | |
# Set to True to print the raw scraped columns and any skipped rows.
debug = False


class Server:
    """A game server parsed from one row of a game-monitor.com results page.

    An instance is built from the cleaned-up text of a row's cells in the
    "results" table (pcols) and of the matching row in the table that
    follows it (scols).  Scrape() turns a fetched page into a dictionary of
    Server objects; players found on the same server are collected on one
    instance.
    """

    # Any remaining markup tag.
    alltags = re.compile(r'<[^>]*>')
    # Tags that act as vertical whitespace; replaced by a newline.
    vwhttags = re.compile(r'<(br|hr)>')
    # Horizontal whitespace entity; removed outright.
    # NOTE(review): restored as the '&nbsp;' entity.  A pattern matching
    # plain spaces would strip the leading blanks that `sdesc` requires, so
    # no row could ever parse - confirm against the repository copy.
    hwhttags = re.compile(r'&nbsp;')
    # Game type cell: keep only the name embedded in the GameSearch link.
    typetag = re.compile(r'<td><a href="/GameSearch/([^/]+)/.*</td>')

    # Field parsers, compiled once for the class instead of once per row.
    pcountre = re.compile(r"([0-9]+)/([0-9]+)")
    ipportre = re.compile(r"([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+):([0-9]+)")
    sdesc = re.compile(r" +[0-9]+\. +(.*)")

    def __init__(self, pcols, scols):
        """Build a server from the cleaned cell text of one result row.

        For a server search..
          pcols[1] = Server description
        For a player search..
          pcols[1] = Player name
          pcols[2] = Server description

        scols[0] = Players in server / max players
        scols[1] = Server IP & port
        scols[2] = Map name
        scols[3] = Game type
        scols[8] = Update age

        Raises SyntaxError when the description, the IP:port or the player
        count does not have the expected format.
        """
        if debug:
            print("pcols = " + str(pcols))
            print("scols = " + str(scols))

        # A player search has one extra leading column (the player name).
        if len(pcols) == 3:
            m = self.sdesc.match(pcols[2])
        else:
            m = self.sdesc.match(pcols[1])
        if m is None:
            raise SyntaxError("unrecognised server description")
        self.description = m.group(1)

        m = self.ipportre.match(scols[1])
        if m is None:
            raise SyntaxError("unrecognised server address")
        self.ip = m.group(1)
        self.port = int(m.group(2))

        self.gametype = scols[3]
        self.mapname = scols[2]
        # Kept as the scraped text; it is only ever printed.
        self.updateage = scols[8]

        m = self.pcountre.match(scols[0])
        if m is None:
            raise SyntaxError("unrecognised player count")
        self.numplayers = int(m.group(1))
        self.maxplayers = int(m.group(2))

        self.players = []

    def __str__(self):
        """Return the one-line summary printed for this server."""
        plist = ""
        if len(self.players) > 0:
            # Same layout as before: ": ", each name preceded by a blank,
            # then one trailing blank before the update age.
            plist = ": " + "".join([" " + str(p) for p in self.players]) + " "

        return "%s: %s (%s:%d) | Map: %s | Players: %d/%d %s(%s old)" % \
            (self.gametype, self.description, self.ip, self.port,
             self.mapname, self.numplayers, self.maxplayers, plist,
             self.updateage)

    @staticmethod
    def GetTuple(scols):
        """Return the key identifying the server a row belongs to.

        The key is the "IP:port" cell (scols[1]).  Keying on the map name
        and game type cells would merge different servers that happen to
        run the same map.
        """
        return str(scols[1])

    @staticmethod
    def FixTags(s):
        """Reduce the HTML source of one table cell to plain text."""
        # Mangle game type
        t = Server.typetag.match(s)
        if t is not None:
            s = t.group(1)
        s = Server.vwhttags.sub('\n', s)
        s = Server.hwhttags.sub('', s)
        # Decode the XML entities before dropping whatever markup is left.
        s = str(BeautifulSoup.BeautifulStoneSoup(
            s, convertEntities=BeautifulSoup.BeautifulStoneSoup.XML_ENTITIES))
        s = Server.alltags.sub('', s)
        return s

    @staticmethod
    def Scrape(handle):
        """Parse a results page into a dictionary of Server objects.

        handle -- the page, as accepted by BeautifulSoup.BeautifulSoup().

        Returns a dictionary mapping GetTuple() keys to Server instances
        (empty when no row could be used), or None when the expected tables
        are missing or their row counts disagree.  Server() may raise
        SyntaxError for a row whose cells are malformed.
        """
        soup = BeautifulSoup.BeautifulSoup(handle)

        playertbl = soup.find("table", "results")
        if playertbl is None:
            print("Unable to find results")
            return None

        servertbl = playertbl.findNext("table")
        if servertbl is None:
            # No companion table: treat it like a page without results
            # instead of failing with an AttributeError below.
            print("Unable to find results")
            return None

        playerrows = playertbl.findAll("tr")
        serverrows = servertbl.findAll("tr")

        if len(playerrows) != len(serverrows):
            print("Internal error 41223")
            return None

        servers = {}
        # Skip the first row of each table and walk the rest in lockstep, so
        # the last result row is included.
        for prow, srow in zip(playerrows[1:], serverrows[1:]):
            pcells = prow.findAll('td')
            scells = srow.findAll('td')
            if len(pcells) != 3 and len(pcells) != 2:
                if debug:
                    print("pcols has length %d, expected 2 or 3" % len(pcells))
                continue

            pcols = [Server.FixTags(str(c)) for c in pcells]
            scols = [Server.FixTags(str(c)) for c in scells]

            stuple = Server.GetTuple(scols)
            if stuple not in servers:
                servers[stuple] = Server(pcols, scols)

            # Only add players if it's a player query
            if len(pcols) == 3:
                servers[stuple].addplayer(pcols[1])

        return servers

    def addplayer(self, pname):
        """Record pname as a matched player on this server."""
        self.players.append(pname)
176 | |
16
eeee17d2072c
Modify to search for a given server (IP:port).
Daniel O'Connor <darius@dons.net.au>
parents:
15
diff
changeset
|
177 |
eeee17d2072c
Modify to search for a given server (IP:port).
Daniel O'Connor <darius@dons.net.au>
parents:
15
diff
changeset
|
# Matches a search term that starts with a server address (IP:port).
isipadrre = re.compile(r'[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+')

# Maximum number of servers printed per run.
maxhits = 10


def main():
    """Search game-monitor.com for sys.argv[1] and print matching servers.

    A term that looks like IP:port is looked up as a server; anything else
    is looked up as a player name.  At most `maxhits` servers are printed.
    Exits with status 1 on bad usage.
    """
    if len(sys.argv) < 2:
        print("Bad usage")
        print(sys.argv[0] + " search_string")
        sys.exit(1)

    term = sys.argv[1]
    url = ("http://www.game-monitor.com/search.php?location=AU&search="
           + urllib.quote(term))
    if isipadrre.match(term) is None:
        # Not an address, so ask for a player search.
        url = url + "&type=player&location=AU"

    try:
        #f = open("gm.html")
        f = urllib.urlopen(url)
    except IOError as e:
        print("Unable to fetch page - " + str(e))
        # NOTE(review): status 0 on a fetch failure is kept from the
        # original - confirm whether callers rely on it.
        sys.exit(0)

    try:
        servers = Server.Scrape(f)
    finally:
        # Release the connection even when parsing fails.
        f.close()

    if servers is None:
        print("No results available, please check manually")
    elif not servers:
        print("No players found")
    else:
        # Server defines no ordering, so sort on explicit fields to get a
        # stable, repeatable listing.
        ordered = sorted(servers.values(),
                         key=lambda srv: (srv.gametype, srv.description,
                                          srv.ip, srv.port))
        for shown, srv in enumerate(ordered):
            if shown >= maxhits:
                # Only reported when results were actually left out.
                print("*** Stopping after " + str(maxhits) + " hits")
                break
            print(srv)


if __name__ == "__main__":
    main()