comparison scrape-gm.py @ 1:ac32969d1bec SCRAPEGM_1_0

Initial revision
author darius
date Sat, 25 Aug 2007 05:15:14 +0000
parents
children 294581b9c72f
comparison
equal deleted inserted replaced
0:b0cffb14076b 1:ac32969d1bec
1 #!/usr/bin/env python
2
3 ############################################################################
4 # Screen scraper for game-monitor.com
5 #
6 # Prints out matched player names aggregated by server
7 #
8 # $Id: scrape-gm.py,v 1.1.1.1 2007/08/25 05:15:14 darius Exp $
9 ############################################################################
10 #
11 # Copyright (C) 2007 Daniel O'Connor. All rights reserved.
12 #
13 # Redistribution and use in source and binary forms, with or without
14 # modification, are permitted provided that the following conditions
15 # are met:
16 # 1. Redistributions of source code must retain the above copyright
17 # notice, this list of conditions and the following disclaimer.
18 # 2. Redistributions in binary form must reproduce the above copyright
19 # notice, this list of conditions and the following disclaimer in the
20 # documentation and/or other materials provided with the distribution.
21 #
22 # THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 # ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
26 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 # SUCH DAMAGE.
33 #
34 ############################################################################
35
36 import re, time, datetime, urllib, sys, BeautifulSoup
37
class Server:
    """A game server listed in a game-monitor.com player search result.

    Instances are built from two parallel lists of cell strings (one row of
    the player table and the matching row of the server table) and collect
    the names of the matched players seen on that server.

    Only the (pcols, scols) constructor is provided.  An earlier keyword
    based ``__init__`` was defined before it in the class body and was
    therefore always overridden by it; that unreachable definition has been
    removed.
    """

    # Any markup tag.
    alltags = re.compile(r'<[^>]*>')
    # Line-breaking tags; FixTags replaces them with a newline.
    vwhttags = re.compile(r'<(br|hr)>')
    # Non-breaking-space entities; FixTags removes them.
    hwhttags = re.compile(r'\&nbsp;')
    # "players/max" occupancy cell, optionally wrapped in square brackets.
    # Compiled once for the class instead of once per instance.
    tuplere = re.compile(r"\[?([0-9]+)/([0-9]+)\]?")

    def __init__(self, pcols, scols):
        """Build a server record from one player row and one server row.

        pcols -- player-table cells:
                 pcols[2] = player name, pcols[3] = server description
        scols -- server-table cells:
                 scols[0] = players in server / max players,
                 scols[2] = server IP, scols[3] = server port,
                 scols[4] = map name, scols[10] = update age

        Raises SyntaxError if scols[0] is not of the form "n/m" or "[n/m]",
        and ValueError if the port is not an integer.
        """
        self.description = pcols[3]
        self.ip = scols[2]
        self.port = int(scols[3])
        self.mapname = scols[4]
        self.updateage = scols[10]

        m = self.tuplere.match(scols[0])
        if m is None:
            raise SyntaxError("unrecognised player count %r" % (scols[0],))

        self.numplayers = int(m.group(1))
        self.maxplayers = int(m.group(2))
        self.players = []

    def __str__(self):
        """Return a one-line summary: description, map, occupancy, players, age."""
        # Each name is preceded by a single space, as in the original output.
        plist = "".join(" " + str(p) for p in self.players)

        return "%s | Map: %s | Players: %d/%d : %s (%s old)" % (
            self.description, self.mapname, self.numplayers,
            self.maxplayers, plist, self.updateage)

    @staticmethod
    def GetTuple(scols):
        """Return the "ip:port" key identifying the server described by scols."""
        return str(scols[2]) + ":" + str(scols[3])

    @staticmethod
    def FixTags(s):
        """Return s reduced to plain text.

        <br>/<hr> become newlines, &nbsp; entities are dropped, XML entities
        are converted by BeautifulStoneSoup, and remaining tags are removed.
        """
        s = Server.vwhttags.sub('\n', s)
        s = Server.hwhttags.sub('', s)
        s = str(BeautifulSoup.BeautifulStoneSoup(
            s, convertEntities=BeautifulSoup.BeautifulStoneSoup.XML_ENTITIES))
        s = Server.alltags.sub('', s)
        return s

    @staticmethod
    def Scrape(handle):
        """Parse a search result page into a dict of Server objects.

        handle -- the page, as accepted by BeautifulSoup.BeautifulSoup().

        Returns a dict mapping "ip:port" to a Server whose players list holds
        every matched player name found for it.  Returns None (after printing
        a message) if the two "search_table" tables cannot be found or their
        row counts differ.
        """
        soup = BeautifulSoup.BeautifulSoup(handle)

        playertbl = soup.find("table", "search_table")
        if playertbl is None:
            print("Unable to find results")
            return None

        servertbl = playertbl.findNext("table", "search_table")
        if servertbl is None:
            # Previously this fell through and failed with AttributeError.
            print("Unable to find results")
            return None

        playerrows = playertbl.findAll("tr")
        serverrows = servertbl.findAll("tr")

        if len(playerrows) != len(serverrows):
            print("Internal error 41223")
            return None

        servers = {}
        # Skip the first row of each table and walk the rest in lockstep.
        # The old index loop counted len(rows[1:]) but indexed from 0, so it
        # visited the first row and never reached the last one.
        for prow, srow in zip(playerrows[1:], serverrows[1:]):
            pcols = prow.findAll('td')
            scols = srow.findAll('td')
            if len(pcols) != 4:
                continue
            # Server() reads up to scols[10]; ignore rows that are too short.
            if len(scols) < 11:
                continue

            # Real lists (not map objects) so the cells can be indexed.
            pcols = [Server.FixTags(str(c)) for c in pcols]
            scols = [Server.FixTags(str(c)) for c in scols]

            stuple = Server.GetTuple(scols)

            if stuple not in servers:
                servers[stuple] = Server(pcols, scols)

            servers[stuple].addplayer(pcols[2])

        return servers

    def addplayer(self, pname):
        """Append pname to this server's list of matched players."""
        self.players.append(pname)
138
139
def main(argv=None, maxhits=10):
    """Search game-monitor.com for a player and print the servers found.

    argv    -- argument vector; argv[1] is the search string.  Defaults to
               sys.argv.
    maxhits -- maximum number of servers to print before stopping.

    Exits with status 1 when no search string is given.  Exits with status 0
    after reporting a failure to fetch the page.
    """
    # print(...) is always called with one argument here, so it produces the
    # same output whether it is parsed as a statement or as a function call.
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print("Bad usage")
        # The separating space was missing, gluing the two words together.
        print(argv[0] + " search_string")
        sys.exit(1)

    url = ("http://www.game-monitor.com/search.php?search="
           + urllib.quote(argv[1]) + "&type=player")
    try:
        f = urllib.urlopen(url)
    except IOError:
        # sys.exc_info() avoids version-specific "except ... as/," syntax.
        e = sys.exc_info()[1]
        print("Unable to fetch page - " + str(e))
        # NOTE(review): exit status 0 on failure is kept from the original;
        # confirm whether any wrapper depends on it before changing it.
        sys.exit(0)

    # Close the connection explicitly instead of relying on "del f".
    try:
        servers = Server.Scrape(f)
    finally:
        f.close()

    if servers is None:
        # Scrape already printed why it failed; len(None) used to raise here.
        return

    if not servers:
        print("No players found")
        return

    shown = 0
    for key in servers:
        shown = shown + 1
        print(servers[key])
        if shown >= maxhits:
            print("*** Stopping after " + str(maxhits) + " hits")
            break


if __name__ == "__main__":
    main()