1
|
1 #!/usr/bin/env python
|
|
2
|
|
3 ############################################################################
|
|
4 # Screen scraper for game-monitor.com
|
|
5 #
|
|
6 # Prints out matched player names aggregated by server
|
|
7 #
|
|
8 # $Id: scrape-gm.py,v 1.1.1.1 2007/08/25 05:15:14 darius Exp $
|
|
9 ############################################################################
|
|
10 #
|
|
11 # Copyright (C) 2007 Daniel O'Connor. All rights reserved.
|
|
12 #
|
|
13 # Redistribution and use in source and binary forms, with or without
|
|
14 # modification, are permitted provided that the following conditions
|
|
15 # are met:
|
|
16 # 1. Redistributions of source code must retain the above copyright
|
|
17 # notice, this list of conditions and the following disclaimer.
|
|
18 # 2. Redistributions in binary form must reproduce the above copyright
|
|
19 # notice, this list of conditions and the following disclaimer in the
|
|
20 # documentation and/or other materials provided with the distribution.
|
|
21 #
|
|
22 # THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
23 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
24 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
25 # ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
26 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
27 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
28 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
29 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
30 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
31 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
32 # SUCH DAMAGE.
|
|
33 #
|
|
34 ############################################################################
|
|
35
|
|
36 import re, time, datetime, urllib, sys, BeautifulSoup
|
|
37
|
|
class Server:
    """A game server listed in a game-monitor.com player search result.

    Each instance holds the server's description, address, map name,
    player counts and update age as scraped from the result tables, plus
    the list of matched player names added through addplayer().

    Use Server.Scrape() to build a dictionary of servers from a page.
    """

    # Matches any tag; used last to strip whatever markup remains.
    alltags = re.compile('<[^>]*>')
    # Tags that act as vertical whitespace; replaced with a newline.
    vwhttags = re.compile('<(br|hr)>')
    # Horizontal whitespace that is removed outright.
    # NOTE(review): as written this matches a literal space, so FixTags
    # strips every space from the text.  It looks like it may have been
    # meant to match a non-breaking-space entity -- confirm against the
    # upstream file before changing it.
    hwhttags = re.compile(r'\ ')
    # Matches "players/maxplayers", optionally wrapped in square brackets.
    tuplere = re.compile(r"\[?([0-9]+)/([0-9]+)\]?")

    def __init__(self, pcols, scols):
        """Build a server from one player-table row and its server-table row.

        pcols -- cleaned cell text of the player row:
                 pcols[2] = Player name
                 pcols[3] = Server description
        scols -- cleaned cell text of the server row:
                 scols[0] = Players in server / max players
                 scols[2] = Server IP
                 scols[3] = Server port
                 scols[4] = Map name
                 scols[10] = Update age

        Raises SyntaxError if scols[0] is not of the form "n/m" or "[n/m]",
        and ValueError if scols[3] is not an integer.
        """
        self.description = pcols[3]
        self.ip = scols[2]
        self.port = int(scols[3])
        self.mapname = scols[4]
        self.updateage = scols[10]

        m = self.tuplere.match(scols[0])
        if m is None:
            raise SyntaxError

        self.numplayers = int(m.group(1))
        self.maxplayers = int(m.group(2))
        self.players = []

    def __str__(self):
        """Return a one-line summary of the server and its matched players."""
        # Every player name is preceded by a single space.
        plist = "".join(" " + str(p) for p in self.players)

        return "%s | Map: %s | Players: %d/%d : %s (%s old)" % \
            (self.description, self.mapname, self.numplayers,
             self.maxplayers, plist, self.updateage)

    @staticmethod
    def GetTuple(scols):
        """Return the "ip:port" key identifying the server in row scols."""
        return str(scols[2]) + ":" + str(scols[3])

    @staticmethod
    def FixTags(s):
        """Return the markup string s reduced to plain text.

        <br> and <hr> become newlines, hwhttags matches are dropped, XML
        entities are converted by BeautifulStoneSoup, and finally every
        remaining tag is removed.
        """
        s = Server.vwhttags.sub('\n', s)
        s = Server.hwhttags.sub('', s)
        s = str(BeautifulSoup.BeautifulStoneSoup(
            s, convertEntities=BeautifulSoup.BeautifulStoneSoup.XML_ENTITIES))
        s = Server.alltags.sub('', s)
        return s

    @staticmethod
    def Scrape(handle):
        """Parse a search result page into a dictionary of servers.

        handle -- markup (or file-like object) accepted by BeautifulSoup.

        Returns a dictionary mapping "ip:port" to a Server whose players
        list holds every matched player found on that server.  Returns
        None, after printing a message, when the two "search_table"
        tables cannot be found or their row counts differ.
        """
        soup = BeautifulSoup.BeautifulSoup(handle)

        playertbl = soup.find("table", "search_table")
        if playertbl is None:
            print("Unable to find results")
            return None

        # The server details live in the next table of the same class.
        servertbl = playertbl.findNext("table", "search_table")
        if servertbl is None:
            print("Unable to find results")
            return None

        playerrows = playertbl.findAll("tr")
        serverrows = servertbl.findAll("tr")

        # Rows are matched by position, so both tables must line up.
        if len(playerrows) != len(serverrows):
            print("Internal error 41223")
            return None

        servers = {}
        # Skip the first (header) row of each table and walk the remaining
        # rows in lockstep, including the last one.
        for prow, srow in zip(playerrows[1:], serverrows[1:]):
            pcols = prow.findAll('td')
            scols = srow.findAll('td')
            # Ignore rows that do not have the expected layout; the
            # constructor reads up to scols[10].
            if len(pcols) != 4 or len(scols) < 11:
                continue

            pcols = [Server.FixTags(str(c)) for c in pcols]
            scols = [Server.FixTags(str(c)) for c in scols]

            stuple = Server.GetTuple(scols)

            if stuple not in servers:
                servers[stuple] = Server(pcols, scols)

            servers[stuple].addplayer(pcols[2])

        return servers

    def addplayer(self, pname):
        """Append pname to the list of matched players on this server."""
        self.players.append(pname)
|
|
138
|
|
139
|
|
if __name__ == "__main__":
    # Command-line entry point: search game-monitor.com for the player
    # name given as the first argument and print one line per server on
    # which a matching player was found.

    # Maximum number of servers to print.
    maxhits = 10
    if len(sys.argv) < 2:
        print("Bad usage")
        print(sys.argv[0] + " search_string")
        sys.exit(1)

    try:
        # For offline testing a saved page can be used instead, e.g.
        # f = open("gm.html")
        f = urllib.urlopen("http://www.game-monitor.com/search.php?search=" + urllib.quote(sys.argv[1]) + "&type=player")
    except IOError as e:
        print("Unable to fetch page - " + str(e))
        # NOTE(review): exits with status 0 even though the fetch failed;
        # kept as-is in case callers rely on it -- confirm.
        sys.exit(0)

    try:
        servers = Server.Scrape(f)
    finally:
        # Release the connection whether or not parsing succeeded.
        f.close()

    # Scrape returns None when the result tables are missing, and an
    # empty dictionary when there were no matches.
    if not servers:
        print("No players found")
    else:
        for shown, key in enumerate(servers):
            print(servers[key])
            if shown + 1 >= maxhits:
                print("*** Stopping after " + str(maxhits) + " hits")
                break
|