Mercurial > ~darius > hgwebdir.cgi > scrape-gm
changeset 1:ac32969d1bec SCRAPEGM_1_0
Initial revision
author | darius |
---|---|
date | Sat, 25 Aug 2007 05:15:14 +0000 |
parents | b0cffb14076b |
children | 29842b54c795 |
files | scrape-gm.py |
diffstat | 1 files changed, 165 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scrape-gm.py Sat Aug 25 05:15:14 2007 +0000 @@ -0,0 +1,165 @@ +#!/usr/bin/env python + +############################################################################ +# Screen scraper for game-monitor.com +# +# Prints out matched player names aggregated by server +# +# $Id: scrape-gm.py,v 1.1.1.1 2007/08/25 05:15:14 darius Exp $ +############################################################################ +# +# Copyright (C) 2007 Daniel O'Connor. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
#
############################################################################

import re
import time
import datetime
import urllib
import sys

import BeautifulSoup


class Server(object):
    """A game server together with the matched players found on it.

    Instances are built from one row of the player results table and the
    corresponding row of the server results table, both already reduced to
    lists of plain-text cell contents (see FixTags).
    """

    # Any remaining markup tag.
    alltags = re.compile(r'<[^>]*>')
    # Tags rendered as vertical whitespace; replaced by a newline.
    vwhttags = re.compile(r'<(br|hr)>')
    # Horizontal whitespace entity; removed outright.
    # NOTE(review): in the archived diff this pattern rendered as a backslash
    # followed by a blank, which would strip every space from names and
    # descriptions.  '&nbsp;' is assumed to be the intended entity - confirm
    # against the original file.
    hwhttags = re.compile(r'&nbsp;')
    # "players/max" counter, optionally wrapped in square brackets.
    tuplere = re.compile(r"\[?([0-9]+)/([0-9]+)\]?")

    # An earlier keyword-argument __init__ was shadowed by the one below and
    # could never be called, so it has been dropped.
    def __init__(self, pcols, scols):
        """Build a server record from cleaned player and server columns.

        pcols[2] = Player name
        pcols[3] = Server description
        scols[0] = Players in server / max players
        scols[2] = Server IP
        scols[3] = Server port
        scols[4] = Map name
        scols[10] = Update age

        Raises SyntaxError when scols[0] is not a "players/max" counter.
        The player list starts empty; use addplayer() to fill it.
        """
        self.description = pcols[3]
        self.ip = scols[2]
        self.port = int(scols[3])
        self.mapname = scols[4]
        self.updateage = scols[10]

        m = self.tuplere.match(scols[0])
        if m is None:
            raise SyntaxError("unrecognised player count: %r" % (scols[0],))

        self.numplayers = int(m.group(1))
        self.maxplayers = int(m.group(2))
        self.players = []

    def __str__(self):
        """Return a one-line summary: description, map, counts and players."""
        # Every name is preceded by a single space, including the first.
        plist = "".join(" " + str(p) for p in self.players)
        return "%s | Map: %s | Players: %d/%d : %s (%s old)" % (
            self.description, self.mapname, self.numplayers,
            self.maxplayers, plist, self.updateage)

    @staticmethod
    def GetTuple(scols):
        """Return the "ip:port" key identifying the server in scols."""
        return str(scols[2]) + ":" + str(scols[3])

    @staticmethod
    def FixTags(s):
        """Reduce a markup fragment to plain text.

        <br>/<hr> become newlines, the horizontal whitespace entity is
        removed, XML entities are converted by BeautifulStoneSoup, and any
        tags still present afterwards are stripped.
        """
        s = Server.vwhttags.sub('\n', s)
        s = Server.hwhttags.sub('', s)
        s = str(BeautifulSoup.BeautifulStoneSoup(
            s, convertEntities=BeautifulSoup.BeautifulStoneSoup.XML_ENTITIES))
        s = Server.alltags.sub('', s)
        return s

    @staticmethod
    def Scrape(handle):
        """Parse a search results page into a dict of Server objects.

        handle is passed straight to BeautifulSoup.BeautifulSoup.  The first
        two tables with class "search_table" are taken as the player table
        and the server table; their rows are expected to correspond
        one-to-one.

        Returns a dict mapping "ip:port" to Server, each holding the matched
        player names, or None (after printing a message) when a table is
        missing or the row counts differ.
        """
        soup = BeautifulSoup.BeautifulSoup(handle)

        playertbl = soup.find("table", "search_table")
        if playertbl is None:
            print("Unable to find results")
            return None

        servertbl = playertbl.findNext("table", "search_table")
        if servertbl is None:
            # Player table present but no matching server table.
            print("Unable to find results")
            return None

        playerrows = playertbl.findAll("tr")
        serverrows = servertbl.findAll("tr")
        if len(playerrows) != len(serverrows):
            print("Internal error 41223")
            return None

        servers = {}
        # Skip the first row of each table and walk the rest in lockstep.
        # (The original indexed from 0 over len(rows[1:]), which visited the
        # first row and never reached the last one.)
        for prow, srow in zip(playerrows[1:], serverrows[1:]):
            pcells = prow.findAll('td')
            if len(pcells) != 4:
                continue

            pcols = [Server.FixTags(str(c)) for c in pcells]
            scols = [Server.FixTags(str(c)) for c in srow.findAll('td')]

            stuple = Server.GetTuple(scols)
            if stuple not in servers:
                servers[stuple] = Server(pcols, scols)

            servers[stuple].addplayer(pcols[2])

        return servers

    def addplayer(self, pname):
        """Append pname to this server's player list."""
        self.players.append(pname)


def main():
    """Search game-monitor.com for a player name and print matching servers.

    Takes the search string from sys.argv[1] and prints at most maxhits
    servers, one per line.
    """
    maxhits = 10
    if len(sys.argv) < 2:
        print("Bad usage")
        print(sys.argv[0] + " search_string")
        sys.exit(1)

    try:
        f = urllib.urlopen(
            "http://www.game-monitor.com/search.php?search="
            + urllib.quote(sys.argv[1]) + "&type=player")
    except IOError as e:
        print("Unable to fetch page - " + str(e))
        # NOTE(review): exits with status 0 although the fetch failed; kept
        # as in the original - confirm whether callers rely on it.
        sys.exit(0)

    try:
        servers = Server.Scrape(f)
    finally:
        f.close()

    if servers is None:
        # Scrape has already printed the reason.
        sys.exit(1)

    if not servers:
        print("No players found")
        return

    for i, key in enumerate(servers):
        if i >= maxhits:
            # Only announce the cut-off when something was actually left out.
            print("*** Stopping after " + str(maxhits) + " hits")
            break
        print(servers[key])


if __name__ == "__main__":
    main()