Mercurial > ~darius > hgwebdir.cgi > scrape-gm
view scrape-gm.py @ 4:1378b9c50305
Add IRSSI wrapper script
author | darius |
---|---|
date | Sat, 25 Aug 2007 05:19:37 +0000 |
parents | ac32969d1bec |
children | 294581b9c72f |
line wrap: on
line source
#!/usr/bin/env python
############################################################################
# Screen scraper for game-monitor.com
#
# Prints out matched player names aggregated by server
#
# $Id: scrape-gm.py,v 1.1.1.1 2007/08/25 05:15:14 darius Exp $
############################################################################
#
# Copyright (C) 2007 Daniel O'Connor. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# ############################################################################ import re, time, datetime, urllib, sys, BeautifulSoup class Server: alltags = re.compile('<[^>]*>') vwhttags = re.compile('<(br|hr)>') hwhttags = re.compile('\ ') def __init__(self, description = "", ip = "", port = 0, mapname = "", updateage = 0, numplayers = 0, maxplayers = 0, players = []): self.description = description self.ip = ip self.port = port self.mapname = mapname self.updateage = int(updateage) self.players = [] self.numplayers = numplayers self.maxplayers = maxplayers def __init__(self, pcols, scols): # pcols[2] = Player name # pcols[3] = Server description # scols[0] = Players in server / max players # scols[2] = Server IP # scols[3] = Server port # scols[4] = Map name # scols[10] = Update age self.tuplere = re.compile("\[?([0-9]+)/([0-9]+)\]?") self.description = pcols[3] self.ip = scols[2] self.port = int(scols[3]) self.mapname = scols[4] self.updateage = scols[10] m = self.tuplere.match(scols[0]) if (m == None): raise SyntaxError self.numplayers = int(m.group(1)) self.maxplayers = int(m.group(2)) self.players = [] def __str__(self): plist = "" for p in self.players: plist = plist + " " + str(p) return "%s | Map: %s | Players: %d/%d : %s (%s old)" % \ (self.description, self.mapname, self.numplayers, self.maxplayers, \ plist, self.updateage) def GetTuple(scols): return str(scols[2]) + ":" + str(scols[3]) GetTuple = staticmethod(GetTuple) def FixTags(s): s = re.sub(Server.vwhttags, '\n', s) s = re.sub(Server.hwhttags, '', s) s = str(BeautifulSoup.BeautifulStoneSoup( \ s, convertEntities = BeautifulSoup.BeautifulStoneSoup.XML_ENTITIES)) s = re.sub(Server.alltags, '', s) return(s) FixTags = staticmethod(FixTags) def Scrape(handle): s = BeautifulSoup.BeautifulSoup(handle) playertbl = s.find("table", "search_table") if (playertbl == None): print "Unable to find results" return None servertbl = playertbl.findNext("table", "search_table") playerrows = playertbl.findAll("tr") 
serverrows = servertbl.findAll("tr") if (len(playerrows) != len(serverrows)): print "Internal error 41223" return servers = {} for i in range(len(playerrows[1:])): pcols = playerrows[i].findAll('td') scols = serverrows[i].findAll('td') if (len(pcols) != 4): continue pcols = map(lambda c : Server.FixTags(str(c)), pcols) scols = map(lambda c : Server.FixTags(str(c)), scols) stuple = Server.GetTuple(scols) if (stuple not in servers): s = Server(pcols, scols) servers[stuple] = s servers[stuple].addplayer(pcols[2]) return servers Scrape = staticmethod(Scrape) def addplayer(self, pname): self.players.append(pname) if (1): maxhits = 10 if (len(sys.argv) < 2): print "Bad usage" print sys.argv[0] + "search_string" sys.exit(1) try: #f = open("gm.html") f = urllib.urlopen("http://www.game-monitor.com/search.php?search=" + urllib.quote(sys.argv[1]) + "&type=player") except IOError, e: print "Unable to fetch page - " + str(e) sys.exit(0) servers = Server.Scrape(f) del f if (len(servers) == 0): print "No players found" else: i = 0 for s in servers: i = i + 1 print servers[s] if (i >= maxhits): print "*** Stopping after " + str(maxhits) + " hits" break