view scrape-gm.py @ 11:22a51e8c0a69

Update copyright date. Remove CVS ID.
author darius@inchoate.localdomain
date Fri, 29 Feb 2008 21:05:14 +1030
parents 0e18c714b69d
children ae9e833e4447
line wrap: on
line source

#!/usr/bin/env python

############################################################################
# Screen scraper for game-monitor.com
#
# Prints out matched player names aggregated by server
#
############################################################################
#
# Copyright (C) 2008 Daniel O'Connor. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
############################################################################

import re, time, datetime, urllib, sys, BeautifulSoup

class Server:
    """One game server scraped from a game-monitor.com search result page.

    Instances are built from paired rows of the "player" and "server"
    results tables and accumulate the matched player names per server.
    """

    # Patterns used by FixTags() to clean up HTML table cells.
    alltags = re.compile(r'<[^>]*>')     # any HTML tag
    vwhttags = re.compile(r'<(br|hr)>')  # tags acting as vertical whitespace
    hwhttags = re.compile(r'&nbsp;')     # entities acting as horizontal whitespace

    def __init__(self, pcols, scols):
        """Construct from one row of each results table.

        pcols[2]  = Player name
        pcols[3]  = Server description
        scols[0]  = Players in server / max players, e.g. "[3/16]"
        scols[2]  = Server IP
        scols[3]  = Server port
        scols[4]  = Map name
        scols[10] = Update age

        Raises SyntaxError if scols[0] is not of the form "[n/m]".
        """
        # NOTE: a duplicate keyword-argument __init__ was removed; it was
        # shadowed by this definition and could never be called (it also
        # had a mutable default argument, players = []).
        self.tuplere = re.compile(r"\[?([0-9]+)/([0-9]+)\]?")
        self.description = pcols[3]
        self.ip = scols[2]
        self.port = int(scols[3])
        self.mapname = scols[4]
        self.updateage = scols[10]
        m = self.tuplere.match(scols[0])
        if m is None:
            raise SyntaxError

        self.numplayers = int(m.group(1))
        self.maxplayers = int(m.group(2))
        self.players = []

    def __str__(self):
        # Each player name is rendered with a leading space, preserving the
        # original "separator first" accumulation format.
        plist = "".join([" " + str(p) for p in self.players])

        return "%s (%s:%d) | Map: %s | Players: %d/%d : %s (%s old)" % \
               (self.description, self.ip, self.port, self.mapname,
                self.numplayers, self.maxplayers, plist,
                self.updateage)

    def GetTuple(scols):
        """Return the "ip:port" string that uniquely keys a server row."""
        return str(scols[2]) + ":" + str(scols[3])
    GetTuple = staticmethod(GetTuple)

    def FixTags(s):
        """Strip markup from a table cell: <br>/<hr> become newlines,
        &nbsp; is dropped, XML entities are decoded and any remaining
        tags are removed."""
        s = re.sub(Server.vwhttags, '\n', s)
        s = re.sub(Server.hwhttags, '', s)
        s = str(BeautifulSoup.BeautifulStoneSoup( \
                s, convertEntities = BeautifulSoup.BeautifulStoneSoup.XML_ENTITIES))
        s = re.sub(Server.alltags, '', s)
        return(s)
    FixTags = staticmethod(FixTags)

    def Scrape(handle):
        """Parse a game-monitor.com search page read from 'handle'.

        Returns a dict mapping "ip:port" -> Server (with matched players
        attached), or None when the results tables cannot be found.
        """
        s = BeautifulSoup.BeautifulSoup(handle)

        playertbl = s.find("table", "results")
        if playertbl is None:
            return None

        # The server table is the next "results" table after the player one.
        servertbl = playertbl.findNext("table", "results")

        playerrows = playertbl.findAll("tr")
        serverrows = servertbl.findAll("tr")

        if len(playerrows) != len(serverrows):
            print("Internal error 41223")
            return

        servers = {}
        # Skip the header row (index 0).  The original loop iterated
        # range(len(playerrows[1:])) but indexed from 0, which processed
        # the header row and silently dropped the last data row.
        for i in range(1, len(playerrows)):
            pcols = playerrows[i].findAll('td')
            scols = serverrows[i].findAll('td')
            # Rows without exactly 4 player columns (e.g. headers) are skipped.
            if len(pcols) != 4:
                continue

            pcols = [Server.FixTags(str(c)) for c in pcols]
            scols = [Server.FixTags(str(c)) for c in scols]

            stuple = Server.GetTuple(scols)

            if stuple not in servers:
                servers[stuple] = Server(pcols, scols)

            servers[stuple].addplayer(pcols[2])

        return servers
    Scrape = staticmethod(Scrape)

    def addplayer(self, pname):
        """Record a matched player name on this server."""
        self.players.append(pname)
    
    
if __name__ == "__main__":
    # Maximum number of servers to print before truncating the output.
    maxhits = 10

    if len(sys.argv) < 2:
        # Original printed the usage without a space between the program
        # name and the argument placeholder.
        print("Bad usage")
        print(sys.argv[0] + " search_string")
        sys.exit(1)

    try:
        # For offline testing, read a saved page instead:
        #f = open("gm.html")
        f = urllib.urlopen("http://www.game-monitor.com/search.php?location=AU&search=" + urllib.quote(sys.argv[1]) + "&type=player&location=AU")
    except IOError as e:
        print("Unable to fetch page - " + str(e))
        sys.exit(0)

    servers = Server.Scrape(f)
    del f
    if servers is None:
        print("No results available, please check manually")
    elif len(servers) == 0:
        print("No players found")
    else:
        # Print at most maxhits servers, then stop.
        i = 0
        for s in servers:
            i = i + 1
            print(servers[s])
            if i >= maxhits:
                print("*** Stopping after " + str(maxhits) + " hits")
                break