Mercurial > ~darius > hgwebdir.cgi > scrape-vb
changeset 8:d17fd6f3a492
- Catch up with new VB page layout.
- Don't explode if we can't send an SMS, just log it.
author | darius |
---|---|
date | Thu, 18 Oct 2007 06:57:35 +0000 (2007-10-18) |
parents | bf896507faa9 |
children | 3e03facad74b |
files | scrape-vb.py |
diffstat | 1 files changed, 24 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/scrape-vb.py Fri Sep 07 01:31:47 2007 +0000
+++ b/scrape-vb.py Thu Oct 18 06:57:35 2007 +0000
@@ -6,7 +6,7 @@
 # Prints out (and emails) when criteria match based on cost,
 # destination, etc
 #
-# $Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $
+# $Id: scrape-vb.py,v 1.7 2007/10/18 06:57:35 darius Exp $
 ############################################################################
 #
 # Copyright (C) 2007 Daniel O'Connor. All rights reserved.
@@ -40,7 +40,7 @@
 usage = '''%prog [options]
 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini'''
 
-optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $")
+optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.7 2007/10/18 06:57:35 darius Exp $")
 optparse.add_option('-d', '--debug', action="store_true", default=False,
                     help="Disable mail & SMS sending, prints message to stdout")
 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead")
@@ -69,7 +69,6 @@
 '''
     sys.exit(0)
 
-parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE)
 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE)
 
 conf = ConfigParser.ConfigParser()
@@ -107,16 +106,21 @@
     smshndl = SMSVodaAu.SMSVodaAu(smsuser, smspass)
 
 s = BeautifulSoup.BeautifulSoup(f)
-hrr = s.find("ul", "happyhr-rows")
-if (hrr == None):
+citypairs = s.findAll("td", "city-pair")
+if (citypairs == []):
     print "No happy hour details found"
     sys.exit(0)
-
-hrlist = hrr.findAll("li")
+
+prices = s.findAll("td", "dash-r price")
+if (prices == []):
+    print "Couldn't find prices"
+    sys.exit(0)
 
-# XXX: I wanted to use findAll('ul', 'happyhr-conditions') but it
-# doesn't work
-times = parsetper.match(s.findAll('ul')[11].find('li').string)
+if (len(citypairs) != len(prices)):
+    print "City pair & price tables don't have equal size"
+    sys.exit(0)
+
+times = parsetper.search(s.find('p', 'tandc').string)
 if (times == None):
     print "Unable to parse travel period " + parsetper.match(s.findAll('ul')[11].find('li'))
     sys.exit(0)
@@ -131,16 +135,12 @@
 # list of each matching flight (city1, city2, cost, url)
 #
 output = {}
-for i in hrlist:
+for i, p in zip(citypairs, prices):
     href = i.find('a')
-    match = parsetitle.match(href['title'])
-    if (match == None):
-        print "Unable to match " + str(s)
-        continue
 
-    city1 = match.group(1)
-    city2 = match.group(2)
-    cost = int(match.group(3))
+    city1 = href.next.strip()
+    city2 = href.next.next.next.next.next.strip()
+    cost = int(p.find('a').string.strip('$^ '))
     url = href['href']
 
     for email in conf.sections():
@@ -222,12 +222,16 @@
 if (smssend):
     for o in output:
         if (conf.has_option(o, 'phone')):
+            pnum = conf.get(o, 'phone')
             msg = ""
             for i in output[o]:
                 msg = msg + "%s <-> %s $%d, " % (i[0], i[1], i[2])
             # Chop off the last , & make sure the whole message is not
             # too large.
             msgend = min(len(msg) - 2, 160)
-            print "SMS to " + conf.get(o, 'phone')
             print msg[0:msgend]
-            smshndl.sendamsg(conf.get(o, 'phone'), msg[0:msgend])
+            try:
+                smshndl.sendamsg(pnum, msg[0:msgend])
+                print "Sent SMS to " + pnum
+            except:
+                print "Unable to send SMS to " + pnum
```