diff scrape-vb.py @ 8:d17fd6f3a492

- Catch up with new VB page layout.
- Don't explode if we can't send an SMS, just log it.
author darius
date Thu, 18 Oct 2007 06:57:35 +0000
parents bf896507faa9
children
line wrap: on
line diff
--- a/scrape-vb.py	Fri Sep 07 01:31:47 2007 +0000
+++ b/scrape-vb.py	Thu Oct 18 06:57:35 2007 +0000
@@ -6,7 +6,7 @@
 # Prints out (and emails) when criteria match based on cost,
 # destination, etc
 #
-# $Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $
+# $Id: scrape-vb.py,v 1.7 2007/10/18 06:57:35 darius Exp $
 ############################################################################
 #
 # Copyright (C) 2007 Daniel O'Connor. All rights reserved.
@@ -40,7 +40,7 @@
 usage = '''%prog [options]
 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini'''
 
-optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $")
+optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.7 2007/10/18 06:57:35 darius Exp $")
 optparse.add_option('-d', '--debug', action="store_true", default=False,
                     help="Disable mail & SMS sending, prints message to stdout")
 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead")
@@ -69,7 +69,6 @@
 '''
     sys.exit(0)
     
-parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE)
 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE)
 
 conf = ConfigParser.ConfigParser()
@@ -107,16 +106,21 @@
     smshndl = SMSVodaAu.SMSVodaAu(smsuser, smspass)
 
 s = BeautifulSoup.BeautifulSoup(f)
-hrr = s.find("ul", "happyhr-rows")
-if (hrr == None):
+citypairs = s.findAll("td", "city-pair")
+if (citypairs == []):
     print "No happy hour details found"
     sys.exit(0)
-    
-hrlist = hrr.findAll("li")
+
+prices = s.findAll("td", "dash-r price")
+if (prices == []):
+    print "Couldn't find prices"
+    sys.exit(0)
 
-# XXX: I wanted to use findAll('ul', 'happyhr-conditions') but it
-# doesn't work
-times = parsetper.match(s.findAll('ul')[11].find('li').string)
+if (len(citypairs) != len(prices)):
+    print "City pair & price tables don't have equal size"
+    sys.exit(0)
+
+times = parsetper.search(s.find('p', 'tandc').string)
 if (times == None):
     print "Unable to parse travel period " + parsetper.match(s.findAll('ul')[11].find('li'))
     sys.exit(0)
@@ -131,16 +135,12 @@
 # list of each matching flight (city1, city2, cost, url)
 #
 output = {}
-for i in hrlist:
+for i, p in zip(citypairs, prices):
     href =  i.find('a')
-    match = parsetitle.match(href['title'])
-    if (match == None):
-        print "Unable to match " + str(s)
-        continue
 
-    city1 = match.group(1)
-    city2 = match.group(2)
-    cost = int(match.group(3))
+    city1 = href.next.strip()
+    city2 = href.next.next.next.next.next.strip()
+    cost = int(p.find('a').string.strip('$^ '))
     url = href['href']
     
     for email in conf.sections():
@@ -222,12 +222,16 @@
 if (smssend):
     for o in output:
         if (conf.has_option(o, 'phone')):
+            pnum = conf.get(o, 'phone')
             msg = ""
             for i in output[o]:
                 msg = msg + "%s <-> %s $%d, " % (i[0], i[1], i[2])
             # Chop off the last , & make sure the whole message is not
             # too large.
             msgend = min(len(msg) - 2, 160)
-            print "SMS to " + conf.get(o, 'phone')
             print msg[0:msgend]
-            smshndl.sendamsg(conf.get(o, 'phone'), msg[0:msgend])
+            try:
+                smshndl.sendamsg(pnum, msg[0:msgend])
+                print "Sent SMS to " + pnum
+            except:
+                print "Unable to send SMS to " + pnum