comparison scrape-vb.py @ 8:d17fd6f3a492

- Catch up with new VB page layout. - Don't explode if we can't send an SMS, just log it.
author darius
date Thu, 18 Oct 2007 06:57:35 +0000
parents bf896507faa9
children
comparison
equal deleted inserted replaced
7:bf896507faa9 8:d17fd6f3a492
4 # Screen scraper for Virgin Blue to look for happy hour deals 4 # Screen scraper for Virgin Blue to look for happy hour deals
5 # 5 #
6 # Prints out (and emails) when criteria match based on cost, 6 # Prints out (and emails) when criteria match based on cost,
7 # destination, etc 7 # destination, etc
8 # 8 #
9 # $Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $ 9 # $Id: scrape-vb.py,v 1.7 2007/10/18 06:57:35 darius Exp $
10 ############################################################################ 10 ############################################################################
11 # 11 #
12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved. 12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved.
13 # 13 #
14 # Redistribution and use in source and binary forms, with or without 14 # Redistribution and use in source and binary forms, with or without
38 import ConfigParser, optparse, SMSVodaAu 38 import ConfigParser, optparse, SMSVodaAu
39 39
40 usage = '''%prog [options] 40 usage = '''%prog [options]
41 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini''' 41 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini'''
42 42
43 optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $") 43 optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.7 2007/10/18 06:57:35 darius Exp $")
44 optparse.add_option('-d', '--debug', action="store_true", default=False, 44 optparse.add_option('-d', '--debug', action="store_true", default=False,
45 help="Disable mail & SMS sending, prints message to stdout") 45 help="Disable mail & SMS sending, prints message to stdout")
46 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead") 46 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead")
47 optparse.add_option('-e', '--example', action="store_true", default=False, 47 optparse.add_option('-e', '--example', action="store_true", default=False,
48 help="Print an example configuration file to stdout and exit") 48 help="Print an example configuration file to stdout and exit")
67 maxcost=123 67 maxcost=123
68 phone=0498765432 68 phone=0498765432
69 ''' 69 '''
70 sys.exit(0) 70 sys.exit(0)
71 71
72 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE)
73 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) 72 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE)
74 73
75 conf = ConfigParser.ConfigParser() 74 conf = ConfigParser.ConfigParser()
76 conf.add_section('global') 75 conf.add_section('global')
77 conf.set('global', 'mailsubj', 'Virgin Blue Happy Hour Deals') 76 conf.set('global', 'mailsubj', 'Virgin Blue Happy Hour Deals')
105 104
106 if (smssend): 105 if (smssend):
107 smshndl = SMSVodaAu.SMSVodaAu(smsuser, smspass) 106 smshndl = SMSVodaAu.SMSVodaAu(smsuser, smspass)
108 107
109 s = BeautifulSoup.BeautifulSoup(f) 108 s = BeautifulSoup.BeautifulSoup(f)
110 hrr = s.find("ul", "happyhr-rows") 109 citypairs = s.findAll("td", "city-pair")
111 if (hrr == None): 110 if (citypairs == []):
112 print "No happy hour details found" 111 print "No happy hour details found"
113 sys.exit(0) 112 sys.exit(0)
114 113
115 hrlist = hrr.findAll("li") 114 prices = s.findAll("td", "dash-r price")
116 115 if (prices == []):
117 # XXX: I wanted to use findAll('ul', 'happyhr-conditions') but it 116 print "Couldn't find prices"
118 # doesn't work 117 sys.exit(0)
119 times = parsetper.match(s.findAll('ul')[11].find('li').string) 118
119 if (len(citypairs) != len(prices)):
120 print "City pair & price tables don't have equal size"
121 sys.exit(0)
122
123 times = parsetper.search(s.find('p', 'tandc').string)
120 if (times == None): 124 if (times == None):
121 print "Unable to parse travel period " + parsetper.match(s.findAll('ul')[11].find('li')) 125 print "Unable to parse travel period " + parsetper.match(s.findAll('ul')[11].find('li'))
122 sys.exit(0) 126 sys.exit(0)
123 127
124 frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3]) 128 frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3])
129 # 133 #
130 # Store in output, a dictionary keyed by email address which holds a 134 # Store in output, a dictionary keyed by email address which holds a
131 # list of each matching flight (city1, city2, cost, url) 135 # list of each matching flight (city1, city2, cost, url)
132 # 136 #
133 output = {} 137 output = {}
134 for i in hrlist: 138 for i, p in zip(citypairs, prices):
135 href = i.find('a') 139 href = i.find('a')
136 match = parsetitle.match(href['title']) 140
137 if (match == None): 141 city1 = href.next.strip()
138 print "Unable to match " + str(s) 142 city2 = href.next.next.next.next.next.strip()
139 continue 143 cost = int(p.find('a').string.strip('$^ '))
140
141 city1 = match.group(1)
142 city2 = match.group(2)
143 cost = int(match.group(3))
144 url = href['href'] 144 url = href['href']
145 145
146 for email in conf.sections(): 146 for email in conf.sections():
147 if (email == 'global'): 147 if (email == 'global'):
148 continue 148 continue
220 220
221 # SMS each person about their flights 221 # SMS each person about their flights
222 if (smssend): 222 if (smssend):
223 for o in output: 223 for o in output:
224 if (conf.has_option(o, 'phone')): 224 if (conf.has_option(o, 'phone')):
225 pnum = conf.get(o, 'phone')
225 msg = "" 226 msg = ""
226 for i in output[o]: 227 for i in output[o]:
227 msg = msg + "%s <-> %s $%d, " % (i[0], i[1], i[2]) 228 msg = msg + "%s <-> %s $%d, " % (i[0], i[1], i[2])
228 # Chop off the last , & make sure the whole message is not 229 # Chop off the last , & make sure the whole message is not
229 # too large. 230 # too large.
230 msgend = min(len(msg) - 2, 160) 231 msgend = min(len(msg) - 2, 160)
231 print "SMS to " + conf.get(o, 'phone')
232 print msg[0:msgend] 232 print msg[0:msgend]
233 smshndl.sendamsg(conf.get(o, 'phone'), msg[0:msgend]) 233 try:
234 smshndl.sendamsg(pnum, msg[0:msgend])
235 print "Sent SMS to " + pnum
236 except:
237 print "Unable to send SMS to " + pnum