comparison scrape-vb.py @ 7:bf896507faa9

Add code to send an SMS if configured to do so. Rearrange the output stage a bit to make it clearer.
author darius
date Fri, 07 Sep 2007 01:31:47 +0000
parents 9f3eb9a07966
children d17fd6f3a492
comparison
equal deleted inserted replaced
6:9f3eb9a07966 7:bf896507faa9
4 # Screen scraper for Virgin Blue to look for happy hour deals 4 # Screen scraper for Virgin Blue to look for happy hour deals
5 # 5 #
6 # Prints out (and emails) when criteria match based on cost, 6 # Prints out (and emails) when criteria match based on cost,
7 # destination, etc 7 # destination, etc
8 # 8 #
9 # $Id: scrape-vb.py,v 1.5 2007/08/29 07:37:59 darius Exp $ 9 # $Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $
10 ############################################################################ 10 ############################################################################
11 # 11 #
12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved. 12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved.
13 # 13 #
14 # Redistribution and use in source and binary forms, with or without 14 # Redistribution and use in source and binary forms, with or without
33 # SUCH DAMAGE. 33 # SUCH DAMAGE.
34 # 34 #
35 ############################################################################ 35 ############################################################################
36 36
37 import os, re, BeautifulSoup, datetime, time, smtplib, sys, urllib 37 import os, re, BeautifulSoup, datetime, time, smtplib, sys, urllib
38 import ConfigParser, optparse 38 import ConfigParser, optparse, SMSVodaAu
39 39
40 usage = '''%prog [options] 40 usage = '''%prog [options]
41 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini''' 41 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini'''
42 42
43 optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.5 2007/08/29 07:37:59 darius Exp $") 43 optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $")
44 optparse.add_option('-d', '--debug', action="store_true", default=False, 44 optparse.add_option('-d', '--debug', action="store_true", default=False,
45 help="Disable mail sending, prints mail message to stdout") 45 help="Disable mail & SMS sending, prints message to stdout")
46 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead") 46 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead")
47 optparse.add_option('-e', '--example', action="store_true", default=False, 47 optparse.add_option('-e', '--example', action="store_true", default=False,
48 help="Print an example configuration file to stdout and exit") 48 help="Print an example configuration file to stdout and exit")
49 (options, args) = optparse.parse_args() 49 (options, args) = optparse.parse_args()
50 50
53 mailsubj="Subject line for emails" 53 mailsubj="Subject line for emails"
54 # The following 3 options are necessary before email will be sent 54 # The following 3 options are necessary before email will be sent
55 mailfrom=user@host.com 55 mailfrom=user@host.com
56 mailsend=True 56 mailsend=True
57 mailhost=mail.server.com 57 mailhost=mail.server.com
58 smsuser=0412312312
59 smspass=mys3krit
60 smssend=True
58 61
59 [user@host.com] 62 [user@host.com]
60 # All fields are optional 63 # All fields are optional
61 city1=Foo 64 city1=Foo
62 city2=Bar 65 city2=Bar
63 when=dd/mm/yy 66 when=dd/mm/yy
64 maxcost=123 67 maxcost=123
68 phone=0498765432
65 ''' 69 '''
66 sys.exit(0) 70 sys.exit(0)
67 71
68 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) 72 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE)
69 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) 73 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE)
84 else: 88 else:
85 f = urllib.urlopen(conf.get('global', 'vburl')) 89 f = urllib.urlopen(conf.get('global', 'vburl'))
86 except IOError, e: 90 except IOError, e:
87 print "Unable to fetch page - " + str(e) 91 print "Unable to fetch page - " + str(e)
88 sys.exit(1) 92 sys.exit(1)
89 93
94 # Test if we have been configured to send SMSs
95 try:
96 smsuser = conf.get('global', 'smsuser')
97 smspass = conf.get('global', 'smspass')
98 smssend = conf.getboolean('global', 'smssend')
99 except ConfigParser.NoOptionError:
100 smssend = False
101
102 if (options.debug == True and smssend):
103 print "smssend overridden due to debugging"
104 smssend = False
105
106 if (smssend):
107 smshndl = SMSVodaAu.SMSVodaAu(smsuser, smspass)
108
90 s = BeautifulSoup.BeautifulSoup(f) 109 s = BeautifulSoup.BeautifulSoup(f)
91 hrr = s.find("ul", "happyhr-rows") 110 hrr = s.find("ul", "happyhr-rows")
92 if (hrr == None): 111 if (hrr == None):
93 print "No happy hour details found" 112 print "No happy hour details found"
94 sys.exit(0) 113 sys.exit(0)
103 sys.exit(0) 122 sys.exit(0)
104 123
105 frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3]) 124 frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3])
106 totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3]) 125 totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3])
107 126
127 #
128 # Go through the HTML and work out who wants to be notified of what
129 #
130 # Store in output, a dictionary keyed by email address which holds a
131 # list of each matching flight (city1, city2, cost, url)
132 #
108 output = {} 133 output = {}
109 for i in hrlist: 134 for i in hrlist:
110 href = i.find('a') 135 href = i.find('a')
111 match = parsetitle.match(href['title']) 136 match = parsetitle.match(href['title'])
112 if (match == None): 137 if (match == None):
150 if (t['email'] not in output): 175 if (t['email'] not in output):
151 176
152 output[t['email']] = [] 177 output[t['email']] = []
153 output[t['email']].append([city1, city2, cost, url]) 178 output[t['email']].append([city1, city2, cost, url])
154 179
180 # Test if we have been configured to send email
155 try: 181 try:
156 mailsubj = conf.get('global', 'mailsubj') 182 mailsubj = conf.get('global', 'mailsubj')
157 mailhost = conf.get('global', 'mailhost') 183 mailhost = conf.get('global', 'mailhost')
158 mailsend = conf.getboolean('global', 'mailsend') 184 mailsend = conf.getboolean('global', 'mailsend')
159 mailfrom = conf.get('global', 'mailfrom') 185 mailfrom = conf.get('global', 'mailfrom')
165 mailsend = False 191 mailsend = False
166 192
167 if (mailsend): 193 if (mailsend):
168 server = smtplib.SMTP(mailhost) 194 server = smtplib.SMTP(mailhost)
169 #server.set_debuglevel(1) 195 #server.set_debuglevel(1)
170 else: 196
171 print "Note: Mail sending disabled" 197 #
172 198 # Output the various notifications
173 for o in output: 199 #
174 if (mailsend): 200 ttimestr = "Note: travel period is from %s to %s" % \
175 msg = ("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj)) 201 (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y"))
202
203 # Email each person about their flights
204 if (mailsend):
205 for o in output:
206 msg = "From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj)
176 msg = msg + "Your criteria for flights have been matched\r\n\r\n" 207 msg = msg + "Your criteria for flights have been matched\r\n\r\n"
177 else: 208 for i in output[o]:
178 print "Match for " + o
179 for i in output[o]:
180 if (mailsend):
181 msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3]) 209 msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3])
182 else: 210
183 print "%s <-> %s costs $%d" % (i[0], i[1], i[2])
184
185 ttimestr = "Note: travel period is from %s to %s" % \
186 (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y"))
187
188 if (mailsend):
189 msg = msg + "\r\n" + ttimestr + "\r\n" 211 msg = msg + "\r\n" + ttimestr + "\r\n"
190 server.sendmail(mailfrom, o, msg) 212 server.sendmail(mailfrom, o, msg)
191 else: 213
192 print ttimestr 214 else:
193 print 215 # If not emailing print to stdout
216 for o in output:
217 print "Match for " + o
218 for i in output[o]:
219 print "%s <-> %s costs $%d" % (i[0], i[1], i[2])
220
221 # SMS each person about their flights
222 if (smssend):
223 for o in output:
224 if (conf.has_option(o, 'phone')):
225 msg = ""
226 for i in output[o]:
227 msg = msg + "%s <-> %s $%d, " % (i[0], i[1], i[2])
228 # Chop off the trailing ", " separator & make sure the whole message is not
229 # too large.
230 msgend = min(len(msg) - 2, 160)
231 print "SMS to " + conf.get(o, 'phone')
232 print msg[0:msgend]
233 smshndl.sendamsg(conf.get(o, 'phone'), msg[0:msgend])