Mercurial > ~darius > hgwebdir.cgi > scrape-vb
comparison scrape-vb.py @ 7:bf896507faa9
Add code to send an SMS if configured to do so.
Rearrange the output stage a bit to make it clearer.
author | darius |
---|---|
date | Fri, 07 Sep 2007 01:31:47 +0000 |
parents | 9f3eb9a07966 |
children | d17fd6f3a492 |
comparison
equal
deleted
inserted
replaced
6:9f3eb9a07966 | 7:bf896507faa9 |
---|---|
4 # Screen scraper for Virgin Blue to look for happy hour deals | 4 # Screen scraper for Virgin Blue to look for happy hour deals |
5 # | 5 # |
6 # Prints out (and emails) when criteria match based on cost, | 6 # Prints out (and emails) when criteria match based on cost, |
7 # destination, etc | 7 # destination, etc |
8 # | 8 # |
9 # $Id: scrape-vb.py,v 1.5 2007/08/29 07:37:59 darius Exp $ | 9 # $Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $ |
10 ############################################################################ | 10 ############################################################################ |
11 # | 11 # |
12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved. | 12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved. |
13 # | 13 # |
14 # Redistribution and use in source and binary forms, with or without | 14 # Redistribution and use in source and binary forms, with or without |
33 # SUCH DAMAGE. | 33 # SUCH DAMAGE. |
34 # | 34 # |
35 ############################################################################ | 35 ############################################################################ |
36 | 36 |
37 import os, re, BeautifulSoup, datetime, time, smtplib, sys, urllib | 37 import os, re, BeautifulSoup, datetime, time, smtplib, sys, urllib |
38 import ConfigParser, optparse | 38 import ConfigParser, optparse, SMSVodaAu |
39 | 39 |
40 usage = '''%prog [options] | 40 usage = '''%prog [options] |
41 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini''' | 41 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini''' |
42 | 42 |
43 optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.5 2007/08/29 07:37:59 darius Exp $") | 43 optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.6 2007/09/07 01:31:47 darius Exp $") |
44 optparse.add_option('-d', '--debug', action="store_true", default=False, | 44 optparse.add_option('-d', '--debug', action="store_true", default=False, |
45 help="Disable mail sending, prints mail message to stdout") | 45 help="Disable mail & SMS sending, prints message to stdout") |
46 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead") | 46 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead") |
47 optparse.add_option('-e', '--example', action="store_true", default=False, | 47 optparse.add_option('-e', '--example', action="store_true", default=False, |
48 help="Print an example configuration file to stdout and exit") | 48 help="Print an example configuration file to stdout and exit") |
49 (options, args) = optparse.parse_args() | 49 (options, args) = optparse.parse_args() |
50 | 50 |
53 mailsubj="Subject line for emails" | 53 mailsubj="Subject line for emails" |
54 # The following 3 options are necessary before email will be sent | 54 # The following 3 options are necessary before email will be sent |
55 mailfrom=user@host.com | 55 mailfrom=user@host.com |
56 mailsend=True | 56 mailsend=True |
57 mailhost=mail.server.com | 57 mailhost=mail.server.com |
58 smsuser=0412312312 | |
59 smspass=mys3krit | |
60 smssend=True | |
58 | 61 |
59 [user@host.com] | 62 [user@host.com] |
60 # All fields are optional | 63 # All fields are optional |
61 city1=Foo | 64 city1=Foo |
62 city2=Bar | 65 city2=Bar |
63 when=dd/mm/yy | 66 when=dd/mm/yy |
64 maxcost=123 | 67 maxcost=123 |
68 phone=0498765432 | |
65 ''' | 69 ''' |
66 sys.exit(0) | 70 sys.exit(0) |
67 | 71 |
68 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) | 72 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) |
69 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) | 73 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) |
84 else: | 88 else: |
85 f = urllib.urlopen(conf.get('global', 'vburl')) | 89 f = urllib.urlopen(conf.get('global', 'vburl')) |
86 except IOError, e: | 90 except IOError, e: |
87 print "Unable to fetch page - " + str(e) | 91 print "Unable to fetch page - " + str(e) |
88 sys.exit(1) | 92 sys.exit(1) |
89 | 93 |
94 # Test if we have been configured to send SMSs | |
95 try: | |
96 smsuser = conf.get('global', 'smsuser') | |
97 smspass = conf.get('global', 'smspass') | |
98 smssend = conf.getboolean('global', 'smssend') | |
99 except ConfigParser.NoOptionError: | |
100 smssend = False | |
101 | |
102 if (options.debug == True and smssend): | |
103 print "smssend overridden due to debugging" | |
104 smssend = False | |
105 | |
106 if (smssend): | |
107 smshndl = SMSVodaAu.SMSVodaAu(smsuser, smspass) | |
108 | |
90 s = BeautifulSoup.BeautifulSoup(f) | 109 s = BeautifulSoup.BeautifulSoup(f) |
91 hrr = s.find("ul", "happyhr-rows") | 110 hrr = s.find("ul", "happyhr-rows") |
92 if (hrr == None): | 111 if (hrr == None): |
93 print "No happy hour details found" | 112 print "No happy hour details found" |
94 sys.exit(0) | 113 sys.exit(0) |
103 sys.exit(0) | 122 sys.exit(0) |
104 | 123 |
105 frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3]) | 124 frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3]) |
106 totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3]) | 125 totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3]) |
107 | 126 |
127 # | |
128 # Go through the HTML and work out who wants to be notified of what | |
129 # | |
130 # Store in output, a dictionary keyed by email address which holds a | |
131 # list of each matching flight (city1, city2, cost, url) | |
132 # | |
108 output = {} | 133 output = {} |
109 for i in hrlist: | 134 for i in hrlist: |
110 href = i.find('a') | 135 href = i.find('a') |
111 match = parsetitle.match(href['title']) | 136 match = parsetitle.match(href['title']) |
112 if (match == None): | 137 if (match == None): |
150 if (t['email'] not in output): | 175 if (t['email'] not in output): |
151 | 176 |
152 output[t['email']] = [] | 177 output[t['email']] = [] |
153 output[t['email']].append([city1, city2, cost, url]) | 178 output[t['email']].append([city1, city2, cost, url]) |
154 | 179 |
180 # Test if we have been configured to send email | |
155 try: | 181 try: |
156 mailsubj = conf.get('global', 'mailsubj') | 182 mailsubj = conf.get('global', 'mailsubj') |
157 mailhost = conf.get('global', 'mailhost') | 183 mailhost = conf.get('global', 'mailhost') |
158 mailsend = conf.getboolean('global', 'mailsend') | 184 mailsend = conf.getboolean('global', 'mailsend') |
159 mailfrom = conf.get('global', 'mailfrom') | 185 mailfrom = conf.get('global', 'mailfrom') |
165 mailsend = False | 191 mailsend = False |
166 | 192 |
167 if (mailsend): | 193 if (mailsend): |
168 server = smtplib.SMTP(mailhost) | 194 server = smtplib.SMTP(mailhost) |
169 #server.set_debuglevel(1) | 195 #server.set_debuglevel(1) |
170 else: | 196 |
171 print "Note: Mail sending disabled" | 197 # |
172 | 198 # Output the various notifications |
173 for o in output: | 199 # |
174 if (mailsend): | 200 ttimestr = "Note: travel period is from %s to %s" % \ |
175 msg = ("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj)) | 201 (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y")) |
202 | |
203 # Email each person about their flights | |
204 if (mailsend): | |
205 for o in output: | |
206 msg = "From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj) | |
176 msg = msg + "Your criteria for flights have been matched\r\n\r\n" | 207 msg = msg + "Your criteria for flights have been matched\r\n\r\n" |
177 else: | 208 for i in output[o]: |
178 print "Match for " + o | |
179 for i in output[o]: | |
180 if (mailsend): | |
181 msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3]) | 209 msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3]) |
182 else: | 210 |
183 print "%s <-> %s costs $%d" % (i[0], i[1], i[2]) | |
184 | |
185 ttimestr = "Note: travel period is from %s to %s" % \ | |
186 (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y")) | |
187 | |
188 if (mailsend): | |
189 msg = msg + "\r\n" + ttimestr + "\r\n" | 211 msg = msg + "\r\n" + ttimestr + "\r\n" |
190 server.sendmail(mailfrom, o, msg) | 212 server.sendmail(mailfrom, o, msg) |
191 else: | 213 |
192 print ttimestr | 214 else: |
193 print | 215 # If not emailing print to stdout |
216 for o in output: | |
217 print "Match for " + o | |
218 for i in output[o]: | |
219 print "%s <-> %s costs $%d" % (i[0], i[1], i[2]) | |
220 | |
221 # SMS each person about their flights | |
222 if (smssend): | |
223 for o in output: | |
224 if (conf.has_option(o, 'phone')): | |
225 msg = "" | |
226 for i in output[o]: | |
227 msg = msg + "%s <-> %s $%d, " % (i[0], i[1], i[2]) | |
228 # Chop off the last , & make sure the whole message is not | |
229 # too large. | |
230 msgend = min(len(msg) - 2, 160) | |
231 print "SMS to " + conf.get(o, 'phone') | |
232 print msg[0:msgend] | |
233 smshndl.sendamsg(conf.get(o, 'phone'), msg[0:msgend]) |