comparison scrape-vb.py @ 3:89232ea0c3d4

Read configuration from an ini file rather than hard coding it in the source.
author darius
date Mon, 27 Aug 2007 01:42:11 +0000
parents 8045db05180b
children e3f4ef0b6e39
comparison
equal deleted inserted replaced
2:a28ea6e01650 3:89232ea0c3d4
4 # Screen scraper for Virgin Blue to look for happy hour deals 4 # Screen scraper for Virgin Blue to look for happy hour deals
5 # 5 #
6 # Prints out (and emails) when criteria match based on cost, 6 # Prints out (and emails) when criteria match based on cost,
7 # destination, etc 7 # destination, etc
8 # 8 #
9 # $Id: scrape-vb.py,v 1.1.1.1 2007/08/25 05:17:29 darius Exp $ 9 # $Id: scrape-vb.py,v 1.2 2007/08/27 01:42:11 darius Exp $
10 ############################################################################ 10 ############################################################################
11 # 11 #
12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved. 12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved.
13 # 13 #
14 # Redistribution and use in source and binary forms, with or without 14 # Redistribution and use in source and binary forms, with or without
32 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 # SUCH DAMAGE. 33 # SUCH DAMAGE.
34 # 34 #
35 ############################################################################ 35 ############################################################################
36 36
37 import re, BeautifulSoup, datetime, time, smtplib, sys, urllib 37 import re, BeautifulSoup, datetime, time, smtplib, sys, urllib, ConfigParser
38
39 #### Configuration
40
41 ### Travel criteria
42 # Supported keys are email, when, city1, city2, maxcost
43 # email is mandatory. If city2 is not present either city will be
44 # matched. when and maxcost are optional (will match for any date or
45 # cost)
46 travellers = [
47 { 'email' : 'darius@dons.net.au', 'city1' : 'Sydney' },
48 { 'email' : 'sarah.mahoney@nehta.gov.au', 'city1' : 'Adelaide', 'city2' : 'Brisbane' },
49 ]
50
51 ### Mail host
52 mailhost = 'mail.dons.net.au'
53
54 ### Who the email is from
55 mailfrom = 'darius@dons.net.au'
56
57 ### What's on the subject line
58 mailsubj = 'Virgin Blue Happy Hour Deals'
59
60 ### Actually send email?
61 mailsend = False
62
63 ### URL to parse
64 vburl = 'http://virginblue.com.au'
65 38
66 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) 39 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE)
67 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) 40 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE)
41
42 conf = ConfigParser.ConfigParser()
43 conf.add_section('global')
44 conf.set('global', 'mailsubj', 'Virgin Blue Happy Hour Deals')
45 conf.set('global', 'vburl', 'http://virginblue.com.au')
46 conf.read('scrape-vb.ini')
68 47
69 try: 48 try:
70 #f = open("vb-happyhour.html") 49 #f = open("vb-happyhour.html")
71 f = urllib.urlopen(vburl) 50 f = urllib.urlopen(vburl)
72 except IOError, e: 51 except IOError, e:
89 sys.exit(0) 68 sys.exit(0)
90 69
91 frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3]) 70 frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3])
92 totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3]) 71 totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3])
93 72
94 #print "Travel from %s to %s" % (str(frtime), str(totime))
95
96 output = {} 73 output = {}
97 for i in hrlist: 74 for i in hrlist:
98 href = i.find('a') 75 href = i.find('a')
99 match = parsetitle.match(href['title']) 76 match = parsetitle.match(href['title'])
100 if (match == None): 77 if (match == None):
104 city1 = match.group(1) 81 city1 = match.group(1)
105 city2 = match.group(2) 82 city2 = match.group(2)
106 cost = int(match.group(3)) 83 cost = int(match.group(3))
107 url = href['href'] 84 url = href['href']
108 85
109 for t in travellers: 86 for email in conf.sections():
110 if ('email' not in t): 87 if (email == 'global'):
111 print "No email key found, configuration error?"
112 continue 88 continue
113 89
90 t = {'email' : email}
91 for i in conf.items(email):
92 t[i[0]] = i[1]
93
114 citymatch = True 94 citymatch = True
115 if ('city1' in t and 'city2' in t): 95 if ('city1' in t and 'city2' in t):
116 if((t['city1'] != city1 or t['city2'] != city2) and 96 if((t['city1'] != city1 or t['city2'] != city2) and
117 (t['city1'] != city2 or t['city2'] != city1)): 97 (t['city1'] != city2 or t['city2'] != city1)):
118 citymatch = False 98 citymatch = False
135 if (t['email'] not in output): 115 if (t['email'] not in output):
136 116
137 output[t['email']] = [] 117 output[t['email']] = []
138 output[t['email']].append([city1, city2, cost, url]) 118 output[t['email']].append([city1, city2, cost, url])
139 119
120 try:
121 mailsubj = conf.get('global', 'mailsubj')
122 mailhost = conf.get('global', 'mailhost')
123 mailsend = conf.getboolean('global', 'mailsend')
124 mailfrom = conf.get('global', 'mailfrom')
125 except ConfigParser.NoOptionError:
126 mailsend = False
127
140 if (mailsend): 128 if (mailsend):
141 server = smtplib.SMTP(mailhost) 129 server = smtplib.SMTP(mailhost)
142 #server.set_debuglevel(1) 130 #server.set_debuglevel(1)
131 else:
132 print "Note: Mail sending disabled"
133
134 for o in output:
135 if (mailsend):
136 msg = ("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj))
137 msg = msg + "Your criteria for flights have been matched\r\n\r\n"
138 else:
139 print "Match for " + o
140 for i in output[o]:
141 if (mailsend):
142 msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3])
143 else:
144 print "%s <-> %s costs $%d" % (i[0], i[1], i[2])
143 145
144 for o in output: 146 ttimestr = "Note: travel period is from %s to %s" % \
145 msg = ("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj)) 147 (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y"))
146 msg = msg + "Your criteria for flights have been matched\r\n\r\n"
147 print "Sending email to " + o
148 for i in output[o]:
149 print "%s <-> %s costs $%d" % (i[0], i[1], i[2])
150 msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3])
151 148
152 msg = msg + "\r\nNote: travel period is from %s to %s" % \
153 (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y"))
154 if (mailsend): 149 if (mailsend):
150 msg = msg + "\r\n" + ttimestr + "\r\n"
155 server.sendmail(mailfrom, o, msg) 151 server.sendmail(mailfrom, o, msg)
156 else: 152 else:
157 print msg 153 print ttimestr
158 print 154 print