Mercurial > ~darius > hgwebdir.cgi > scrape-vb
diff scrape-vb.py @ 3:89232ea0c3d4
Read configuration from an ini file rather than hard coding it in the
source.
author | darius |
---|---|
date | Mon, 27 Aug 2007 01:42:11 +0000 |
parents | 8045db05180b |
children | e3f4ef0b6e39 |
line wrap: on
line diff
--- a/scrape-vb.py Tue Oct 23 10:09:45 2007 +0930 +++ b/scrape-vb.py Mon Aug 27 01:42:11 2007 +0000 @@ -6,7 +6,7 @@ # Prints out (and emails) when criteria match based on cost, # destination, etc # -# $Id: scrape-vb.py,v 1.1.1.1 2007/08/25 05:17:29 darius Exp $ +# $Id: scrape-vb.py,v 1.2 2007/08/27 01:42:11 darius Exp $ ############################################################################ # # Copyright (C) 2007 Daniel O'Connor. All rights reserved. @@ -34,38 +34,17 @@ # ############################################################################ -import re, BeautifulSoup, datetime, time, smtplib, sys, urllib - -#### Configuration - -### Travel criteria -# Supported keys are email, when, city1, city2, maxcost -# email is mandatory. If city2 is not present either city will be -# matched. when and maxcost are optional (will match for any date or -# cost) -travellers = [ - { 'email' : 'darius@dons.net.au', 'city1' : 'Sydney' }, - { 'email' : 'sarah.mahoney@nehta.gov.au', 'city1' : 'Adelaide', 'city2' : 'Brisbane' }, - ] - -### Mail host -mailhost = 'mail.dons.net.au' - -### Who the email is from -mailfrom = 'darius@dons.net.au' - -### What's on the subject line -mailsubj = 'Virgin Blue Happy Hour Deals' - -### Actually send email? 
-mailsend = False - -### URL to parse -vburl = 'http://virginblue.com.au' +import re, BeautifulSoup, datetime, time, smtplib, sys, urllib, ConfigParser parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) +conf = ConfigParser.ConfigParser() +conf.add_section('global') +conf.set('global', 'mailsubj', 'Virgin Blue Happy Hour Deals') +conf.set('global', 'vburl', 'http://virginblue.com.au') +conf.read('scrape-vb.ini') + try: #f = open("vb-happyhour.html") f = urllib.urlopen(vburl) @@ -91,8 +70,6 @@ frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3]) totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3]) -#print "Travel from %s to %s" % (str(frtime), str(totime)) - output = {} for i in hrlist: href = i.find('a') @@ -106,11 +83,14 @@ cost = int(match.group(3)) url = href['href'] - for t in travellers: - if ('email' not in t): - print "No email key found, configuration error?" 
+ for email in conf.sections(): + if (email == 'global'): continue - + + t = {'email' : email} + for i in conf.items(email): + t[i[0]] = i[1] + citymatch = True if ('city1' in t and 'city2' in t): if((t['city1'] != city1 or t['city2'] != city2) and @@ -137,22 +117,38 @@ output[t['email']] = [] output[t['email']].append([city1, city2, cost, url]) +try: + mailsubj = conf.get('global', 'mailsubj') + mailhost = conf.get('global', 'mailhost') + mailsend = conf.getboolean('global', 'mailsend') + mailfrom = conf.get('global', 'mailfrom') +except ConfigParser.NoOptionError: + mailsend = False + if (mailsend): server = smtplib.SMTP(mailhost) #server.set_debuglevel(1) - +else: + print "Note: Mail sending disabled" + for o in output: - msg = ("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj)) - msg = msg + "Your criteria for flights have been matched\r\n\r\n" - print "Sending email to " + o + if (mailsend): + msg = ("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj)) + msg = msg + "Your criteria for flights have been matched\r\n\r\n" + else: + print "Match for " + o for i in output[o]: - print "%s <-> %s costs $%d" % (i[0], i[1], i[2]) - msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3]) + if (mailsend): + msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3]) + else: + print "%s <-> %s costs $%d" % (i[0], i[1], i[2]) - msg = msg + "\r\nNote: travel period is from %s to %s" % \ - (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y")) + ttimestr = "Note: travel period is from %s to %s" % \ + (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y")) + if (mailsend): + msg = msg + "\r\n" + ttimestr + "\r\n" server.sendmail(mailfrom, o, msg) else: - print msg - print + print ttimestr + print