# HG changeset patch
# User darius
# Date 1188019049 0
# Node ID 8045db05180b7acee66088072e277911f8d25a02
# Parent accc4c4654d7e4e3c37edcd43308a086eb62e54a
Initial revision
diff -r accc4c4654d7 -r 8045db05180b scrape-vb.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scrape-vb.py Sat Aug 25 05:17:29 2007 +0000
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+
+############################################################################
+# Screen scraper for Virgin Blue to look for happy hour deals
+#
+# Prints out (and emails) when criteria match based on cost,
+# destination, etc
+#
+# $Id: scrape-vb.py,v 1.1.1.1 2007/08/25 05:17:29 darius Exp $
+############################################################################
+#
+# Copyright (C) 2007 Daniel O'Connor. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+############################################################################
+
+import re, BeautifulSoup, datetime, time, smtplib, sys, urllib
+
+#### Configuration
+
+### Travel criteria
+# Supported keys are email, when, city1, city2, maxcost.
+# email is mandatory. If city2 is not present, a deal matches when
+# city1 is either end of the route. when (dd/mm/yy) and maxcost are
+# optional (omit them to match any date or cost).
+travellers = [
+ { 'email' : 'darius@dons.net.au', 'city1' : 'Sydney' },
+ { 'email' : 'sarah.mahoney@nehta.gov.au', 'city1' : 'Adelaide', 'city2' : 'Brisbane' },
+ ]
+
+### Mail host
+mailhost = 'mail.dons.net.au'
+
+### Who the email is from
+mailfrom = 'darius@dons.net.au'
+
+### What's on the subject line
+mailsubj = 'Virgin Blue Happy Hour Deals'
+
+### Actually send email?
+mailsend = False
+
+### URL to parse
+vburl = 'http://virginblue.com.au'
+
+parsetitle = re.compile(r'([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE)  # "City - City $NN"
+parsetper = re.compile(r'Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE)  # from/to dates
+
+try:  # Fetch the page to scrape; a failed fetch is fatal
+ # Presumably for offline testing against a saved copy: f = open("vb-happyhour.html")
+ f = urllib.urlopen(vburl)
+except IOError, e:
+ print "Unable to fetch page - " + str(e)
+ sys.exit(1)
+
+s = BeautifulSoup.BeautifulSoup(f)
+hrr = s.find("ul", "happyhr-rows")  # the <ul> whose class is "happyhr-rows"
+if (hrr == None):
+ print "No happy hour details found"
+ sys.exit(0)  # exits with status 0: a page without deals is not an error
+
+hrlist = hrr.findAll("li")
+
+# XXX: findAll('ul', 'happyhr-conditions') doesn't work, so use the 12th <ul>
+period = s.findAll('ul')[11].find('li')
+times = parsetper.match(period.string or '')  # .string may be None
+if times is None:
+    print "Unable to parse travel period " + str(period)
+    sys.exit(0)
+
+frtime = datetime.datetime(*time.strptime(times.group(1), "%d/%m/%y")[0:3])
+totime = datetime.datetime(*time.strptime(times.group(2), "%d/%m/%y")[0:3])
+
+#print "Travel from %s to %s" % (str(frtime), str(totime))
+
+# Matching deals, grouped by the traveller email address to notify
+output = {}
+for i in hrlist:
+    href = i.find('a')
+    title = href['title']
+    match = parsetitle.match(title)
+    if match is None:
+        # Report the title that failed to parse, not the whole page
+        print "Unable to match %r" % (title,)
+        continue
+
+    city1 = match.group(1)
+    city2 = match.group(2)
+    cost = int(match.group(3))
+    url = href['href']
+
+    for t in travellers:
+        if 'email' not in t:
+            print "No email key found, configuration error?"
+            continue
+
+        # Both cities given: the deal must join them, in either direction.
+        # Only city1 given: it may be either end of the deal.
+        citymatch = True
+        if 'city1' in t and 'city2' in t:
+            citymatch = (t['city1'], t['city2']) in ((city1, city2), (city2, city1))
+        elif 'city1' in t:
+            citymatch = t['city1'] in (city1, city2)
+
+        # 'when' (dd/mm/yy) must fall inside the advertised travel period.
+        datematch = True
+        if 'when' in t:
+            travtime = datetime.datetime(*time.strptime(t['when'], "%d/%m/%y")[0:3])
+            datematch = frtime <= travtime <= totime
+
+        # 'maxcost' is an inclusive upper limit on the fare.
+        costmatch = True
+        if 'maxcost' in t:
+            costmatch = cost <= int(t['maxcost'])
+
+        if citymatch and datematch and costmatch:
+            # Collect the deal under this traveller's address
+            output.setdefault(t['email'], []).append([city1, city2, cost, url])
+
+# Mail each traveller their matches (only printed when mailsend is False)
+if mailsend:
+    server = smtplib.SMTP(mailhost)
+for o in output:
+    msg = ("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (mailfrom, o, mailsubj))
+    msg = msg + "Your criteria for flights have been matched\r\n\r\n"
+    print "Sending email to " + o
+    for i in output[o]:
+        print "%s <-> %s costs $%d" % (i[0], i[1], i[2])
+        msg = msg + "%s <-> %s costs $%d - %s\r\n" % (i[0], i[1], i[2], i[3])
+    msg = msg + "\r\nNote: travel period is from %s to %s" % \
+        (frtime.strftime("%A %e %B %Y"), totime.strftime("%A %e %B %Y"))
+    if mailsend:
+        server.sendmail(mailfrom, o, msg)
+    else:
+        print msg
+    print
+if mailsend:
+    server.quit()  # end the SMTP session instead of leaving it open
diff -r accc4c4654d7 -r 8045db05180b vb-happyhour.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vb-happyhour.html Sat Aug 25 05:17:29 2007 +0000
@@ -0,0 +1,1088 @@
+
+
+
+
+
+
+ Cheap Flights and Holidays with Great Service from the World's Best Low Cost Airline
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Cheap Flights
Looking for cheap flights or a cheap holiday deal with flights included? We have both right here at virginblue.com.au or check out Blueholidays.com.au for great holiday deals.
+
Did you know Virgin Blue has cheap flights available every single day between 12pm and 1pm? We also offer cheap flights in our V-mail newsletter which you can signup for free here
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Happy Hour Ends In
+ 00
+ hrs
+ 00
+ mins
+ 00
+ secs
+
Looking for cheap flights or a cheap holiday deal with flights included? We have both right here at virginblue.com.au or check out Blueholidays.com.au for great holiday deals.
+
Did you know Virgin Blue has cheap flights available every single day between 12pm and 1pm? We also offer cheap flights in our V-mail newsletter which you can signup for free here