Mercurial > ~darius > hgwebdir.cgi > scrape-vb
comparison scrape-vb.py @ 6:9f3eb9a07966
Add config parser.
Add ability to print out an example config file.
Add -f option to read a saved page.
author | darius |
---|---|
date | Wed, 29 Aug 2007 07:37:59 +0000 |
parents | 275603a8e2ae |
children | bf896507faa9 |
comparison
Legend: equal, deleted, inserted, replaced
5:275603a8e2ae | 6:9f3eb9a07966 |
---|---|
4 # Screen scraper for Virgin Blue to look for happy hour deals | 4 # Screen scraper for Virgin Blue to look for happy hour deals |
5 # | 5 # |
6 # Prints out (and emails) when criteria match based on cost, | 6 # Prints out (and emails) when criteria match based on cost, |
7 # destination, etc | 7 # destination, etc |
8 # | 8 # |
9 # $Id: scrape-vb.py,v 1.4 2007/08/28 02:58:50 darius Exp $ | 9 # $Id: scrape-vb.py,v 1.5 2007/08/29 07:37:59 darius Exp $ |
10 ############################################################################ | 10 ############################################################################ |
11 # | 11 # |
12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved. | 12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved. |
13 # | 13 # |
14 # Redistribution and use in source and binary forms, with or without | 14 # Redistribution and use in source and binary forms, with or without |
32 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 32 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 # SUCH DAMAGE. | 33 # SUCH DAMAGE. |
34 # | 34 # |
35 ############################################################################ | 35 ############################################################################ |
36 | 36 |
37 import os, re, BeautifulSoup, datetime, time, smtplib, sys, urllib, ConfigParser | 37 import os, re, BeautifulSoup, datetime, time, smtplib, sys, urllib |
38 import ConfigParser, optparse | |
38 | 39 |
40 usage = '''%prog [options] | |
41 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini''' | |
42 | |
43 optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.5 2007/08/29 07:37:59 darius Exp $") | |
44 optparse.add_option('-d', '--debug', action="store_true", default=False, | |
45 help="Disable mail sending, prints mail message to stdout") | |
46 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead") | |
47 optparse.add_option('-e', '--example', action="store_true", default=False, | |
48 help="Print an example configuration file to stdout and exit") | |
49 (options, args) = optparse.parse_args() | |
50 | |
51 if (options.example): | |
52 print '''[global] | |
53 mailsubj="Subject line for emails" | |
54 # The following 3 options are necessary before email will be sent | |
55 mailfrom=user@host.com | |
56 mailsend=True | |
57 mailhost=mail.server.com | |
58 | |
59 [user@host.com] | |
60 # All fields are optional | |
61 city1=Foo | |
62 city2=Bar | |
63 when=dd/mm/yy | |
64 maxcost=123 | |
65 ''' | |
66 sys.exit(0) | |
67 | |
39 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) | 68 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) |
40 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) | 69 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) |
41 | 70 |
42 conf = ConfigParser.ConfigParser() | 71 conf = ConfigParser.ConfigParser() |
43 conf.add_section('global') | 72 conf.add_section('global') |
48 if ('HOME' in os.environ): | 77 if ('HOME' in os.environ): |
49 conflist.append(os.path.expanduser('~/.scrape-vb.ini')) | 78 conflist.append(os.path.expanduser('~/.scrape-vb.ini')) |
50 conf.read(conflist) | 79 conf.read(conflist) |
51 | 80 |
52 try: | 81 try: |
53 #f = open("vb-happyhour.html") | 82 if (options.file != None): |
54 f = urllib.urlopen(conf.get('global', 'vburl')) | 83 f = open(options.file) |
84 else: | |
85 f = urllib.urlopen(conf.get('global', 'vburl')) | |
55 except IOError, e: | 86 except IOError, e: |
56 print "Unable to fetch page - " + str(e) | 87 print "Unable to fetch page - " + str(e) |
57 sys.exit(1) | 88 sys.exit(1) |
58 | 89 |
59 s = BeautifulSoup.BeautifulSoup(f) | 90 s = BeautifulSoup.BeautifulSoup(f) |
88 url = href['href'] | 119 url = href['href'] |
89 | 120 |
90 for email in conf.sections(): | 121 for email in conf.sections(): |
91 if (email == 'global'): | 122 if (email == 'global'): |
92 continue | 123 continue |
93 | 124 # Stuff configuration into a dictionary for our convenience |
94 t = {'email' : email} | 125 t = {'email' : email} |
95 for i in conf.items(email): | 126 for i in conf.items(email): |
96 t[i[0]] = i[1] | 127 t[i[0]] = i[1] |
97 | 128 |
98 citymatch = True | 129 citymatch = True |
126 mailhost = conf.get('global', 'mailhost') | 157 mailhost = conf.get('global', 'mailhost') |
127 mailsend = conf.getboolean('global', 'mailsend') | 158 mailsend = conf.getboolean('global', 'mailsend') |
128 mailfrom = conf.get('global', 'mailfrom') | 159 mailfrom = conf.get('global', 'mailfrom') |
129 except ConfigParser.NoOptionError: | 160 except ConfigParser.NoOptionError: |
130 mailsend = False | 161 mailsend = False |
162 | |
163 if (options.debug == True and mailsend): | |
164 print "mailsend overridden due to debugging" | |
165 mailsend = False | |
131 | 166 |
132 if (mailsend): | 167 if (mailsend): |
133 server = smtplib.SMTP(mailhost) | 168 server = smtplib.SMTP(mailhost) |
134 #server.set_debuglevel(1) | 169 #server.set_debuglevel(1) |
135 else: | 170 else: |