comparison scrape-vb.py @ 6:9f3eb9a07966

Add config parser. Add ability to print out an example config file. Add -f option to read a saved page.
author darius
date Wed, 29 Aug 2007 07:37:59 +0000
parents 275603a8e2ae
children bf896507faa9
comparison
equal deleted inserted replaced
5:275603a8e2ae 6:9f3eb9a07966
4 # Screen scraper for Virgin Blue to look for happy hour deals 4 # Screen scraper for Virgin Blue to look for happy hour deals
5 # 5 #
6 # Prints out (and emails) when criteria match based on cost, 6 # Prints out (and emails) when criteria match based on cost,
7 # destination, etc 7 # destination, etc
8 # 8 #
9 # $Id: scrape-vb.py,v 1.4 2007/08/28 02:58:50 darius Exp $ 9 # $Id: scrape-vb.py,v 1.5 2007/08/29 07:37:59 darius Exp $
10 ############################################################################ 10 ############################################################################
11 # 11 #
12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved. 12 # Copyright (C) 2007 Daniel O'Connor. All rights reserved.
13 # 13 #
14 # Redistribution and use in source and binary forms, with or without 14 # Redistribution and use in source and binary forms, with or without
32 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 # SUCH DAMAGE. 33 # SUCH DAMAGE.
34 # 34 #
35 ############################################################################ 35 ############################################################################
36 36
37 import os, re, BeautifulSoup, datetime, time, smtplib, sys, urllib, ConfigParser 37 import os, re, BeautifulSoup, datetime, time, smtplib, sys, urllib
38 import ConfigParser, optparse
38 39
40 usage = '''%prog [options]
41 Reads configuration from ./scrape-vb.ini and ~/.scrape-vb.ini'''
42
43 optparse = optparse.OptionParser(usage, version="$Id: scrape-vb.py,v 1.5 2007/08/29 07:37:59 darius Exp $")
44 optparse.add_option('-d', '--debug', action="store_true", default=False,
45 help="Disable mail sending, prints mail message to stdout")
46 optparse.add_option('-f', '--file', help="Do not fetch the page, use this file instead")
47 optparse.add_option('-e', '--example', action="store_true", default=False,
48 help="Print an example configuration file to stdout and exit")
49 (options, args) = optparse.parse_args()
50
51 if (options.example):
52 print '''[global]
53 mailsubj="Subject line for emails"
54 # The following 3 options are necessary before email will be sent
55 mailfrom=user@host.com
56 mailsend=True
57 mailhost=mail.server.com
58
59 [user@host.com]
60 # All fields are optional
61 city1=Foo
62 city2=Bar
63 when=dd/mm/yy
64 maxcost=123
65 '''
66 sys.exit(0)
67
39 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE) 68 parsetitle = re.compile('([a-z ]+) - ([a-z ]+) \$([0-9]+)', re.IGNORECASE)
40 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE) 69 parsetper = re.compile('Travel Period: ([0-9]+/[0-9]+/[0-9]+) - ([0-9]+/[0-9]+/[0-9]+)', re.IGNORECASE)
41 70
42 conf = ConfigParser.ConfigParser() 71 conf = ConfigParser.ConfigParser()
43 conf.add_section('global') 72 conf.add_section('global')
48 if ('HOME' in os.environ): 77 if ('HOME' in os.environ):
49 conflist.append(os.path.expanduser('~/.scrape-vb.ini')) 78 conflist.append(os.path.expanduser('~/.scrape-vb.ini'))
50 conf.read(conflist) 79 conf.read(conflist)
51 80
52 try: 81 try:
53 #f = open("vb-happyhour.html") 82 if (options.file != None):
54 f = urllib.urlopen(conf.get('global', 'vburl')) 83 f = open(options.file)
84 else:
85 f = urllib.urlopen(conf.get('global', 'vburl'))
55 except IOError, e: 86 except IOError, e:
56 print "Unable to fetch page - " + str(e) 87 print "Unable to fetch page - " + str(e)
57 sys.exit(1) 88 sys.exit(1)
58 89
59 s = BeautifulSoup.BeautifulSoup(f) 90 s = BeautifulSoup.BeautifulSoup(f)
88 url = href['href'] 119 url = href['href']
89 120
90 for email in conf.sections(): 121 for email in conf.sections():
91 if (email == 'global'): 122 if (email == 'global'):
92 continue 123 continue
93 124 # Stuff configuration into a dictionary for our convenience
94 t = {'email' : email} 125 t = {'email' : email}
95 for i in conf.items(email): 126 for i in conf.items(email):
96 t[i[0]] = i[1] 127 t[i[0]] = i[1]
97 128
98 citymatch = True 129 citymatch = True
126 mailhost = conf.get('global', 'mailhost') 157 mailhost = conf.get('global', 'mailhost')
127 mailsend = conf.getboolean('global', 'mailsend') 158 mailsend = conf.getboolean('global', 'mailsend')
128 mailfrom = conf.get('global', 'mailfrom') 159 mailfrom = conf.get('global', 'mailfrom')
129 except ConfigParser.NoOptionError: 160 except ConfigParser.NoOptionError:
130 mailsend = False 161 mailsend = False
162
163 if (options.debug == True and mailsend):
164 print "mailsend overridden due to debugging"
165 mailsend = False
131 166
132 if (mailsend): 167 if (mailsend):
133 server = smtplib.SMTP(mailhost) 168 server = smtplib.SMTP(mailhost)
134 #server.set_debuglevel(1) 169 #server.set_debuglevel(1)
135 else: 170 else: