def get_money_range(s, logger):
    """Parse a textual money range into a ``[minimum, maximum]`` pair.

    After stripping surrounding whitespace the text is matched, in this
    order, against:

    * ``...+``                          -> minimum via ``get_money``
    * ``...up to...`` / ``...under...`` -> maximum via ``get_money``
    * ``...or more``                    -> minimum is the first amount
    * ``A - B`` or ``<digit> to B``     -> minimum and maximum

    For the last two shapes everything before the first pound sign is
    ignored, and the pound sign and thousands separators are removed from
    the extracted amounts.

    Args:
        s: Text describing the range.
        logger: Logger passed on to the ``themortgagemeter_utils`` helpers.

    Returns:
        A two-element list of strings.  A bound the text does not supply
        keeps its default: ``"0"`` for the minimum, ``"-1"`` for the maximum.

    Raises:
        SystemExit: If the text matches none of the shapes above; an alert
            is recorded first.
    """
    pound = "£"

    def amounts(text):
        # Split the text (from the first pound sign on) into tokens with
        # the currency sign and commas removed.  The sign's length is
        # measured rather than assumed, so this works whether the sign is
        # one character or a multi-byte sequence.
        start = text.find(pound)
        if start != -1:
            text = text[start:]
        cleaned = []
        for token in text.split():
            if token.startswith(pound):
                token = token[len(pound):]
            cleaned.append(token.replace(',', ''))
        return cleaned

    res = ["0", "-1"]
    s = s.strip()
    if re.match(r'^.*\+$', s):
        res[0] = themortgagemeter_utils.get_money(s, logger)
    elif re.match(r'^.*up to.*$', s) or re.match(r'^.*under.*$', s):
        res[1] = themortgagemeter_utils.get_money(s, logger)
    elif re.match(r'^.*or more$', s):
        res[0] = amounts(s)[0]
    elif re.match(r'^.* - .*$', s) or re.match(r'^.*[0-9] to .*$', s):
        # Tokens are: lower bound, separator ("-" or "to"), upper bound.
        tokens = amounts(s)
        res[0] = tokens[0]
        res[1] = tokens[2]
    else:
        themortgagemeter_utils.record_alert(
            'ERROR: value not properly parsed by get_money_range: ' + s,
            logger, themortgagemeter_db.db_connection,
            themortgagemeter_db.cursor)
        exit()
    return res
Exemplo n.º 2
0
def get_money_range(s,logger):
	"""Parse a textual money range into a ``[minimum, maximum]`` pair.

	After stripping surrounding whitespace the text is matched, in this
	order, against:

	* ``...+``                          -> minimum via ``get_money``
	* ``...up to...`` / ``...under...`` -> maximum via ``get_money``
	* ``...or more``                    -> minimum is the first amount
	* ``A - B`` or ``<digit> to B``     -> minimum and maximum

	For the last two shapes everything before the first pound sign is
	ignored, and the pound sign and thousands separators are removed from
	the extracted amounts.

	Args:
		s: Text describing the range.
		logger: Logger passed on to the ``themortgagemeter_utils`` helpers.

	Returns:
		A two-element list of strings.  A bound the text does not supply
		keeps its default: ``"0"`` for the minimum, ``"-1"`` for the maximum.

	Raises:
		SystemExit: If the text matches none of the shapes above; an alert
			is recorded first.
	"""
	pound = "£"

	def amounts(text):
		# Split the text (from the first pound sign on) into tokens with
		# the currency sign and commas removed.  The sign's length is
		# measured rather than assumed, so this works whether the sign is
		# one character or a multi-byte sequence.
		start = text.find(pound)
		if start != -1:
			text = text[start:]
		cleaned = []
		for token in text.split():
			if token.startswith(pound):
				token = token[len(pound):]
			cleaned.append(token.replace(',',''))
		return cleaned

	res = ["0","-1"]
	s = s.strip()
	if re.match(r'^.*\+$',s):
		res[0] = themortgagemeter_utils.get_money(s,logger)
	elif re.match(r'^.*up to.*$',s) or re.match(r'^.*under.*$',s):
		res[1] = themortgagemeter_utils.get_money(s,logger)
	elif re.match(r'^.*or more$',s):
		res[0] = amounts(s)[0]
	elif re.match(r'^.* - .*$',s) or re.match(r'^.*[0-9] to .*$',s):
		# Tokens are: lower bound, separator ("-" or "to"), upper bound.
		tokens = amounts(s)
		res[0] = tokens[0]
		res[1] = tokens[2]
	else:
		themortgagemeter_utils.record_alert('ERROR: value not properly parsed by get_money_range: ' + s,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
		exit()
	return res
Exemplo n.º 3
0
def update_changes(val,institution_code,logger):
	"""Set the module-level ``changes`` flag to ``val`` if it differs.

	The current flag is always logged.  Only when ``val`` is different is
	the flag overwritten, the new value logged, and a ``MORTGAGE_CHANGE``
	alert recorded for ``institution_code``.
	"""
	global changes
	previous = changes
	logger.info('Updating changes bool. Changes currently set to: ' + str(previous))
	if not (previous != val):
		# Flag already holds the requested value: nothing to record.
		return
	# The project modules are only imported when an alert must be written.
	import themortgagemeter_db
	import themortgagemeter_utils
	changes = val
	logger.info('Changes changed to: ' + str(val))
	themortgagemeter_utils.record_alert(
		'MORTGAGE_CHANGE: ' + institution_code,
		logger,
		themortgagemeter_db.db_connection,
		themortgagemeter_db.cursor)
Exemplo n.º 4
0
def update_changes(val, institution_code, logger):
    """Update the module-level ``changes`` flag and alert when it flips.

    Logs the flag's current value on every call.  If ``val`` differs from
    it, the flag is replaced by ``val``, the new value is logged and a
    ``MORTGAGE_CHANGE`` alert naming ``institution_code`` is recorded.
    """
    global changes
    current_text = str(changes)
    logger.info('Updating changes bool. Changes currently set to: ' +
                current_text)
    flag_differs = changes != val
    if not flag_differs:
        return

    # Imported lazily, only on the path that actually records an alert.
    import themortgagemeter_db
    import themortgagemeter_utils

    changes = val
    logger.info('Changes changed to: ' + str(val))
    themortgagemeter_utils.record_alert('MORTGAGE_CHANGE: ' + institution_code,
                                        logger,
                                        themortgagemeter_db.db_connection,
                                        themortgagemeter_db.cursor)
Exemplo n.º 5
0
def main():
    """Run the scraper for the institution selected in ``args``.

    Opens the database, sets up logging from ``args.logging``, dispatches to
    the scraper matching ``args.institution`` and then commits the
    connection (or rolls it back when ``args.test`` is set).  Any exception
    raised while scraping is logged and recorded as an alert instead of
    propagating.  ``commit_db()`` is always called at the end.

    Raises:
        ValueError: If ``args.logging`` is not one of the supported names
            (previously this surfaced as an unbound ``logger`` error).
    """
    global changes
    import themortgagemeter_db
    import themortgagemeter_utils
    themortgagemeter_db.open_db()
    # 'STDOUT' is accepted as a level name and maps to CRITICAL.
    log_levels = {
        'DEBUG': logging.DEBUG,
        'INFO': logging.INFO,
        'WARNING': logging.WARNING,
        'ERROR': logging.ERROR,
        'CRITICAL': logging.CRITICAL,
        'STDOUT': logging.CRITICAL,
    }
    if args.logging not in log_levels:
        raise ValueError('Unsupported logging level: %r' % (args.logging,))
    logger = themortgagemeter_utils.setup_logging(log_levels[args.logging],
                                                  stdout=args.stdout)
    logger.info('Program starting: %s', args.institution)
    try:
        #if args.institution == 'NTNWD':
        #	import nationwide
        #	nationwide.nationwide_main(args.static,args.forcedelete,logger)
        if args.institution == 'HSBC':
            import hsbc
            hsbc.hsbc_main(args.static, args.forcedelete, logger)
        elif args.institution == 'HLFX':
            import halifax
            halifax.halifax_main(args.static, args.forcedelete, logger)
        else:
            raise Exception('Need to supply an institution', '')
        if not args.test:
            themortgagemeter_db.db_connection.commit()
        else:
            logger.info('Not committing data, as --test passed in')
            themortgagemeter_db.db_connection.rollback()
    except Exception:
        # Top-level boundary: log the traceback and record an alert rather
        # than letting the failure escape.
        logger.critical('Error was thrown, quitting')
        logger.exception('Error was:')
        themortgagemeter_utils.record_alert('ERROR: ' + args.institution,
                                            logger,
                                            themortgagemeter_db.db_connection,
                                            themortgagemeter_db.cursor)
    logger.info('Program complete for institution: %s', args.institution)
    # TODO: why does this never seem to be set to true?
    logger.info('Changes is: ' + str(changes))
    # NOTE(review): commit_db() also runs after a failure, presumably so the
    # alert is persisted - confirm that partially scraped rows should be
    # committed along with it.
    themortgagemeter_db.commit_db()
Exemplo n.º 6
0
def get_mortgage_type(s,logger):
	"""Classify a product description as fixed ('F') or tracker-like ('T').

	The lower-cased, ASCII-cleaned text is searched for one of the keywords
	``fixed``, ``tracker``, ``variable`` or ``discount``.  Because the
	pattern's leading ``.*`` is greedy, the last keyword in the text is the
	one that decides the result.

	Args:
		s: Product description text.
		logger: Logger used for the critical message and the alert.

	Returns:
		'F' for ``fixed``; 'T' for ``tracker``, ``variable`` or ``discount``.

	Raises:
		ValueError: If no keyword is present.  The problem is logged and an
			alert recorded first (previously this path failed with an
			AttributeError on the missing match object).
	"""
	cleaned = themortgagemeter_utils.remove_non_ascii(s.lower())
	res = re.match(r'^.*(fixed|tracker|variable|discount).*$',cleaned)
	if res is None:
		logger.critical('unable to determine mortgage_type from str: ' + s)
		themortgagemeter_utils.record_alert('ERROR: unable to determine mortgage_type from str',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
		raise ValueError('unable to determine mortgage_type from str: ' + s)
	# The group can only be one of the four keywords, so anything that is
	# not 'fixed' is tracker-like.
	if res.group(1) == 'fixed':
		return 'F'
	return 'T'
Exemplo n.º 7
0
def get_mortgage_type(s, logger):
    """Classify a product description as fixed ('F') or tracker-like ('T').

    The lower-cased, ASCII-cleaned text is searched for one of the keywords
    ``fixed``, ``tracker``, ``variable`` or ``discount``.  Because the
    pattern's leading ``.*`` is greedy, the last keyword in the text is the
    one that decides the result.

    Args:
        s: Product description text.
        logger: Logger used for the critical message and the alert.

    Returns:
        'F' for ``fixed``; 'T' for ``tracker``, ``variable`` or ``discount``.

    Raises:
        ValueError: If no keyword is present.  The problem is logged and an
            alert recorded first (previously this path failed with an
            AttributeError on the missing match object).
    """
    cleaned = themortgagemeter_utils.remove_non_ascii(s.lower())
    res = re.match(r'^.*(fixed|tracker|variable|discount).*$', cleaned)
    if res is None:
        logger.critical('unable to determine mortgage_type from str: ' + s)
        themortgagemeter_utils.record_alert(
            'ERROR: unable to determine mortgage_type from str', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
        raise ValueError('unable to determine mortgage_type from str: ' + s)
    # The group can only be one of the four keywords, so anything that is
    # not 'fixed' is tracker-like.
    if res.group(1) == 'fixed':
        return 'F'
    return 'T'
Exemplo n.º 8
0
def main():
	"""Run the scraper for the institution selected in ``args``.

	Opens the database, sets up logging from ``args.logging``, dispatches to
	the scraper matching ``args.institution`` and then commits the
	connection (or rolls it back when ``args.test`` is set).  Any exception
	raised while scraping is logged and recorded as an alert instead of
	propagating.  ``commit_db()`` is always called at the end.

	Raises:
		ValueError: If ``args.logging`` is not one of the supported names
			(previously this surfaced as an unbound ``logger`` error).
	"""
	global changes
	import themortgagemeter_db
	import themortgagemeter_utils
	themortgagemeter_db.open_db()
	# 'STDOUT' is accepted as a level name and maps to CRITICAL.
	log_levels = {
		'DEBUG': logging.DEBUG,
		'INFO': logging.INFO,
		'WARNING': logging.WARNING,
		'ERROR': logging.ERROR,
		'CRITICAL': logging.CRITICAL,
		'STDOUT': logging.CRITICAL,
	}
	if args.logging not in log_levels:
		raise ValueError('Unsupported logging level: %r' % (args.logging,))
	logger = themortgagemeter_utils.setup_logging(log_levels[args.logging],stdout=args.stdout)
	logger.info('Program starting: %s', args.institution)
	try:
		#if args.institution == 'NTNWD':
		#	import nationwide
		#	nationwide.nationwide_main(args.static,args.forcedelete,logger)
		if args.institution == 'HSBC':
			import hsbc
			hsbc.hsbc_main(args.static,args.forcedelete,logger)
		elif args.institution == 'HLFX':
			import halifax
			halifax.halifax_main(args.static,args.forcedelete,logger)
		else:
			raise Exception('Need to supply an institution','')
		if not args.test:
			themortgagemeter_db.db_connection.commit()
		else:
			logger.info('Not committing data, as --test passed in')
			themortgagemeter_db.db_connection.rollback()
	except Exception:
		# Top-level boundary: log the traceback and record an alert rather
		# than letting the failure escape.
		logger.critical('Error was thrown, quitting')
		logger.exception('Error was:')
		themortgagemeter_utils.record_alert('ERROR: ' + args.institution,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
	logger.info('Program complete for institution: %s', args.institution)
	# TODO: why does this never seem to be set to true?
	logger.info('Changes is: ' + str(changes))
	# NOTE(review): commit_db() also runs after a failure, presumably so the
	# alert is persisted - confirm that partially scraped rows should be
	# committed along with it.
	themortgagemeter_db.commit_db()
Exemplo n.º 9
0
def check_data(rate_percent, booking_fee, ltv_percent, apr_percent,
               initial_period, logger):
    """Validate scraped mortgage fields, stopping the run on the first bad one.

    Fields are checked in this order: ``rate_percent``, ``booking_fee``,
    ``ltv_percent``, ``apr_percent``, ``initial_period``.  The percentage
    fields must satisfy ``themortgagemeter_utils.isnumber``; ``booking_fee``
    and ``initial_period`` must consist of digits once converted to ``str``.

    Args:
        rate_percent: Initial rate value to validate.
        booking_fee: Booking fee value to validate.
        ltv_percent: Loan-to-value value to validate.
        apr_percent: APR value to validate.
        initial_period: Initial period value to validate.
        logger: Logger used for the critical message and the alert.

    Raises:
        SystemExit: On the first invalid field, after it has been logged and
            an alert recorded.
    """
    isnumber = themortgagemeter_utils.isnumber

    def isdigit(value):
        return str(value).isdigit()

    # (field name, value, validator, log prefix).  The prefixes are kept
    # exactly as they were, including their differing trailing spaces.
    checks = (
        ('rate_percent', rate_percent, isnumber, 'problem with rate_percent:'),
        ('booking_fee', booking_fee, isdigit, 'problem with booking_fee:'),
        ('ltv_percent', ltv_percent, isnumber, 'problem with ltv_percent: '),
        ('apr_percent', apr_percent, isnumber, 'problem with apr_percent: '),
        ('initial_period', initial_period, isdigit,
         'problem with initial_period: '),
    )
    for name, value, is_valid, log_prefix in checks:
        # isnumber's return type is not visible here, so the original strict
        # comparison against True is preserved.
        if is_valid(value) != True:
            # str() keeps a non-string value (None, a number) from raising
            # TypeError before the alert is written.
            logger.critical(log_prefix + str(value))
            themortgagemeter_utils.record_alert(
                'ERROR: problem with ' + name, logger,
                themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
            exit()
Exemplo n.º 10
0
def check_data(rate_percent,booking_fee,ltv_percent,apr_percent,initial_period,logger):
	"""Validate scraped mortgage fields, stopping the run on the first bad one.

	Fields are checked in this order: ``rate_percent``, ``booking_fee``,
	``ltv_percent``, ``apr_percent``, ``initial_period``.  The percentage
	fields must satisfy ``themortgagemeter_utils.isnumber``; ``booking_fee``
	and ``initial_period`` must consist of digits once converted to ``str``.

	Args:
		rate_percent: Initial rate value to validate.
		booking_fee: Booking fee value to validate.
		ltv_percent: Loan-to-value value to validate.
		apr_percent: APR value to validate.
		initial_period: Initial period value to validate.
		logger: Logger used for the critical message and the alert.

	Raises:
		SystemExit: On the first invalid field, after it has been logged and
			an alert recorded.
	"""
	isnumber = themortgagemeter_utils.isnumber

	def isdigit(value):
		return str(value).isdigit()

	# (field name, value, validator, log prefix).  The prefixes are kept
	# exactly as they were, including their differing trailing spaces.
	checks = (
		('rate_percent',rate_percent,isnumber,'problem with rate_percent:'),
		('booking_fee',booking_fee,isdigit,'problem with booking_fee:'),
		('ltv_percent',ltv_percent,isnumber,'problem with ltv_percent: '),
		('apr_percent',apr_percent,isnumber,'problem with apr_percent: '),
		('initial_period',initial_period,isdigit,'problem with initial_period: '),
	)
	for name, value, is_valid, log_prefix in checks:
		# isnumber's return type is not visible here, so the original strict
		# comparison against True is preserved.
		if is_valid(value) != True:
			# str() keeps a non-string value (None, a number) from raising
			# TypeError before the alert is written.
			logger.critical(log_prefix + str(value))
			themortgagemeter_utils.record_alert('ERROR: problem with ' + name,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
			exit()
Exemplo n.º 11
0
def get_product_page(static, url, eligibilities):
    """Scrape the Post Office full-range page and insert each product found.

    Every ``h2`` whose text yields a percentage starts an LTV band; the
    next sibling with class ``displaytable`` holds that band's products,
    one per ``tr``:

    * td0 - initial period and product type (``fixed`` / ``tracker``)
    * td1 - initial rate
    * following tds - SVR, then APR (each taken from the first later cell
      that yields a percentage), then the booking fee

    Rows whose type is neither ``fixed`` nor ``tracker`` are reported with
    an alert and skipped.

    Args:
        static: Passed through to ``themortgagemeter_utils.get_page``.
        url: Page URL; also passed to every insert.
        eligibilities: Iterable of eligibility values; each product is
            inserted once per entry.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/post_office/our-full-range.html', url, logger)
    term = str(25 * 12)

    def first_percentage(cell):
        # Result of get_percentage for the first line of the cell that
        # yields a non-empty value; otherwise the result for the last line.
        found = ''
        for line in cell.text.encode('utf-8').split('\n'):
            found = themortgagemeter_utils.get_percentage(line, logger)
            if found != '':
                break
        return found

    for ltv_elem in bsobj.find_all('h2'):
        ltv_elem_str = ltv_elem.string
        if not ltv_elem_str:
            continue
        ltv_percent = themortgagemeter_utils.get_percentage(
            ltv_elem_str, logger)
        if ltv_percent == '':
            continue
        # For post office, first reported % is 100 - LTV
        ltv_percent = str(100 - int(ltv_percent))
        div = ltv_elem.fetchNextSiblings(attrs={'class': 'displaytable'},
                                         limit=1)
        if not div:
            continue
        logger.debug('here')
        logger.debug(div)
        for tr in div[0].find_all('tr'):
            logger.debug(tr)
            tds = tr.find_all('td')
            logger.debug(tr)
            # A product row needs more than one cell.
            if len(tds) < 2:
                continue
            first_cell = tds[0].text.encode('utf-8')
            logger.debug(first_cell.split('\n'))
            # Sometimes we get empty fields - we remove them here.
            s = [part for part in first_cell.split('\n') if part != '']
            initial_period = str(
                themortgagemeter_utils.get_months(s[0], logger))
            # TODO: generic text cleansing function
            # Every \xc2 byte becomes a space and every \xa0 byte is dropped
            # (together they are the UTF-8 encoding of a no-break space);
            # the third word is then taken as the product type.
            type_str = re.sub('\xa0', '',
                              re.sub('\xc2', ' ', first_cell)).split()[2]
            logger.debug('type_str: ' + type_str)
            if type_str == 'fixed':
                mortgage_type = 'F'
            elif type_str == 'tracker':
                mortgage_type = 'T'
            else:
                themortgagemeter_utils.record_alert(
                    'ERROR: PSTFFC neither fixed nor tracker: ' + type_str,
                    logger, themortgagemeter_db.db_connection,
                    themortgagemeter_db.cursor)
                # Skip the row: falling through would insert it with the
                # previous row's type (or fail on the very first row).
                continue
            i = 1
            rate_percent = first_percentage(tds[i])
            svr_percent = ''
            while svr_percent == '':
                i += 1
                svr_percent = first_percentage(tds[i])
            apr_percent = ''
            while apr_percent == '':
                i += 1
                apr_percent = first_percentage(tds[i])
            i += 1
            # NOTE(review): [2:] presumably drops a two-byte UTF-8 pound
            # sign from the front of the fee - confirm against the page.
            booking_fee = tds[i].text.strip().encode(
                'utf-8')[2:].replace(',', '')
            for eligibility in eligibilities:
                mc_util.handle_mortgage_insert(
                    institution_code, mortgage_type, rate_percent,
                    svr_percent, apr_percent, ltv_percent,
                    initial_period, booking_fee, term, url,
                    eligibility, logger)
Exemplo n.º 12
0
def get_product_pages(static,base_url,ext_url,logger):
	"""Scrape the Halifax savings overview page and insert every account.

	Each row of every ``sortableTable`` is read cell by cell:

	* td0 - account title (sets the ``isa`` / ``child`` flags)
	* td1 - rate description (currently ignored)
	* td2 - minimum investment
	* td3 - variable / fixed
	* td4 - withdrawals (ignored here)
	* td5 - channel icons (``online`` / ``branch``; ``phone`` is ignored)
	* td6 - "more details" link, which is followed once per distinct URL
	  via ``process_more_info_page``; every record returned is inserted

	Args:
		static: Passed through to ``themortgagemeter_utils.get_page``.
		base_url: Site root, prepended to ``ext_url`` and to detail links.
		ext_url: Path of the overview page.
		logger: Logger passed to the helpers.

	Raises:
		SystemExit: On an unknown variability value or a row with more than
			seven cells, after an alert has been recorded.
	"""
	url = base_url + ext_url
	# A set gives constant-time "already visited" checks.
	urls_seen = set()
	# Keys of a savings record, in the positional order expected by
	# savings_util.handle_savings_insert (after institution_code).
	insert_fields = (
		'isa', 'regular_saver', 'regular_saver_frequency_period',
		'regular_saver_frequency_type', 'regular_saver_min_amt',
		'regular_saver_max_amt', 'bonus', 'bonus_frequency_period',
		'bonus_frequency_type', 'online', 'branch', 'variability',
		'savings_period', 'min_amt', 'max_amt', 'gross_percent',
		'aer_percent', 'child', 'interest_paid',
	)
	bsobj = themortgagemeter_utils.get_page(static,'static_html/halifax/savings-accounts.html',url,logger)
	# Get all the sortable tables and divine as much info as possible from that.
	for table in bsobj.find_all(attrs={'class' : 'sortableTable'}):
		for tr in table.find_all('tr'):
			savings_data = savings_util.get_savings_data_object()
			for td_idx, td in enumerate(tr.find_all('td')):
				td_text = td.text.encode('utf-8').strip().lower()
				if td_idx == 0:
					# title of account - Junior == child
					##0 ##<td style="text-align: left;"><a href="/savings/accounts/cash-isas/isa-saver-online/">ISA Saver Online</a></td>
					##1 ##<td style="text-align: left;"><strong class="apr">1.35%</strong> tax free/AER variable including 12 month fixed bonus of<strong> </strong>1.10%</td>
					##2 ##<td>£1</td>
					##3 ##<td>Variable</td>
					##4 ##<td>Unlimited</td>
					##5 ##<td style="text-align: center;"><img alt="Online" src="/common/images/icons/mousegrey.gif" title="Online"/></td>
					##6 ##<td><a href="/savings/accounts/cash-isas/isa-saver-online/"><img alt="Find out more" src="/common/images/Buttons/primary_find_out_more.gif"/></a></td>
					if re.match('^.*isa.*$',td_text):
						savings_data['isa'] = 'Y'
					if re.match('^.*junior.*$',td_text):
						savings_data['child'] = 'Y'
				elif td_idx == 1:
					# We don't bother with this at the moment - TODO - sort this out
					pass
				elif td_idx == 2:
					# minimum investment, max is always infinity
					savings_data['min_amt'] = themortgagemeter_utils.get_money(td_text,logger)
				elif td_idx == 3:
					# Variable/Fixed
					if re.match('.*variable.*',td_text):
						savings_data['variability'] = 'V'
					elif re.match('.*fixed.*',td_text):
						savings_data['variability'] = 'F'
					else:
						themortgagemeter_utils.record_alert('ERROR: unknown variability: ' + td_text,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
						exit()
				elif td_idx == 4:
					# Let's assume we'll get this info from the sub-page.
					# Withdrawals allowed: "None, by closure only", "Unlimited", "None, until child is 18"
					pass
				elif td_idx == 5:
					for img in td.find_all('img'):
						# An image without a title attribute is treated as
						# an unrecognised channel instead of crashing.
						title = (img.get('title') or '').lower().strip()
						if title == 'online':
							savings_data['online'] = 'Y'
						elif title == 'branch':
							savings_data['branch'] = 'Y'
						# "phone" is deliberately ignored
				elif td_idx == 6:
					# more details link
					new_url = base_url + td.find_all('a')[0].get('href')
					if new_url in urls_seen:
						continue
					savings_array = process_more_info_page(savings_data,new_url,logger)
					print(new_url)
					for this_savings_data in savings_array:
						print(this_savings_data)
						values = [this_savings_data[field] for field in insert_fields]
						# NOTE(review): the overview url (not new_url) is what
						# gets stored with the record - confirm this is intended.
						savings_util.handle_savings_insert(institution_code, *(values + [url, logger]))
					urls_seen.add(new_url)
				else:
					# str() is required: tr is a parsed element, not a string.
					themortgagemeter_utils.record_alert('ERROR: too many tds in tr: ' + str(tr),logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
					exit()
Exemplo n.º 13
0
def process_more_info_page(savings_data,url,logger):
	"""Scrape one savings product detail page into a list of records.

	The page at ``url`` is fetched (never from the static copy: ``False`` is
	passed to ``get_page``) and parsed according to the kind of product:

	* ``savings_data['isa'] == 'Y'`` - tables under the "Summary box" heading
	* URLs containing ``fixed-online-saver`` / ``fixed-saver`` /
	  ``tracker-bond`` - rate tables under a "Current rates" heading
	* URLs containing ``/online-saver/``, ``/regular-saver/`` or
	  ``/everyday-saver/`` - text around the element with class ``apr``
	* URLs containing ``/branch-accounts/`` - nothing is scraped

	Every record is a shallow copy of ``savings_data`` with the scraped
	fields filled in.  Note that the online/regular/everyday-saver branches
	also write defaults into the caller's ``savings_data`` itself.

	Args:
		savings_data: Dict of fields already known for the product.
		url: Detail page URL; also used to pick the parsing strategy.
		logger: Logger passed to the helpers.

	Returns:
		List of record dicts; an empty list for branch accounts.

	Raises:
		SystemExit: For an unhandled URL, an unexpected table layout, or
			when nothing could be scraped from a page.
	"""
	bsobj = themortgagemeter_utils.get_page(False,'static_html/halifax/savings-accounts.html',url,logger)
	savings_array = []
	print url
	if savings_data['isa'] == 'Y':
		for i1 in bsobj.find_all("h2",text="Summary box"):
			# NOTE(review): calling a Tag is BeautifulSoup shorthand for
			# find_all(), so this presumably visits every tag below the
			# heading's parent - confirm.
			for i2 in i1.parent():
				if i2.find_all("table") != []:
					tabs = i2.find_all("table")
					if re.match(".*isa-saver-fixed.*",url):
						# Fixed ISA pages must carry exactly two tables; the
						# first is dropped and only the second is parsed.
						if len(tabs) != 2:
							themortgagemeter_utils.record_alert('ERROR: too many tabs in isa',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
							exit()
						else:
							tabs.pop(0)
						for tab in tabs:
							tbody = tab.find_all("tbody")[0]
							trs = tbody.find_all("tr")
							# One record per row: td0 is the period, td1 the
							# rate (used for both AER and gross).
							for tr in trs:
								savings_data_tmp = savings_data.copy()
								tds = tr.find_all("td")
								savings_data_tmp['savings_period'] = themortgagemeter_utils.get_months(tds[0].text.strip().encode('utf-8'),logger)
								savings_data_tmp['aer_percent'] = themortgagemeter_utils.get_percentage(tds[1].text.strip().encode('utf-8'),logger)
								savings_data_tmp['gross_percent'] = savings_data_tmp['aer_percent']
								savings_array.append(savings_data_tmp)
					else:
						# Other ISA pages: at most one table is expected.
						if len(tabs) > 1:
							themortgagemeter_utils.record_alert('ERROR: too many tabs in isa',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
							exit()
						for tab in tabs:
							for tr in tab.find_all("tr"):
								ths = tr.find_all("th")
								tds = tr.find_all("td")
								if len(ths) > 0 and len(tds) > 0:
									# Row laid out as <th>label</th><td>value</td>.
									th = tr.find_all("th")[0]
									td = tr.find_all("td")[0]
									th_text = th.text.lower()
									td_text = td.text.lower()
									if re.match('interest rates.*',th_text):
										pc = themortgagemeter_utils.get_percentage(td_text,logger)
										savings_data_tmp = savings_data.copy()
										savings_data_tmp['gross_percent'] = pc
										savings_data_tmp['aer_percent'] = pc
										savings_array.append(savings_data_tmp)
								else:
									if len(ths) == 0 and len(tds) > 0:
										# Row laid out as <td>label</td><td>value</td>.
										# NOTE(review): tds[1] raises IndexError
										# for a row with a single td.
										td1 = tds[0]
										td2 = tds[1]
										td1_text = td1.text.lower()
										td2_text = td2.text.lower()
										if re.match('interest rates.*',td1_text):
											pc = themortgagemeter_utils.get_percentage(td2_text,logger)
											savings_data_tmp = savings_data.copy()
											savings_data_tmp['gross_percent'] = pc
											savings_data_tmp['aer_percent'] = pc
											savings_array.append(savings_data_tmp)
									else:
										themortgagemeter_utils.record_alert('ERROR: unhandled case: ' + url,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
										exit()
	elif re.match('.*fixed-online-saver.*',url) or re.match('.*fixed-saver.*',url) or re.match('.*tracker-bond.*',url):
		# Locate the heading that introduces the rate tables; "code" records
		# which layout is being parsed ("FOS" or "TB").
		if re.match('.*fixed-online-saver.*',url) or re.match('.*fixed-saver.*',url):
			code = "FOS"
			i1s = bsobj.find_all("h3",text="Current Rates")
			if i1s== []:
				# Same heading with a lower-case "rates".
				i1s = bsobj.find_all("h3",text="Current rates")
		elif re.match('.*tracker-bond.*',url):
			code = "TB"
			i1s = []
			res = bsobj.find_all("h4")
			# Only the first matching h4 is kept.
			for i in res:
				if i.text == "Current rates and apply":
					i1s.append(i)
					break
		if i1s == []:
			# Alert only; execution continues and the empty-result check at
			# the end of the function ends the run.
			themortgagemeter_utils.record_alert('No items from expected h3/4 match!',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
		for i1 in i1s:
			# NOTE(review): as above, i1.parent() is find_all() on the parent.
			for i2 in i1.parent():
				tbodys = i2.find_all("tbody")
				if len(tbodys) == 0:
					continue
				# For tracker bonds, only use an element whose tables contain
				# a row starting with a "Term" cell.
				if code == "TB":
					ok = False
					for tbody in tbodys:
						for tr in tbody.find_all("tr"):
							tds = tr.find_all("td")
							# NOTE(review): tds[0] raises IndexError for a
							# row without any td.
							if tds[0].text == "Term":
								ok = True
					if not ok:
						continue
				for tbody in tbodys:
					tr_count = -1
					# Term carried over from the first data row of the table.
					table_savings_period = "unset"
					for tr in tbody.find_all("tr"):
						tr_count = tr_count + 1
						if code == "TB" and tr_count == 0:
							# skip the first row
							continue
						# clone the savings_data ready to write to
						savings_data_tmp = savings_data.copy()
						# First td is time only on first row for TB, so later
						# rows start counting columns at 1 (balance).
						if code == "TB" and tr_count > 1:
							td_count = 1
						else:
							td_count = 0
						if code == "TB" and tr_count > 1:
							if table_savings_period == "unset":
								themortgagemeter_utils.record_alert('ERROR: table_savings_period should not be unset',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
								exit()
							savings_data_tmp['savings_period'] = table_savings_period
						for td in tr.find_all("td"):
							# 0 - term
							# 1 - balance
							# 2 - Gross
							# 3 - AER
							# 4 - NET (ignore)
							# Ignore remainder of cols
							text = td.text.lower().strip().encode('utf-8')
							if td_count == 0:
								# store this in a variable for use on next row if necessary
								table_savings_period = themortgagemeter_utils.get_months(text,logger)
								savings_data_tmp['savings_period'] = table_savings_period
							elif td_count == 1:
								res = savings_util.get_money_range(text,logger)
								savings_data_tmp['min_amt'] = res[0]
								savings_data_tmp['max_amt'] = res[1]
							elif td_count == 2:
								savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(text,logger)
							elif td_count == 3:
								savings_data_tmp['aer_percent'] = themortgagemeter_utils.get_percentage(text,logger)
								# and then break out
								break
							td_count = td_count + 1
						savings_array.append(savings_data_tmp)
	elif re.match('.*/online-saver/',url):
		# TODO: need to set this for other types
		# Written to the caller's dict, so every copy below inherits it.
		savings_data['interest_paid'] = 'Y'
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		# A line of the form "<a> or <digit...>" describes two tiers; split
		# it once so each tier becomes its own line.
		for l in apr:
			if re.match('.* or [0-9].*',l):
				for l2 in l.split(' or ',1):
					lines.append(l2)
			else:
				lines.append(l)
		while '' in lines:
			lines.remove('')
		# One record per remaining line: rate plus balance range.
		for l in lines:
			savings_data_tmp = savings_data.copy()
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# get_money range
			res = savings_util.get_money_range(l,logger)
			savings_data_tmp['min_amt'] = res[0]
			savings_data_tmp['max_amt'] = res[1]
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/regular-saver/',url):
		# TODO: need to set this for other types
		# These defaults are written to the caller's dict as well.
		savings_data['interest_paid'] = 'Y'
		savings_data['regular_saver_frequency_period'] = '1'
		savings_data['regular_saver_frequency_type'] = 'M'
		savings_data['regular_saver'] = 'Y'
		# Always fixed
		savings_data['variability'] = 'F'
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		# Same "<a> or <digit...>" tier splitting as for online savers.
		for l in apr:
			if re.match('.* or [0-9].*',l):
				for l2 in l.split(' or ',1):
					lines.append(l2)
			else:
				lines.append(l)
		while '' in lines:
			lines.remove('')
		for l in lines:
			savings_data_tmp = savings_data.copy()
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			if savings_data_tmp['gross_percent'] == '':
				# Lines without a percentage produce no record.
				continue
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# Hard-code to 25-250 for now, this seems standard
			savings_data_tmp['regular_saver_min_amt'] = '25'
			savings_data_tmp['regular_saver_max_amt'] = '250'
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/everyday-saver/',url):
		# This one's quite simple (I think)
		# TODO: need to set this for other types
		savings_data['interest_paid'] = 'Y'
		# get the apr class element, as that contains the text we need
		apr = bsobj.find_all(attrs={'class':'apr'})[0].parent.parent.text.encode('utf-8')
		# split this line by \n
		apr = apr.split('\n')
		lines = []
		# Only lines mentioning "gross" are used.
		for l in apr:
			if re.match('.*gross.*',l):
				lines.append(l)
		while '' in lines:
			lines.remove('')
		for l in lines:
			savings_data_tmp = savings_data.copy()
			# get percentage
			savings_data_tmp['gross_percent'] = themortgagemeter_utils.get_percentage(l,logger)
			savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
			# TODO: bonus_frequency_period set to 1, or get from data?
			# append to savings_array
			savings_array.append(savings_data_tmp)
	elif re.match('.*/branch-accounts/.*',url):
		# Branch accounts are not scraped: return the (still empty) list and
		# bypass the empty-result check below.
		return savings_array
	else:
		# Unknown page type: logged at info level (no alert) before exiting.
		logger.info('unhandled:' + url)
		exit()
	if savings_array == []:
		themortgagemeter_utils.record_alert('ERROR: returning nothing from a page',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
		exit()
	# Return the savings_array
	logger.info('returning savings_array:' + str(savings_array))
	return savings_array
Exemplo n.º 14
0
def check_data(isa, regular_saver, regular_saver_frequency_period, regular_saver_frequency_type, regular_saver_min_amt, regular_saver_max_amt, bonus, bonus_frequency_period, bonus_frequency_type, online, branch, variability, savings_period, min_amt, max_amt, gross_percent, aer_percent, child, interest_paid, logger):
	"""Check that the numeric savings fields pass themortgagemeter_utils.isnumber.

	The fields are examined in a fixed order.  For the first one that
	isnumber() does not report as True, a critical message is logged, an
	alert is recorded through themortgagemeter_utils.record_alert, and the
	process is terminated with exit().  When every field passes, the
	function simply returns None.

	Only these parameters are inspected: regular_saver_frequency_period,
	regular_saver_min_amt, regular_saver_max_amt, bonus_frequency_period,
	savings_period, min_amt, max_amt, gross_percent and aer_percent.  The
	remaining parameters are accepted but not looked at here.
	"""
	# Each entry: (value to check, critical-log prefix, alert text).
	# The texts are kept exactly as they were, including their inconsistent
	# trailing spaces, because they are emitted at runtime.
	checks = (
		(regular_saver_frequency_period,
			'problem with regular_saver_frequency_period - not a number:',
			'ERROR: problem with regular_saver_frequency_period - not a number: '),
		(regular_saver_min_amt,
			'problem with regular_saver_min_amt - not a number: ',
			'ERROR: problem with regular_saver_min_amt'),
		(regular_saver_max_amt,
			'problem with regular_saver_max_amt - not a number:',
			'ERROR: problem with regular_saver_max_amt - not a number:'),
		(bonus_frequency_period,
			'problem with bonus_frequency_period - not a number:',
			'ERROR: problem with bonus_frequency_period - not a number: '),
		(savings_period,
			'problem with savings_period - not a number:',
			'ERROR: problem with savings_period - not a number: '),
		(min_amt,
			'problem with min_amt - not a number:',
			'ERROR: problem with min_amt - not a number: '),
		(max_amt,
			'problem with max_amt - not a number:',
			'ERROR: problem with max_amt - not a number: '),
		(gross_percent,
			'problem with gross_percent - not a number:',
			'ERROR: problem with gross_percent - not a number: '),
		(aer_percent,
			'problem with aer_percent - not a number:',
			'ERROR: problem with aer_percent - not a number: '),
	)
	for value, log_text, alert_text in checks:
		# "!= True" is kept on purpose: only an isnumber() result equal to
		# True counts as a pass, exactly as before.
		if themortgagemeter_utils.isnumber(value) != True:
			# The value is passed as a logging argument instead of being
			# concatenated, so a non-string value (e.g. None) is reported
			# rather than raising TypeError before the alert is recorded.
			logger.critical(log_text + '%s', value)
			themortgagemeter_utils.record_alert(alert_text,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
			exit()
Exemplo n.º 15
0
def main():
    """Run the scraper for the institution named in the module-level ``args``.

    Steps, in order:

    1. Open the database via ``themortgagemeter_db.open_db()``.
    2. Build a logger with ``themortgagemeter_utils.setup_logging`` using the
       level named by ``args.logging`` and ``stdout=args.stdout``.
    3. Import the module for ``args.institution`` and call its ``*_main``
       function with ``args.static``, ``args.forcedelete`` and the logger.
    4. Commit the connection, or roll it back when ``args.test`` is set.
    5. On any ``Exception`` from steps 3-4, log it and record an alert.
    6. Log the module-level ``changes`` value and call
       ``themortgagemeter_db.commit_db()``.

    Raises:
        ValueError: if ``args.logging`` is not one of the recognised names.
            Previously this surfaced as an UnboundLocalError on the first
            use of ``logger``.
    """
    global changes
    import themortgagemeter_db
    import themortgagemeter_utils
    themortgagemeter_db.open_db()

    # 'STDOUT' maps to CRITICAL, exactly as the original if/elif chain did.
    log_levels = {
        'DEBUG': logging.DEBUG,
        'INFO': logging.INFO,
        'WARNING': logging.WARNING,
        'ERROR': logging.ERROR,
        'CRITICAL': logging.CRITICAL,
        'STDOUT': logging.CRITICAL,
    }
    if args.logging not in log_levels:
        raise ValueError('Unrecognised logging level: %r' % (args.logging,))
    logger = themortgagemeter_utils.setup_logging(log_levels[args.logging],
                                                  stdout=args.stdout)

    logger.info('Program starting: %s', args.institution)
    try:
        # Imports stay inside each branch so that only the selected
        # institution's module is loaded.
        institution = args.institution
        if institution == 'HSBC':
            import hsbc
            institution_main = hsbc.hsbc_main
        elif institution == 'NTNWD':
            import nationwide
            institution_main = nationwide.nationwide_main
        elif institution == 'LLOYDS':
            import lloyds
            institution_main = lloyds.lloyds_main
        elif institution == 'SNTNDR':
            import santander
            institution_main = santander.santander_main
        elif institution == 'HLFX':
            import halifax
            institution_main = halifax.halifax_main
        elif institution == 'NTWST':
            import natwest
            institution_main = natwest.natwest_main
        elif institution == 'NRTHNR':
            import northernrock
            institution_main = northernrock.northernrock_main
        elif institution == 'CHLS':
            import chelsea
            institution_main = chelsea.chelsea_main
        elif institution == 'YRKSHR':
            import yorkshire
            institution_main = yorkshire.yorkshire_main
        elif institution == 'TSC':
            import tesco
            institution_main = tesco.tesco_main
        elif institution == 'SKPTN':
            import skipton
            institution_main = skipton.skipton_main
        elif institution == 'PSTFFC':
            import post_office
            institution_main = post_office.post_office_main
        elif institution == 'FRSTDRCT':
            import first_direct
            institution_main = first_direct.first_direct_main
        else:
            raise Exception('Need to supply an institution', '')
        institution_main(args.static, args.forcedelete, logger)

        if not args.test:
            themortgagemeter_db.db_connection.commit()
        else:
            logger.info('Not committing data, as --test passed in')
            themortgagemeter_db.db_connection.rollback()
    except Exception:
        # Top-level boundary: log the traceback and record an alert, then
        # fall through to the final logging/commit_db() below.
        logger.critical('Error was thrown, quitting')
        logger.exception('Error was:')
        themortgagemeter_utils.record_alert('ERROR: ' + args.institution,
                                            logger,
                                            themortgagemeter_db.db_connection,
                                            themortgagemeter_db.cursor)
    logger.info('Program complete for institution: %s', args.institution)
    # TODO: why does this never seem to be set to true?
    logger.info('Changes is: ' + str(changes))
    themortgagemeter_db.commit_db()
Exemplo n.º 16
0
def get_product_pages(static, base_url, ext_url, logger):
    """Walk the savings listing page and insert every product it links to.

    The page at ``base_url + ext_url`` is fetched with
    ``themortgagemeter_utils.get_page``.  Every ``<tr>`` of every element
    with class ``sortableTable`` is read cell by cell; the 7th cell's link
    is followed with ``process_more_info_page`` and each record it returns
    is handed to ``savings_util.handle_savings_insert``.

    Args:
        static: forwarded to ``get_page`` as its first argument.
        base_url: prefix for the listing URL and for each detail-page link.
        ext_url: path of the listing page, appended to ``base_url``.
        logger: logger passed through to all helpers.

    Returns:
        None.  The process is terminated with ``exit()`` when a row has an
        unrecognised variability text or more than seven cells.
    """
    url = base_url + ext_url
    urls_seen = set()
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/savings-accounts.html', url, logger)
    # Get all the sortable tables and divine as much info as possible
    # from them.
    sortable_tables = bsobj.find_all(attrs={'class': 'sortableTable'})
    for table in sortable_tables:
        for tr in table.find_all('tr'):
            td_idx = 0
            savings_data = savings_util.get_savings_data_object()
            for td in tr.find_all('td'):
                td_text = td.text.encode('utf-8').strip().lower()
                if td_idx == 0:
                    # Title of account.  Example row, by cell index:
                    ##0 ##<td style="text-align: left;"><a href="/savings/accounts/cash-isas/isa-saver-online/">ISA Saver Online</a></td>
                    ##1 ##<td style="text-align: left;"><strong class="apr">1.35%</strong> tax free/AER variable including 12 month fixed bonus of<strong> </strong>1.10%</td>
                    ##2 ##<td>£1</td>
                    ##3 ##<td>Variable</td>
                    ##4 ##<td>Unlimited</td>
                    ##5 ##<td style="text-align: center;"><img alt="Online" src="/common/images/icons/mousegrey.gif" title="Online"/></td>
                    ##6 ##<td><a href="/savings/accounts/cash-isas/isa-saver-online/"><img alt="Find out more" src="/common/images/Buttons/primary_find_out_more.gif"/></a></td>
                    if re.match('^.*isa.*$', td_text):
                        savings_data['isa'] = 'Y'
                    # "Junior" in the title means a child account.
                    if re.match('^.*junior.*$', td_text):
                        savings_data['child'] = 'Y'
                elif td_idx == 1:
                    # Rate/bonus text: not used at the moment.
                    # TODO - sort this out (e.g. set 'bonus' when the text
                    # mentions a bonus).
                    pass
                elif td_idx == 2:
                    # Minimum investment; max is always infinity.
                    savings_data['min_amt'] = themortgagemeter_utils.get_money(
                        td_text, logger)
                elif td_idx == 3:
                    # Variable/Fixed
                    if re.match('.*variable.*', td_text):
                        savings_data['variability'] = 'V'
                    elif re.match('.*fixed.*', td_text):
                        savings_data['variability'] = 'F'
                    else:
                        themortgagemeter_utils.record_alert(
                            'ERROR: unknown variability: ' + td_text, logger,
                            themortgagemeter_db.db_connection,
                            themortgagemeter_db.cursor)
                        exit()
                elif td_idx == 4:
                    # Withdrawals allowed: "None, by closure only",
                    # "Unlimited", "None, until child is 18".
                    # Let's assume we'll get this info from the sub-page.
                    pass
                elif td_idx == 5:
                    for img in td.find_all('img'):
                        # An <img> without a title attribute yields None
                        # from get(); treat that as an empty title instead
                        # of crashing on None.lower().
                        title = (img.get('title') or '').lower().strip()
                        if title == 'online':
                            savings_data['online'] = 'Y'
                        elif title == 'branch':
                            savings_data['branch'] = 'Y'
                        # "phone" (and anything else) is ignored.
                elif td_idx == 6:
                    # "More details" link.
                    new_url = base_url + td.find_all('a')[0].get('href')
                    if new_url in urls_seen:
                        # Note: this skips the td_idx increment below, as
                        # the original code did.
                        continue
                    savings_array = process_more_info_page(
                        savings_data, new_url, logger)
                    # Parenthesised print gives the same output under
                    # Python 2 and also parses under Python 3.
                    print(new_url)
                    for this_savings_data in savings_array:
                        print(this_savings_data)
                        # NOTE(review): the listing-page url (not new_url)
                        # is what gets passed on here - confirm intended.
                        savings_util.handle_savings_insert(
                            institution_code,
                            this_savings_data['isa'],
                            this_savings_data['regular_saver'],
                            this_savings_data['regular_saver_frequency_period'],
                            this_savings_data['regular_saver_frequency_type'],
                            this_savings_data['regular_saver_min_amt'],
                            this_savings_data['regular_saver_max_amt'],
                            this_savings_data['bonus'],
                            this_savings_data['bonus_frequency_period'],
                            this_savings_data['bonus_frequency_type'],
                            this_savings_data['online'],
                            this_savings_data['branch'],
                            this_savings_data['variability'],
                            this_savings_data['savings_period'],
                            this_savings_data['min_amt'],
                            this_savings_data['max_amt'],
                            this_savings_data['gross_percent'],
                            this_savings_data['aer_percent'],
                            this_savings_data['child'],
                            this_savings_data['interest_paid'],
                            url, logger)
                    urls_seen.add(new_url)
                else:
                    # str() is required: tr is a parsed tag object, and
                    # concatenating it directly to a str raises TypeError
                    # before the alert can be recorded.
                    themortgagemeter_utils.record_alert(
                        'ERROR: too many tds in tr: ' + str(tr), logger,
                        themortgagemeter_db.db_connection,
                        themortgagemeter_db.cursor)
                    exit()
                td_idx = td_idx + 1
Exemplo n.º 17
0
def process_more_info_page(savings_data, url, logger):
    """Scrape one product detail page into a list of savings records.

    The page is always fetched live (``get_page`` is called with ``False``
    as its first argument).  Which parser runs depends on the product:

    * ``savings_data['isa'] == 'Y'``: tables under a "Summary box" heading.
    * URL contains fixed-online-saver / fixed-saver / tracker-bond: the
      term/balance/rate tables under the "current rates" heading.
    * URL matches /online-saver/, /regular-saver/ or /everyday-saver/: the
      text lines surrounding the first element with class ``apr``.
    * URL contains /branch-accounts/: nothing is parsed.

    Args:
        savings_data: dict of values gathered so far.  Each returned record
            is a copy of it with extra keys set.  Note that the
            online/regular/everyday saver branches also write defaults
            (e.g. ``interest_paid``) into this dict itself.
        url: address of the detail page.
        logger: logger passed through to the helper functions.

    Returns:
        A list of dicts, one per rate found.  The list is empty only for
        branch-account pages; for any other page an empty result records
        an alert and terminates the process with ``exit()``, as does an
        unhandled URL.
    """
    bsobj = themortgagemeter_utils.get_page(
        False, 'static_html/halifax/savings-accounts.html', url, logger)
    savings_array = []
    # Parenthesised print gives the same output under Python 2 and also
    # parses under Python 3.
    print(url)
    if savings_data['isa'] == 'Y':
        savings_array.extend(
            _isa_summary_rates(bsobj, savings_data, url, logger))
    elif re.match('.*fixed-online-saver.*', url) or re.match(
            '.*tracker-bond.*', url) or re.match('.*fixed-saver.*', url):
        savings_array.extend(
            _term_table_rates(bsobj, savings_data, url, logger))
    elif re.match('.*/online-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _apr_text_lines(bsobj, True):
            savings_data_tmp = savings_data.copy()
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    line, logger)
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            money_range = savings_util.get_money_range(line, logger)
            savings_data_tmp['min_amt'] = money_range[0]
            savings_data_tmp['max_amt'] = money_range[1]
            savings_array.append(savings_data_tmp)
    elif re.match('.*/regular-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        savings_data['regular_saver_frequency_period'] = '1'
        savings_data['regular_saver_frequency_type'] = 'M'
        savings_data['regular_saver'] = 'Y'
        # Always fixed
        savings_data['variability'] = 'F'
        for line in _apr_text_lines(bsobj, True):
            savings_data_tmp = savings_data.copy()
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    line, logger)
            if savings_data_tmp['gross_percent'] == '':
                # No percentage on this line: skip it.
                continue
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            # Hard-code to 25-250 for now, this seems standard
            savings_data_tmp['regular_saver_min_amt'] = '25'
            savings_data_tmp['regular_saver_max_amt'] = '250'
            savings_array.append(savings_data_tmp)
    elif re.match('.*/everyday-saver/', url):
        # TODO: need to set this for other types
        savings_data['interest_paid'] = 'Y'
        for line in _apr_text_lines(bsobj, False):
            # Only the lines mentioning "gross" carry a rate here.
            if not re.match('.*gross.*', line):
                continue
            savings_data_tmp = savings_data.copy()
            savings_data_tmp[
                'gross_percent'] = themortgagemeter_utils.get_percentage(
                    line, logger)
            savings_data_tmp['aer_percent'] = savings_data_tmp['gross_percent']
            # TODO: bonus_frequency_period set to 1, or get from data?
            savings_array.append(savings_data_tmp)
    elif re.match('.*/branch-accounts/.*', url):
        return savings_array
    else:
        logger.info('unhandled:' + url)
        exit()
    if savings_array == []:
        themortgagemeter_utils.record_alert(
            'ERROR: returning nothing from a page', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
        exit()
    logger.info('returning savings_array:' + str(savings_array))
    return savings_array


def _apr_text_lines(bsobj, split_on_or):
    """Return the non-empty text lines around the first 'apr' element.

    The text of the grandparent of the first element with class ``apr`` is
    split on newlines.  When ``split_on_or`` is true, a line matching
    ``'.* or [0-9].*'`` is further split once on ``' or '``.
    """
    apr_text = bsobj.find_all(
        attrs={'class': 'apr'})[0].parent.parent.text.encode('utf-8')
    lines = []
    for line in apr_text.split('\n'):
        if split_on_or and re.match('.* or [0-9].*', line):
            lines.extend(line.split(' or ', 1))
        else:
            lines.append(line)
    return [line for line in lines if line != '']


def _isa_summary_rates(bsobj, savings_data, url, logger):
    """Collect ISA rate records from tables under "Summary box" headings."""
    found = []
    is_fixed_isa = re.match(".*isa-saver-fixed.*", url)
    for heading in bsobj.find_all("h2", text="Summary box"):
        # Calling a tag is shorthand for find_all(): this visits every tag
        # nested under the heading's parent.
        for node in heading.parent():
            tabs = node.find_all("table")
            if not tabs:
                continue
            if is_fixed_isa:
                if len(tabs) != 2:
                    themortgagemeter_utils.record_alert(
                        'ERROR: too many tabs in isa', logger,
                        themortgagemeter_db.db_connection,
                        themortgagemeter_db.cursor)
                    exit()
                # The first of the two tables is discarded.
                for tab in tabs[1:]:
                    tbody = tab.find_all("tbody")[0]
                    for tr in tbody.find_all("tr"):
                        savings_data_tmp = savings_data.copy()
                        tds = tr.find_all("td")
                        savings_data_tmp[
                            'savings_period'] = themortgagemeter_utils.get_months(
                                tds[0].text.strip().encode('utf-8'), logger)
                        savings_data_tmp[
                            'aer_percent'] = themortgagemeter_utils.get_percentage(
                                tds[1].text.strip().encode('utf-8'), logger)
                        savings_data_tmp['gross_percent'] = savings_data_tmp[
                            'aer_percent']
                        found.append(savings_data_tmp)
            else:
                if len(tabs) > 1:
                    themortgagemeter_utils.record_alert(
                        'ERROR: too many tabs in isa', logger,
                        themortgagemeter_db.db_connection,
                        themortgagemeter_db.cursor)
                    exit()
                for tab in tabs:
                    for tr in tab.find_all("tr"):
                        ths = tr.find_all("th")
                        tds = tr.find_all("td")
                        if ths and tds:
                            # <th>label</th><td>value</td>
                            label_cell = ths[0]
                            value_cell = tds[0]
                        elif tds:
                            # <td>label</td><td>value</td>; the value cell
                            # may be missing, so do not index it blindly.
                            label_cell = tds[0]
                            value_cell = tds[1] if len(tds) > 1 else None
                        else:
                            themortgagemeter_utils.record_alert(
                                'ERROR: unhandled case: ' + url, logger,
                                themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                        if not re.match('interest rates.*',
                                        label_cell.text.lower()):
                            continue
                        if value_cell is None:
                            # An interest-rate row without a value cell
                            # used to raise IndexError; report it instead.
                            themortgagemeter_utils.record_alert(
                                'ERROR: unhandled case: ' + url, logger,
                                themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                        pc = themortgagemeter_utils.get_percentage(
                            value_cell.text.lower(), logger)
                        savings_data_tmp = savings_data.copy()
                        savings_data_tmp['gross_percent'] = pc
                        savings_data_tmp['aer_percent'] = pc
                        found.append(savings_data_tmp)
    return found


def _term_table_rates(bsobj, savings_data, url, logger):
    """Collect records from the fixed-saver / tracker-bond rate tables."""
    found = []
    if re.match('.*fixed-online-saver.*', url) or re.match(
            '.*fixed-saver.*', url):
        is_tracker_bond = False
        headings = bsobj.find_all("h3", text="Current Rates")
        if not headings:
            headings = bsobj.find_all("h3", text="Current rates")
    else:
        # The caller only sends tracker-bond URLs down this path.
        is_tracker_bond = True
        headings = []
        for h4 in bsobj.find_all("h4"):
            if h4.text == "Current rates and apply":
                headings.append(h4)
                break
    if not headings:
        themortgagemeter_utils.record_alert(
            'No items from expected h3/4 match!', logger,
            themortgagemeter_db.db_connection, themortgagemeter_db.cursor)
    for heading in headings:
        for node in heading.parent():
            tbodys = node.find_all("tbody")
            if len(tbodys) == 0:
                continue
            if is_tracker_bond:
                # Only keep tracker-bond tables that have a "Term" cell.
                # Rows without any <td> are skipped rather than indexed,
                # which previously raised IndexError.
                has_term_cell = False
                for tbody in tbodys:
                    for tr in tbody.find_all("tr"):
                        tds = tr.find_all("td")
                        if tds and tds[0].text == "Term":
                            has_term_cell = True
                if not has_term_cell:
                    continue
            for tbody in tbodys:
                table_savings_period = "unset"
                for tr_count, tr in enumerate(tbody.find_all("tr")):
                    if is_tracker_bond and tr_count == 0:
                        # skip the first row
                        continue
                    savings_data_tmp = savings_data.copy()
                    td_count = 0
                    if is_tracker_bond and tr_count > 1:
                        # For tracker bonds only the first data row carries
                        # the term cell; later rows start at the balance
                        # column and reuse the stored term.
                        td_count = 1
                        if table_savings_period == "unset":
                            themortgagemeter_utils.record_alert(
                                'ERROR: table_savings_period should not be unset',
                                logger, themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                        savings_data_tmp[
                            'savings_period'] = table_savings_period
                    for td in tr.find_all("td"):
                        # Columns: 0 term, 1 balance, 2 gross, 3 AER;
                        # anything after AER is ignored.
                        text = td.text.lower().strip().encode('utf-8')
                        if td_count == 0:
                            # Remembered for use on following rows.
                            table_savings_period = themortgagemeter_utils.get_months(
                                text, logger)
                            savings_data_tmp[
                                'savings_period'] = table_savings_period
                        elif td_count == 1:
                            money_range = savings_util.get_money_range(
                                text, logger)
                            savings_data_tmp['min_amt'] = money_range[0]
                            savings_data_tmp['max_amt'] = money_range[1]
                        elif td_count == 2:
                            savings_data_tmp[
                                'gross_percent'] = themortgagemeter_utils.get_percentage(
                                    text, logger)
                        elif td_count == 3:
                            savings_data_tmp[
                                'aer_percent'] = themortgagemeter_utils.get_percentage(
                                    text, logger)
                            break
                        td_count = td_count + 1
                    found.append(savings_data_tmp)
    return found
def check_data(isa, regular_saver, regular_saver_frequency_period,
               regular_saver_frequency_type, regular_saver_min_amt,
               regular_saver_max_amt, bonus, bonus_frequency_period,
               bonus_frequency_type, online, branch, variability,
               savings_period, min_amt, max_amt, gross_percent, aer_percent,
               child, interest_paid, logger):
    """Check that the numeric savings fields pass themortgagemeter_utils.isnumber.

    The fields are examined in a fixed order.  For the first one that
    isnumber() does not report as True, a critical message is logged, an
    alert is recorded through themortgagemeter_utils.record_alert, and the
    process is terminated with exit().  When every field passes, the
    function simply returns None.

    Only these parameters are inspected: regular_saver_frequency_period,
    regular_saver_min_amt, regular_saver_max_amt, bonus_frequency_period,
    savings_period, min_amt, max_amt, gross_percent and aer_percent.  The
    remaining parameters are accepted but not looked at here.
    """
    # Each entry: (value to check, critical-log prefix, alert text).
    # The texts are kept exactly as they were, including their inconsistent
    # trailing spaces, because they are emitted at runtime.
    checks = (
        (regular_saver_frequency_period,
         'problem with regular_saver_frequency_period - not a number:',
         'ERROR: problem with regular_saver_frequency_period - not a number: '),
        (regular_saver_min_amt,
         'problem with regular_saver_min_amt - not a number: ',
         'ERROR: problem with regular_saver_min_amt'),
        (regular_saver_max_amt,
         'problem with regular_saver_max_amt - not a number:',
         'ERROR: problem with regular_saver_max_amt - not a number:'),
        (bonus_frequency_period,
         'problem with bonus_frequency_period - not a number:',
         'ERROR: problem with bonus_frequency_period - not a number: '),
        (savings_period,
         'problem with savings_period - not a number:',
         'ERROR: problem with savings_period - not a number: '),
        (min_amt,
         'problem with min_amt - not a number:',
         'ERROR: problem with min_amt - not a number: '),
        (max_amt,
         'problem with max_amt - not a number:',
         'ERROR: problem with max_amt - not a number: '),
        (gross_percent,
         'problem with gross_percent - not a number:',
         'ERROR: problem with gross_percent - not a number: '),
        (aer_percent,
         'problem with aer_percent - not a number:',
         'ERROR: problem with aer_percent - not a number: '),
    )
    for value, log_text, alert_text in checks:
        # "!= True" is kept on purpose: only an isnumber() result equal to
        # True counts as a pass, exactly as before.
        if themortgagemeter_utils.isnumber(value) != True:
            # The value is passed as a logging argument instead of being
            # concatenated, so a non-string value (e.g. None) is reported
            # rather than raising TypeError before the alert is recorded.
            logger.critical(log_text + '%s', value)
            themortgagemeter_utils.record_alert(
                alert_text, logger, themortgagemeter_db.db_connection,
                themortgagemeter_db.cursor)
            exit()
Exemplo n.º 19
0
def main():
    """Run the scraper selected by the command-line arguments.

    Reads the module-level ``args`` object: ``args.logging`` selects the log
    level, ``args.institution`` selects which institution module is imported
    and run, and ``args.test`` decides whether the scraped data is committed
    or rolled back.  Any exception raised while scraping is logged and
    recorded as an alert instead of being propagated.

    Raises:
        ValueError: if ``args.logging`` is not a recognised level name.
            (Previously this surfaced as an unbound ``logger`` on first use.)
    """
    global changes
    import themortgagemeter_db
    import themortgagemeter_utils
    log_levels = {
        'DEBUG': logging.DEBUG,
        'INFO': logging.INFO,
        'WARNING': logging.WARNING,
        'ERROR': logging.ERROR,
        'CRITICAL': logging.CRITICAL,
        # STDOUT is set up at CRITICAL level; the stdout flag is passed separately
        'STDOUT': logging.CRITICAL,
    }
    if args.logging not in log_levels:
        raise ValueError('Unknown logging level: %s' % (args.logging,))
    themortgagemeter_db.open_db()
    logger = themortgagemeter_utils.setup_logging(log_levels[args.logging],stdout=args.stdout)
    logger.info('Program starting: %s', args.institution)
    try:
        # Institution modules are imported lazily so that only the selected
        # scraper's dependencies are loaded; an import failure is handled by
        # the except clause below like any other scraping error.
        if args.institution == 'HSBC':
            import hsbc
            hsbc.hsbc_main(args.static,args.forcedelete,logger)
        elif args.institution == 'NTNWD':
            import nationwide
            nationwide.nationwide_main(args.static,args.forcedelete,logger)
        elif args.institution == 'LLOYDS':
            import lloyds
            lloyds.lloyds_main(args.static,args.forcedelete,logger)
        elif args.institution == 'SNTNDR':
            import santander
            santander.santander_main(args.static,args.forcedelete,logger)
        elif args.institution == 'HLFX':
            import halifax
            halifax.halifax_main(args.static,args.forcedelete,logger)
        elif args.institution == 'NTWST':
            import natwest
            natwest.natwest_main(args.static,args.forcedelete,logger)
        elif args.institution == 'NRTHNR':
            import northernrock
            northernrock.northernrock_main(args.static,args.forcedelete,logger)
        elif args.institution == 'CHLS':
            import chelsea
            chelsea.chelsea_main(args.static,args.forcedelete,logger)
        elif args.institution == 'YRKSHR':
            import yorkshire
            yorkshire.yorkshire_main(args.static,args.forcedelete,logger)
        elif args.institution == 'TSC':
            import tesco
            tesco.tesco_main(args.static,args.forcedelete,logger)
        elif args.institution == 'SKPTN':
            import skipton
            skipton.skipton_main(args.static,args.forcedelete,logger)
        elif args.institution == 'PSTFFC':
            import post_office
            post_office.post_office_main(args.static,args.forcedelete,logger)
        elif args.institution == 'FRSTDRCT':
            import first_direct
            first_direct.first_direct_main(args.static,args.forcedelete,logger)
        else:
            raise Exception('Need to supply an institution','')
        if not args.test:
            themortgagemeter_db.db_connection.commit()
        else:
            logger.info('Not committing data, as --test passed in')
            themortgagemeter_db.db_connection.rollback()
    except Exception:
        logger.critical('Error was thrown, quitting')
        logger.exception('Error was:')
        # str() keeps the alert working when no institution was supplied at all
        themortgagemeter_utils.record_alert('ERROR: ' + str(args.institution),logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
    logger.info('Program complete for institution: %s', args.institution)
    # TODO: why does this never seem to be set to true?
    logger.info('Changes is: ' + str(changes))
    # NOTE(review): this final commit also runs after a failure, so rows
    # written before the exception are presumably committed too - confirm
    # that this is intended.
    themortgagemeter_db.commit_db()
Exemplo n.º 20
0
def _skipton_fee_text(td):
    """Return the text of fee cell *td* minus its first two bytes and any commas.

    The two-byte slice presumably drops the UTF-8 encoded pound sign; for a
    "No Fee" cell it leaves " Fee", which the caller maps to "0".
    """
    return td.string.encode('utf_8')[2:].replace(',','')


def _parse_skipton_rate_table(table, rate_td_index, logger):
    """Read rates and fees from a Skipton product table.

    Rows are addressed by 1-based position: 3 = initial rate (taken from cell
    ``rate_td_index``), 4 = SVR, 5 = APR, 7 = application fee, 8 = booking fee.

    Returns:
        Tuple ``(rate_percent, svr_percent, apr_percent, booking_fee,
        application_fee)``.  Rates default to ``0``.  Fees default to the
        string ``"0"`` so the caller's ``strip()``/``int()`` handling also
        works when the table has fewer than eight rows.
    """
    rate_percent = svr_percent = apr_percent = 0
    booking_fee = application_fee = "0"
    for row_number, tr in enumerate(table.find_all('tr'), 1):
        if row_number == 3:
            rate_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[rate_td_index].string,logger)
        elif row_number == 4:
            svr_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[1].string,logger)
        elif row_number == 5:
            apr_percent = themortgagemeter_utils.get_percentage(tr.find_all('td')[0].string,logger)
        elif row_number == 7:
            application_fee = _skipton_fee_text(tr.find_all('td')[0])
        elif row_number == 8:
            booking_fee = _skipton_fee_text(tr.find_all('td')[0])
    return rate_percent, svr_percent, apr_percent, booking_fee, application_fee


def get_product_pages(static,base_url,suffix,mortgage_type,href_re):
    """Follow every product link on a Skipton listing page and store the mortgages found.

    Args:
        static: passed through to ``themortgagemeter_utils.get_page``.
        base_url: prefix joined with ``suffix`` for the listing page and with
            each anchor's href for the product pages.
        suffix: path of the listing page relative to ``base_url``.
        mortgage_type: mortgage type code stored with every product.
        href_re: pattern used to select the product anchors on the listing page.

    Raises:
        Exception: if a selected link matches none of the known link patterns.
    """
    logger = logging.getLogger('retrieve')
    bsobj = themortgagemeter_utils.get_page(static,'static_html/skipton/fixed_rate_mortgages.html',base_url + suffix,logger)
    term = str(25 * 12)
    # eligibilities - first time buyers have their own page, so all others?
    eligibilities = ['NMH','NRM','ERM','EMH','EBM','EED']
    # (link pattern, multiplier applied to the captured period, index of the
    # cell holding the initial rate), tried in this order.
    # NOTE(review): fixed-rate periods are multiplied by 12 but tracker and
    # discount periods by 10; the 10 looks like a typo for 12 and is kept
    # unchanged here - confirm.
    product_kinds = (
        (fr_re, 12, 1),
        (tracker_re, 10, 0),
        (discount_re, 10, 1),
    )
    for anchor in bsobj.find_all(href=href_re):
        link = anchor.get('href')
        url = base_url + link
        found = None
        for pattern, period_multiplier, rate_td_index in product_kinds:
            found = re.search(pattern,link)
            if found:
                break
        if not found:
            if re.search(ftb_re,link):
                themortgagemeter_utils.record_alert('ERROR: SKIPTON first time buyer seen for the first time',logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
                continue
            raise Exception("Unhandled link " + url,'')
        # The link itself carries the initial period and the LTV
        initial_period = str(int(found.group(1)) * period_multiplier)
        ltv_percent = str(int(found.group(2)))
        # Now go to link
        subpage_bsobj = themortgagemeter_utils.get_page(static,'N/A',url,logger)
        table = subpage_bsobj.find_all(attrs={'id' : 'centralContent'},limit=1)[0].find_all('table',limit=1)[0]
        rate_percent, svr_percent, apr_percent, booking_fee, application_fee = _parse_skipton_rate_table(table, rate_td_index, logger)
        # Sometimes it's "No Fee" on the page
        if booking_fee.strip() == "Fee":
            booking_fee = "0"
        if application_fee.strip() == "Fee":
            application_fee = "0"
        # The stored booking fee is the sum of booking and application fees
        booking_fee = str(int(booking_fee) + int(application_fee))
        for eligibility in eligibilities:
            mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)
Exemplo n.º 21
0
def get_product_page_interest_rates(url, savings_data):
    """Scrape the interest-rate tables at *url* and store one savings product per data row.

    Args:
        url: address of the product page; it is also passed on with every
            product stored.
        savings_data: dict of product attributes.  It is updated in place
            with the flags implied by the URL and by each table's summary,
            and those updates carry over to the tables that follow.

    For every row after the first of each table, a copy of ``savings_data``
    is filled in from the row's cells and handed to
    ``savings_util.handle_savings_insert``.  A summary or regular-saver cell
    that cannot be interpreted is recorded as an alert and ends the process
    through ``exit()``; a table with an empty summary ends it without an alert.
    """
    logger = logging.getLogger('retrieve ' + url)
    bsobj = themortgagemeter_utils.get_page(False, '', url, logger)
    if re.match('.*isa.*', url):
        savings_data['isa'] = 'Y'
    for t in bsobj.find_all('table'):
        # The summary attribute ("Interest rates: ...") names the product.
        # NOTE(review): a table without a summary attribute presumably makes
        # t.get() return None and fails here before the emptiness check -
        # confirm whether such tables should be skipped instead.
        summary = t.get('summary').encode('utf-8').lower()
        if not summary:
            exit()
        summary_info = re.match('.*interest rates: (.*)', summary).group(1)
        # Exact comparisons: the former `in ("...")` form was a substring
        # test against a plain string (not a tuple), so an empty or partial
        # summary_info was accepted by the wrong branch.
        if summary_info == "cash e-isa#":
            savings_data['isa'] = 'Y'
        elif summary_info == "fixed rate saver - monthly interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'M'
        elif summary_info == "fixed rate saver - annual interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'Y'
        elif "regular saver" in summary_info:
            savings_data['regular_saver'] = 'Y'
            savings_data['interest_paid'] = 'Y'
        elif "online bonus" in summary_info:
            savings_data['bonus'] = 'Y'
            savings_data['branch'] = 'N'
            savings_data['bonus_frequency_period'] = '1'
            savings_data['bonus_frequency_type'] = 'M'
            # skip bonus for HSBC- it's complicated - probably needs its own function TODO
            continue
        elif "flexible saver" in summary_info:
            savings_data['variability'] = 'V'
        else:
            themortgagemeter_utils.record_alert(
                'NEED TO HANDLE: ' + summary_info, logger,
                themortgagemeter_db.db_connection,
                themortgagemeter_db.cursor)
            exit()
        for tr_count, tr in enumerate(t.find_all('tr')):
            if tr_count == 0:
                # The first row of each table carries no product
                continue
            # This is a new savings product, so work on a copy of the shared data.
            this_savings_data = savings_data.copy()
            is_regular_saver = this_savings_data['regular_saver'] == 'Y'
            # Regular-saver rows start at -1, so their first counted cell is ignored
            td_count = -1 if is_regular_saver else 0
            for td in tr.find_all('td'):
                td_style = td.get('style')
                if td_style is not None:
                    td_style = td_style.lower().encode('utf-8').translate(None, ' ')
                    # Cells styled this way are not counted as data cells
                    if td_style == 'vertical-align:middle':
                        continue
                logger.info(td)
                v = td.text.encode('utf-8').lower().strip()
                if td_count == 0:
                    if is_regular_saver:
                        logger.info('regular_saver: ' + v)
                        words = v.split()
                        # [2:] drops the two leading bytes of each amount
                        # (presumably the UTF-8 pound sign)
                        this_savings_data['regular_saver_min_amt'] = words[0][2:]
                        this_savings_data['regular_saver_max_amt'] = words[2][2:]
                        if words[4] == "month":
                            this_savings_data['regular_saver_frequency_period'] = '1'
                            this_savings_data['regular_saver_frequency_type'] = 'M'
                        else:
                            themortgagemeter_utils.record_alert(
                                'ERROR: reg saver not parsed: ' + v,
                                logger,
                                themortgagemeter_db.db_connection,
                                themortgagemeter_db.cursor)
                            exit()
                    else:
                        # if it's got a + at the end, it's a min, if it's "up to" it's a max.
                        res = savings_util.get_money_range(v, logger)
                        this_savings_data['min_amt'] = res[0]
                        this_savings_data['max_amt'] = res[1]
                elif td_count == 2:
                    # gross % (cell 1, the net rate, is deliberately ignored)
                    this_savings_data['gross_percent'] = v
                elif td_count == 3:
                    this_savings_data['aer_percent'] = v
                td_count += 1
            # Some trs have no tds; we ignore those.
            if td_count > 0:
                # Now store this product
                # TODO: fixed savings?
                logger.info(this_savings_data)
                product = this_savings_data
                savings_util.handle_savings_insert(
                    institution_code,
                    product['isa'],
                    product['regular_saver'],
                    product['regular_saver_frequency_period'],
                    product['regular_saver_frequency_type'],
                    product['regular_saver_min_amt'],
                    product['regular_saver_max_amt'],
                    product['bonus'],
                    product['bonus_frequency_period'],
                    product['bonus_frequency_type'],
                    product['online'],
                    product['branch'],
                    product['variability'],
                    product['savings_period'],
                    product['min_amt'],
                    product['max_amt'],
                    product['gross_percent'],
                    product['aer_percent'],
                    product['child'],
                    product['interest_paid'],
                    url, logger)
Exemplo n.º 22
0
def get_product_page_interest_rates(url,savings_data):
    """Scrape the interest-rate tables at *url* and store one savings product per data row.

    Args:
        url: address of the product page; it is also passed on with every
            product stored.
        savings_data: dict of product attributes.  It is updated in place
            with the flags implied by the URL and by each table's summary,
            and those updates carry over to the tables that follow.

    For every row after the first of each table, a copy of ``savings_data``
    is filled in from the row's cells and handed to
    ``savings_util.handle_savings_insert``.  A summary or regular-saver cell
    that cannot be interpreted is recorded as an alert and ends the process
    through ``exit()``; a table with an empty summary ends it without an alert.
    """
    logger = logging.getLogger('retrieve ' + url)
    bsobj = themortgagemeter_utils.get_page(False,'',url,logger)
    if re.match('.*isa.*',url):
        savings_data['isa'] = 'Y'
    for t in bsobj.find_all('table'):
        # The summary attribute ("Interest rates: ...") names the product.
        # NOTE(review): a table without a summary attribute presumably makes
        # t.get() return None and fails here before the emptiness check -
        # confirm whether such tables should be skipped instead.
        summary = t.get('summary').encode('utf-8').lower()
        if not summary:
            exit()
        summary_info = re.match('.*interest rates: (.*)',summary).group(1)
        # Exact comparisons: the former `in ("...")` form was a substring
        # test against a plain string (not a tuple), so an empty or partial
        # summary_info was accepted by the wrong branch.
        if summary_info == "cash e-isa#":
            savings_data['isa'] = 'Y'
        elif summary_info == "fixed rate saver - monthly interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'M'
        elif summary_info == "fixed rate saver - annual interest":
            savings_data['variability'] = 'F'
            savings_data['interest_paid'] = 'Y'
        elif "regular saver" in summary_info:
            savings_data['regular_saver'] = 'Y'
            savings_data['interest_paid'] = 'Y'
        elif "online bonus" in summary_info:
            savings_data['bonus'] = 'Y'
            savings_data['branch'] = 'N'
            savings_data['bonus_frequency_period'] = '1'
            savings_data['bonus_frequency_type'] = 'M'
            # skip bonus for HSBC- it's complicated - probably needs its own function TODO
            continue
        elif "flexible saver" in summary_info:
            savings_data['variability'] = 'V'
        else:
            themortgagemeter_utils.record_alert('NEED TO HANDLE: ' + summary_info,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
            exit()
        for tr_count, tr in enumerate(t.find_all('tr')):
            if tr_count == 0:
                # The first row of each table carries no product
                continue
            # This is a new savings product, so work on a copy of the shared data.
            this_savings_data = savings_data.copy()
            is_regular_saver = this_savings_data['regular_saver'] == 'Y'
            # Regular-saver rows start at -1, so their first counted cell is ignored
            td_count = -1 if is_regular_saver else 0
            for td in tr.find_all('td'):
                td_style = td.get('style')
                if td_style is not None:
                    td_style = td_style.lower().encode('utf-8').translate(None, ' ')
                    # Cells styled this way are not counted as data cells
                    if td_style == 'vertical-align:middle':
                        continue
                logger.info(td)
                v = td.text.encode('utf-8').lower().strip()
                if td_count == 0:
                    if is_regular_saver:
                        logger.info('regular_saver: ' + v)
                        words = v.split()
                        # [2:] drops the two leading bytes of each amount
                        # (presumably the UTF-8 pound sign)
                        this_savings_data['regular_saver_min_amt'] = words[0][2:]
                        this_savings_data['regular_saver_max_amt'] = words[2][2:]
                        if words[4] == "month":
                            this_savings_data['regular_saver_frequency_period'] = '1'
                            this_savings_data['regular_saver_frequency_type'] = 'M'
                        else:
                            themortgagemeter_utils.record_alert('ERROR: reg saver not parsed: ' + v,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
                            exit()
                    else:
                        # if it's got a + at the end, it's a min, if it's "up to" it's a max.
                        res = savings_util.get_money_range(v,logger)
                        this_savings_data['min_amt'] = res[0]
                        this_savings_data['max_amt'] = res[1]
                elif td_count == 2:
                    # gross % (cell 1, the net rate, is deliberately ignored)
                    this_savings_data['gross_percent'] = v
                elif td_count == 3:
                    this_savings_data['aer_percent'] = v
                td_count += 1
            # Some trs have no tds; we ignore those.
            if td_count > 0:
                # Now store this product
                # TODO: fixed savings?
                logger.info(this_savings_data)
                product = this_savings_data
                savings_util.handle_savings_insert(
                    institution_code,
                    product['isa'],
                    product['regular_saver'],
                    product['regular_saver_frequency_period'],
                    product['regular_saver_frequency_type'],
                    product['regular_saver_min_amt'],
                    product['regular_saver_max_amt'],
                    product['bonus'],
                    product['bonus_frequency_period'],
                    product['bonus_frequency_type'],
                    product['online'],
                    product['branch'],
                    product['variability'],
                    product['savings_period'],
                    product['min_amt'],
                    product['max_amt'],
                    product['gross_percent'],
                    product['aer_percent'],
                    product['child'],
                    product['interest_paid'],
                    url, logger)
Exemplo n.º 23
0
def _first_percentage_in_cell(td,logger):
	"""Return the first non-empty percentage found on any line of *td*'s text, or ''."""
	for line in td.text.encode('utf-8').split('\n'):
		percentage = themortgagemeter_utils.get_percentage(line,logger)
		if percentage != '':
			return percentage
	return ''


def get_product_page(static,url,eligibilities):
	"""Scrape the Post Office full-range page and store every mortgage row found.

	Args:
		static: passed through to ``themortgagemeter_utils.get_page``.
		url: address of the page; also stored with every product.
		eligibilities: eligibility codes; each product is inserted once per code.

	Each ``h2`` carries a percentage and is followed by a ``displaytable``
	element whose rows hold the products:
	td0 = years of fixed or tracker
	td1 = initial rate
	td2 = svr
	td3 = apr
	td4 = fees
	"""
	logger = logging.getLogger('retrieve')
	bsobj = themortgagemeter_utils.get_page(static,'static_html/post_office/our-full-range.html',url,logger)
	term = str(25 * 12)
	for ltv_elem in bsobj.find_all('h2'):
		ltv_elem_str = ltv_elem.string
		if not ltv_elem_str:
			continue
		ltv_percent = themortgagemeter_utils.get_percentage(ltv_elem_str,logger)
		if ltv_percent == '':
			continue
		# For post office, first reported % is 100 - LTV
		ltv_percent = str(100 - int(ltv_percent))
		div = ltv_elem.fetchNextSiblings(attrs={'class' : 'displaytable'},limit=1)
		if not div:
			continue
		logger.debug('here')
		logger.debug(div)
		for tr in div[0].find_all('tr'):
			logger.debug(tr)
			tds = tr.find_all('td')
			# Only rows with more than one cell describe a mortgage
			if len(tds) < 2:
				continue
			period_lines = tds[0].text.encode('utf-8').split('\n')
			logger.debug(period_lines)
			# Sometimes we get empty fields - we remove them here.
			period_lines = [line for line in period_lines if line != '']
			initial_period = str(themortgagemeter_utils.get_months(period_lines[0],logger))
			# TODO: generic text cleansing function
			type_str = re.sub('\xa0','',re.sub('\xc2',' ',tds[0].text.encode('utf-8'))).split()[2]
			logger.debug('type_str: ' + type_str)
			if type_str == 'fixed':
				mortgage_type = 'F'
			elif type_str == 'tracker':
				mortgage_type = 'T'
			else:
				themortgagemeter_utils.record_alert('ERROR: PSTFFC neither fixed nor tracker: ' + type_str,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
				# Skip the row: otherwise mortgage_type would be unbound, or
				# left over from the previous row, and a wrong product stored.
				continue
			i = 1
			rate_percent = _first_percentage_in_cell(tds[i],logger)
			# The SVR and APR may sit one or more cells further along, so keep
			# advancing until a percentage turns up (IndexError if none does).
			svr_percent = ''
			while svr_percent == '':
				i += 1
				svr_percent = _first_percentage_in_cell(tds[i],logger)
			apr_percent = ''
			while apr_percent == '':
				i += 1
				apr_percent = _first_percentage_in_cell(tds[i],logger)
			i += 1
			# [2:] drops the two leading bytes of the fee (presumably the UTF-8 pound sign)
			booking_fee = tds[i].text.strip().encode('utf-8')[2:].replace(',','')
			for eligibility in eligibilities:
				mc_util.handle_mortgage_insert(institution_code,mortgage_type,rate_percent,svr_percent,apr_percent,ltv_percent,initial_period,booking_fee,term,url,eligibility,logger)