Пример #1
0
def get_product_page_interest_rates(url,savings_data):
	logger = logging.getLogger('retrieve ' + url)
	bsobj = themortgagemeter_utils.get_page(False,'',url,logger)
	#logger.info(url) #logger.info(bsobj)
	if re.match('.*isa.*',url):
		savings_data['isa'] = 'Y'
	for t in bsobj.find_all('table'):
		#logger.info("TABLE")# logger.info(t)
		# Get all tables, then match on summary == "Interest rates:.*", and set up variables accordingly.
		summary = t.get('summary').encode('utf-8').lower()
		if summary:
			# Set up data for this page
			summary_info = re.match('.*interest rates: (.*)',summary).group(1)
			#logger.info("summary info: " + summary_info)
			if summary_info in ("cash e-isa#"):
				savings_data['isa'] = 'Y'
			elif summary_info in ("fixed rate saver - monthly interest"):
				savings_data['variability'] = 'F'
				savings_data['interest_paid'] = 'M'
			elif summary_info in ("fixed rate saver - annual interest"):
				savings_data['variability'] = 'F'
				savings_data['interest_paid'] = 'Y'
			elif "regular saver" in summary_info:
				savings_data['regular_saver'] = 'Y'
				savings_data['interest_paid'] = 'Y'
			elif "online bonus" in summary_info:
				savings_data['bonus']  = 'Y'
				savings_data['branch'] = 'N'
				savings_data['bonus_frequency_period'] = '1'
				savings_data['bonus_frequency_type']   = 'M'
				# skip bonus for HSBC- it's complicated - probably needs its own function TODO
				continue
			elif "flexible saver" in summary_info:
				savings_data['variability']     = 'V'
			else:
				themortgagemeter_utils.record_alert('NEED TO HANDLE: ' + summary_info,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
				exit()
			tr_count = 0
			for tr in t.find_all('tr'):
				# This is a new savings product, so clone the data at this point and use that from here.
				this_savings_data = savings_data.copy()
				#logger.info("TR " + str(tr_count)) #logger.info(tr)
				if this_savings_data['bonus'] == 'Y':
					#print "BONUS"
					#print tr
					pass
				if this_savings_data['regular_saver'] == 'Y':
					td_count = -1
				else:
					td_count = 0
				if tr_count >= 1:
					# If tax-free, this will be true
					for td in tr.find_all('td'):
						td_style = td.get('style')
						if td_style != None:
							td_style = td_style.lower().encode('utf-8').translate(None, ' ')
							if td_style == 'vertical-align:middle':
								continue
						#logger.info("TD" + str(td_count)) #logger.info(tr_count) #logger.info(td_count)
						logger.info(td)
						v = td.text.encode('utf-8').lower().strip()
						if td_count == 0:
							#logger.info(this_savings_data['regular_saver'])
							if this_savings_data['regular_saver'] == 'Y':
								logger.info('regular_saver: ' + v)
								this_savings_data['regular_saver_min_amt'] = v.split()[0][2:]
								this_savings_data['regular_saver_max_amt'] = v.split()[2][2:]
								if v.split()[4] == "month":
									this_savings_data['regular_saver_frequency_period'] = '1'
									this_savings_data['regular_saver_frequency_type'] = 'M'
								else:
									themortgagemeter_utils.record_alert('ERROR: reg saver not parsed: ' + v,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
									exit()
							else:
								# if it's got a + at the end, it's a min, if it's "up to" it's a max.
								res = savings_util.get_money_range(v,logger)
								this_savings_data['min_amt'] = res[0]
								this_savings_data['max_amt'] = res[1]
								# TODO: remove this section
								#if re.match('^.*\+$',v):
								#	money_val = themortgagemeter_utils.get_money(v,logger)
								#	this_savings_data['min_amt'] = money_val
								#elif re.match('^.*up to.*$',v) or re.match('^.*under.*$',v):
								#	money_val = themortgagemeter_utils.get_money(v,logger)
								#	this_savings_data['max_amt'] = money_val
								#	this_savings_data['min_amt'] = 0
								#elif re.match('^.* - .*$',v):
								#	this_savings_data['min_amt'] = v.split()[0][2:].translate(None,',')
								#	this_savings_data['max_amt'] = v.split()[2][2:].translate(None,',')
								#else:
								#	#logger.info(t) #logger.info('value not handled: ' + v)
								#	themortgagemeter_utils.record_alert('ERROR: value wrong: ' + v,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
								#	exit()
						elif td_count == 1:
							# we don't bother with net_percent
							pass
						elif td_count == 2:
							# gross %
							this_savings_data['gross_percent'] = v
						elif td_count == 3:
							this_savings_data['aer_percent'] = v
						td_count += 1
					# Some trs have no tds; we ignore those.
					if td_count > 0:
						# Now store this product
						# TODO: fixed savings?
						logger.info(this_savings_data)
						isa = this_savings_data['isa']
						regular_saver = this_savings_data['regular_saver']
						regular_saver_frequency_period = this_savings_data['regular_saver_frequency_period']
						regular_saver_frequency_type = this_savings_data['regular_saver_frequency_type']
						regular_saver_min_amt = this_savings_data['regular_saver_min_amt']
						regular_saver_max_amt = this_savings_data['regular_saver_max_amt']
						bonus = this_savings_data['bonus']
						bonus_frequency_period = this_savings_data['bonus_frequency_period']
						bonus_frequency_type = this_savings_data['bonus_frequency_type']
						online = this_savings_data['online']
						branch = this_savings_data['branch']
						variability = this_savings_data['variability']
						min_amt = this_savings_data['min_amt']
						max_amt = this_savings_data['max_amt']
						gross_percent = this_savings_data['gross_percent']
						aer_percent = this_savings_data['aer_percent']
						interest_paid = this_savings_data['interest_paid']
						child = this_savings_data['child']
						savings_period = this_savings_data['savings_period']
						savings_util.handle_savings_insert(institution_code, isa, regular_saver, regular_saver_frequency_period, regular_saver_frequency_type, regular_saver_min_amt, regular_saver_max_amt, bonus, bonus_frequency_period, bonus_frequency_type, online, branch, variability, savings_period, min_amt, max_amt, gross_percent, aer_percent, child, interest_paid, url, logger)
				else:
					tr_count += 1
					continue
				tr_count += 1
		else:
			#print url
			#print bsobj
			exit()
Пример #2
0
def get_product_pages(static, base_url, ext_url, logger):
    url = base_url + ext_url
    urls_seen = []
    bsobj = themortgagemeter_utils.get_page(
        static, 'static_html/halifax/savings-accounts.html', url, logger)
    # let's see how much info we can extract from the page
    # Get all the sortable tables and divine as much info as possible from that.
    sortable_tables = doormatCols = bsobj.find_all(
        attrs={'class': 'sortableTable'})
    for table in sortable_tables:
        #print table
        for tr in table.find_all('tr'):
            td_idx = 0
            savings_data = savings_util.get_savings_data_object()
            for td in tr.find_all('td'):
                td_text = td.text.encode('utf-8').strip().lower()
                if td_idx == 0:
                    # title of account - Junior == child
                    #re.match('/product/A[0-9]+.*',href)
                    ##0 ##<td style="text-align: left;"><a href="/savings/accounts/cash-isas/isa-saver-online/">ISA Saver Online</a></td>
                    ##1 ##<td style="text-align: left;"><strong class="apr">1.35%</strong> tax free/AER variable including 12 month fixed bonus of<strong> </strong>1.10%</td>
                    ##2 ##<td>£1</td>
                    ##3 ##<td>Variable</td>
                    ##4 ##<td>Unlimited</td>
                    ##5 ##<td style="text-align: center;"><img alt="Online" src="/common/images/icons/mousegrey.gif" title="Online"/></td>
                    ##6 ##<td><a href="/savings/accounts/cash-isas/isa-saver-online/"><img alt="Find out more" src="/common/images/Buttons/primary_find_out_more.gif"/></a></td>
                    #print "0: " + td_text
                    if re.match('^.*isa.*$', td_text):
                        savings_data['isa'] = 'Y'
                    if re.match('^.*junior.*$', td_text):
                        savings_data['child'] = 'Y'
                elif td_idx == 1:
                    #print "1: " + td_text
                    # We don't bother with this at the moment - TODO - sort this out
                    #if re.match('.*bonus.*',td_text):
                    #	savings_data['bonus'] = 'Y'
                    pass
                elif td_idx == 2:
                    #print "2: " + td_text
                    # minimum investment, max is always infinity
                    min_amt = themortgagemeter_utils.get_money(td_text, logger)
                    savings_data['min_amt'] = min_amt
                elif td_idx == 3:
                    #print "3: " + td_text
                    # Variable/Fixed
                    if re.match('.*variable.*', td_text):
                        savings_data['variability'] = 'V'
                    elif re.match('.*fixed.*', td_text):
                        savings_data['variability'] = 'F'
                    else:
                        themortgagemeter_utils.record_alert(
                            'ERROR: unknown variability: ' + td_text, logger,
                            themortgagemeter_db.db_connection,
                            themortgagemeter_db.cursor)
                        exit()
                elif td_idx == 4:
                    #print "4: " + td_text
                    # Let's assume we'll get this info from the sub-page.
                    # Withdrawals allowed: "None, by closure only", "Unlimited", "None, until child is 18"
                    pass
                elif td_idx == 5:
                    #print "5: " + td_text
                    for img in td.find_all('img'):
                        title = img.get('title').lower().strip()
                        if title == 'online':
                            savings_data['online'] = 'Y'
                        elif title == 'branch':
                            savings_data['branch'] = 'Y'
                            # I'm going to ignore "phone"
                        elif title == 'phone':
                            pass
                elif td_idx == 6:
                    #print "6: " + td_text
                    # more details link
                    new_url = base_url + td.find_all('a')[0].get('href')
                    if new_url in urls_seen:
                        continue
                    savings_array = process_more_info_page(
                        savings_data, new_url, logger)
                    print new_url
                    for this_savings_data in savings_array:
                        # insert savings here TODO.
                        print this_savings_data
                        isa = this_savings_data['isa']
                        regular_saver = this_savings_data['regular_saver']
                        regular_saver_frequency_period = this_savings_data[
                            'regular_saver_frequency_period']
                        regular_saver_frequency_type = this_savings_data[
                            'regular_saver_frequency_type']
                        regular_saver_min_amt = this_savings_data[
                            'regular_saver_min_amt']
                        regular_saver_max_amt = this_savings_data[
                            'regular_saver_max_amt']
                        bonus = this_savings_data['bonus']
                        bonus_frequency_period = this_savings_data[
                            'bonus_frequency_period']
                        bonus_frequency_type = this_savings_data[
                            'bonus_frequency_type']
                        online = this_savings_data['online']
                        branch = this_savings_data['branch']
                        variability = this_savings_data['variability']
                        min_amt = this_savings_data['min_amt']
                        max_amt = this_savings_data['max_amt']
                        gross_percent = this_savings_data['gross_percent']
                        aer_percent = this_savings_data['aer_percent']
                        interest_paid = this_savings_data['interest_paid']
                        child = this_savings_data['child']
                        savings_period = this_savings_data['savings_period']
                        savings_util.handle_savings_insert(
                            institution_code, isa, regular_saver,
                            regular_saver_frequency_period,
                            regular_saver_frequency_type,
                            regular_saver_min_amt, regular_saver_max_amt,
                            bonus, bonus_frequency_period,
                            bonus_frequency_type, online, branch, variability,
                            savings_period, min_amt, max_amt, gross_percent,
                            aer_percent, child, interest_paid, url, logger)
                    urls_seen.insert(0, new_url)
                else:
                    themortgagemeter_utils.record_alert(
                        'ERROR: too many tds in tr: ' + tr, logger,
                        themortgagemeter_db.db_connection,
                        themortgagemeter_db.cursor)
                    exit()
                td_idx = td_idx + 1
Пример #3
0
def get_product_pages(static,base_url,ext_url,logger):
	url = base_url + ext_url
	urls_seen = []
	bsobj = themortgagemeter_utils.get_page(static,'static_html/halifax/savings-accounts.html',url,logger)
	# let's see how much info we can extract from the page
	# Get all the sortable tables and divine as much info as possible from that.
	sortable_tables = doormatCols = bsobj.find_all(attrs={'class' : 'sortableTable'})
	for table in sortable_tables:
		#print table
		for tr in table.find_all('tr'):
			td_idx = 0
			savings_data = savings_util.get_savings_data_object()
			for td in tr.find_all('td'):
				td_text = td.text.encode('utf-8').strip().lower()
				if td_idx == 0:
					# title of account - Junior == child
					#re.match('/product/A[0-9]+.*',href)
					##0 ##<td style="text-align: left;"><a href="/savings/accounts/cash-isas/isa-saver-online/">ISA Saver Online</a></td>
					##1 ##<td style="text-align: left;"><strong class="apr">1.35%</strong> tax free/AER variable including 12 month fixed bonus of<strong> </strong>1.10%</td>
					##2 ##<td>£1</td>
					##3 ##<td>Variable</td>
					##4 ##<td>Unlimited</td>
					##5 ##<td style="text-align: center;"><img alt="Online" src="/common/images/icons/mousegrey.gif" title="Online"/></td>
					##6 ##<td><a href="/savings/accounts/cash-isas/isa-saver-online/"><img alt="Find out more" src="/common/images/Buttons/primary_find_out_more.gif"/></a></td>
					#print "0: " + td_text
					if re.match('^.*isa.*$',td_text):
						savings_data['isa'] = 'Y'
					if re.match('^.*junior.*$',td_text):
						savings_data['child'] = 'Y'
				elif td_idx == 1:
					#print "1: " + td_text
					# We don't bother with this at the moment - TODO - sort this out
					#if re.match('.*bonus.*',td_text):
					#	savings_data['bonus'] = 'Y'
					pass
				elif td_idx == 2:
					#print "2: " + td_text
					# minimum investment, max is always infinity
					min_amt = themortgagemeter_utils.get_money(td_text,logger)
					savings_data['min_amt'] = min_amt
				elif td_idx == 3:
					#print "3: " + td_text
					# Variable/Fixed
					if re.match('.*variable.*',td_text):
						savings_data['variability'] = 'V'
					elif re.match('.*fixed.*',td_text):
						savings_data['variability'] = 'F'
					else:
						themortgagemeter_utils.record_alert('ERROR: unknown variability: ' + td_text,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
						exit()
				elif td_idx == 4:
					#print "4: " + td_text
					# Let's assume we'll get this info from the sub-page.
					# Withdrawals allowed: "None, by closure only", "Unlimited", "None, until child is 18"
					pass
				elif td_idx == 5:
					#print "5: " + td_text
					for img in td.find_all('img'):
						title = img.get('title').lower().strip()
						if title == 'online':
							savings_data['online'] = 'Y'
						elif title == 'branch':
							savings_data['branch'] = 'Y'
							# I'm going to ignore "phone"
						elif title == 'phone':
							pass
				elif td_idx == 6:
					#print "6: " + td_text
					# more details link
					new_url = base_url + td.find_all('a')[0].get('href')
					if new_url in urls_seen:
						continue
					savings_array = process_more_info_page(savings_data,new_url,logger)
					print new_url
					for this_savings_data in savings_array:
						# insert savings here TODO.
						print this_savings_data
						isa = this_savings_data['isa']
						regular_saver = this_savings_data['regular_saver']
						regular_saver_frequency_period = this_savings_data['regular_saver_frequency_period']
						regular_saver_frequency_type = this_savings_data['regular_saver_frequency_type']
						regular_saver_min_amt = this_savings_data['regular_saver_min_amt']
						regular_saver_max_amt = this_savings_data['regular_saver_max_amt']
						bonus = this_savings_data['bonus']
						bonus_frequency_period = this_savings_data['bonus_frequency_period']
						bonus_frequency_type = this_savings_data['bonus_frequency_type']
						online = this_savings_data['online']
						branch = this_savings_data['branch']
						variability = this_savings_data['variability']
						min_amt = this_savings_data['min_amt']
						max_amt = this_savings_data['max_amt']
						gross_percent = this_savings_data['gross_percent']
						aer_percent = this_savings_data['aer_percent']
						interest_paid = this_savings_data['interest_paid']
						child = this_savings_data['child']
						savings_period = this_savings_data['savings_period']
						savings_util.handle_savings_insert(institution_code, isa, regular_saver, regular_saver_frequency_period, regular_saver_frequency_type, regular_saver_min_amt, regular_saver_max_amt, bonus, bonus_frequency_period, bonus_frequency_type, online, branch, variability, savings_period, min_amt, max_amt, gross_percent, aer_percent, child, interest_paid, url, logger)
					urls_seen.insert(0,new_url)
				else:
					themortgagemeter_utils.record_alert('ERROR: too many tds in tr: ' + tr,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
					exit()
				td_idx = td_idx + 1
Пример #4
0
def get_product_page_interest_rates(url, savings_data):
    logger = logging.getLogger('retrieve ' + url)
    bsobj = themortgagemeter_utils.get_page(False, '', url, logger)
    #logger.info(url) #logger.info(bsobj)
    if re.match('.*isa.*', url):
        savings_data['isa'] = 'Y'
    for t in bsobj.find_all('table'):
        #logger.info("TABLE")# logger.info(t)
        # Get all tables, then match on summary == "Interest rates:.*", and set up variables accordingly.
        summary = t.get('summary').encode('utf-8').lower()
        if summary:
            # Set up data for this page
            summary_info = re.match('.*interest rates: (.*)', summary).group(1)
            #logger.info("summary info: " + summary_info)
            if summary_info in ("cash e-isa#"):
                savings_data['isa'] = 'Y'
            elif summary_info in ("fixed rate saver - monthly interest"):
                savings_data['variability'] = 'F'
                savings_data['interest_paid'] = 'M'
            elif summary_info in ("fixed rate saver - annual interest"):
                savings_data['variability'] = 'F'
                savings_data['interest_paid'] = 'Y'
            elif "regular saver" in summary_info:
                savings_data['regular_saver'] = 'Y'
                savings_data['interest_paid'] = 'Y'
            elif "online bonus" in summary_info:
                savings_data['bonus'] = 'Y'
                savings_data['branch'] = 'N'
                savings_data['bonus_frequency_period'] = '1'
                savings_data['bonus_frequency_type'] = 'M'
                # skip bonus for HSBC- it's complicated - probably needs its own function TODO
                continue
            elif "flexible saver" in summary_info:
                savings_data['variability'] = 'V'
            else:
                themortgagemeter_utils.record_alert(
                    'NEED TO HANDLE: ' + summary_info, logger,
                    themortgagemeter_db.db_connection,
                    themortgagemeter_db.cursor)
                exit()
            tr_count = 0
            for tr in t.find_all('tr'):
                # This is a new savings product, so clone the data at this point and use that from here.
                this_savings_data = savings_data.copy()
                #logger.info("TR " + str(tr_count)) #logger.info(tr)
                if this_savings_data['bonus'] == 'Y':
                    #print "BONUS"
                    #print tr
                    pass
                if this_savings_data['regular_saver'] == 'Y':
                    td_count = -1
                else:
                    td_count = 0
                if tr_count >= 1:
                    # If tax-free, this will be true
                    for td in tr.find_all('td'):
                        td_style = td.get('style')
                        if td_style != None:
                            td_style = td_style.lower().encode(
                                'utf-8').translate(None, ' ')
                            if td_style == 'vertical-align:middle':
                                continue
                        #logger.info("TD" + str(td_count)) #logger.info(tr_count) #logger.info(td_count)
                        logger.info(td)
                        v = td.text.encode('utf-8').lower().strip()
                        if td_count == 0:
                            #logger.info(this_savings_data['regular_saver'])
                            if this_savings_data['regular_saver'] == 'Y':
                                logger.info('regular_saver: ' + v)
                                this_savings_data[
                                    'regular_saver_min_amt'] = v.split()[0][2:]
                                this_savings_data[
                                    'regular_saver_max_amt'] = v.split()[2][2:]
                                if v.split()[4] == "month":
                                    this_savings_data[
                                        'regular_saver_frequency_period'] = '1'
                                    this_savings_data[
                                        'regular_saver_frequency_type'] = 'M'
                                else:
                                    themortgagemeter_utils.record_alert(
                                        'ERROR: reg saver not parsed: ' + v,
                                        logger,
                                        themortgagemeter_db.db_connection,
                                        themortgagemeter_db.cursor)
                                    exit()
                            else:
                                # if it's got a + at the end, it's a min, if it's "up to" it's a max.
                                res = savings_util.get_money_range(v, logger)
                                this_savings_data['min_amt'] = res[0]
                                this_savings_data['max_amt'] = res[1]
                                # TODO: remove this section
                                #if re.match('^.*\+$',v):
                                #	money_val = themortgagemeter_utils.get_money(v,logger)
                                #	this_savings_data['min_amt'] = money_val
                                #elif re.match('^.*up to.*$',v) or re.match('^.*under.*$',v):
                                #	money_val = themortgagemeter_utils.get_money(v,logger)
                                #	this_savings_data['max_amt'] = money_val
                                #	this_savings_data['min_amt'] = 0
                                #elif re.match('^.* - .*$',v):
                                #	this_savings_data['min_amt'] = v.split()[0][2:].translate(None,',')
                                #	this_savings_data['max_amt'] = v.split()[2][2:].translate(None,',')
                                #else:
                                #	#logger.info(t) #logger.info('value not handled: ' + v)
                                #	themortgagemeter_utils.record_alert('ERROR: value wrong: ' + v,logger,themortgagemeter_db.db_connection,themortgagemeter_db.cursor)
                                #	exit()
                        elif td_count == 1:
                            # we don't bother with net_percent
                            pass
                        elif td_count == 2:
                            # gross %
                            this_savings_data['gross_percent'] = v
                        elif td_count == 3:
                            this_savings_data['aer_percent'] = v
                        td_count += 1
                    # Some trs have no tds; we ignore those.
                    if td_count > 0:
                        # Now store this product
                        # TODO: fixed savings?
                        logger.info(this_savings_data)
                        isa = this_savings_data['isa']
                        regular_saver = this_savings_data['regular_saver']
                        regular_saver_frequency_period = this_savings_data[
                            'regular_saver_frequency_period']
                        regular_saver_frequency_type = this_savings_data[
                            'regular_saver_frequency_type']
                        regular_saver_min_amt = this_savings_data[
                            'regular_saver_min_amt']
                        regular_saver_max_amt = this_savings_data[
                            'regular_saver_max_amt']
                        bonus = this_savings_data['bonus']
                        bonus_frequency_period = this_savings_data[
                            'bonus_frequency_period']
                        bonus_frequency_type = this_savings_data[
                            'bonus_frequency_type']
                        online = this_savings_data['online']
                        branch = this_savings_data['branch']
                        variability = this_savings_data['variability']
                        min_amt = this_savings_data['min_amt']
                        max_amt = this_savings_data['max_amt']
                        gross_percent = this_savings_data['gross_percent']
                        aer_percent = this_savings_data['aer_percent']
                        interest_paid = this_savings_data['interest_paid']
                        child = this_savings_data['child']
                        savings_period = this_savings_data['savings_period']
                        savings_util.handle_savings_insert(
                            institution_code, isa, regular_saver,
                            regular_saver_frequency_period,
                            regular_saver_frequency_type,
                            regular_saver_min_amt, regular_saver_max_amt,
                            bonus, bonus_frequency_period,
                            bonus_frequency_type, online, branch, variability,
                            savings_period, min_amt, max_amt, gross_percent,
                            aer_percent, child, interest_paid, url, logger)
                else:
                    tr_count += 1
                    continue
                tr_count += 1
        else:
            #print url
            #print bsobj
            exit()