def get_history(self):
    """Yield a ``Transaction`` for every usable row of the history tables.

    Rows are taken from ``//table[@cellpadding="1"]/tr`` followed by every
    ``tr`` whose class is ``rowClick`` or ``rowHover``. Rows with fewer
    than four cells are skipped. A ``DataGridHeader`` row is never yielded;
    it only switches the "ignore" state: the rows that follow are skipped
    when the header's third cell reads ``Titulaire`` and kept otherwise.

    Transactions whose ``date`` is ``NotAvailable`` after parsing are
    dropped; transactions whose raw label contains ``tot dif`` are yielded
    with ``deleted`` set to True.

    Raises:
        AssertionError: if the card-debit title element is present but its
            text contains no ``dd/mm/yyyy`` date.
    """
    rows = (self.doc.xpath('//table[@cellpadding="1"]/tr') +
            self.doc.xpath('//tr[@class="rowClick" or @class="rowHover"]'))
    ignore = False

    for tr in rows:
        tds = tr.findall('td')
        if len(tds) < 4:
            continue

        # If there are more than 4 columns, ignore the first one.
        offset = min(len(tds) - 4, 1)

        if tr.attrib.get('class', '') == 'DataGridHeader':
            # A header row decides whether the rows below it are skipped.
            ignore = (tds[2].text == u'Titulaire')
            continue

        if ignore:
            continue

        # Remove useless details so they do not leak into the label text.
        detail = tr.cssselect('div.detail')
        if detail:
            detail[0].drop_tree()

        t = Transaction()

        # Date and label are counted from the left (after the optional
        # extra column); debit and credit are always the last two cells.
        date = u''.join([txt.strip() for txt in tds[offset].itertext()])
        raw = u' '.join([txt.strip() for txt in tds[offset + 1].itertext()])
        debit = u''.join([txt.strip() for txt in tds[-2].itertext()])
        credit = u''.join([txt.strip() for txt in tds[-1].itertext()])

        # Collapse runs of spaces in the label before parsing.
        t.parse(date, re.sub(r'[ ]+', ' ', raw))

        # NOTE(review): this element looks like the "card debited on ..."
        # title of a deferred-card history page -- confirm. When present,
        # the row date becomes ``rdate`` and ``date`` is read from the
        # title text instead.
        card_debit_date = self.doc.xpath(
            u'//span[@id="MM_HISTORIQUE_CB_m_TableTitle3_lblTitle"] | //label[contains(text(), "débiter le")]'
        )
        if card_debit_date:
            t.rdate = Date(dayfirst=True).filter(date)
            m = re.search(r'(\d{2}\/\d{2}\/\d{4})', card_debit_date[0].text)
            if m is None:
                # Explicit raise (not ``assert``) so the check survives -O.
                raise AssertionError(
                    'no dd/mm/yyyy date found in %r' % card_debit_date[0].text)
            t.date = Date(dayfirst=True).filter(m.group(1))

        if t.date is NotAvailable:
            continue

        if 'tot dif' in t.raw.lower():
            t.deleted = True

        t.set_amount(credit, debit)
        yield t
def get_transactions(self):
    """Yield a ``Transaction`` for each ``tr`` of the first ``tbody``.

    For every row the date (``td.op_date``), label (``td.op_label``),
    amount (``td.op_amount``) and category (``td.op_type``) cells are read.
    The date text is split on ``/`` and its integer parts are passed to
    ``date`` in reverse order, i.e. the text is treated as day/month/year.

    The transaction id is the MD5 hex digest of the UTF-8 encoded date
    text, label and cleaned amount.

    Raises:
        IndexError: if the document has no ``tbody``.
        AttributeError: if a row lacks one of the expected cells.
    """
    table = self.document.findall('//tbody')[0]
    for tr in table.xpath('tr'):
        textdate = tr.find('td[@class="op_date"]').text_content()
        textraw = tr.find('td[@class="op_label"]').text_content().strip()

        # The id will be rewritten; 1 is only a placeholder.
        op = Transaction(1)
        amount = op.clean_amount(
            tr.find('td[@class="op_amount"]').text_content())

        # Encode every part before concatenating: mixing the unencoded date
        # text with encoded byte strings fails as soon as the label holds
        # non-ASCII characters.
        digest = hashlib.md5(textdate.encode('utf-8') +
                             textraw.encode('utf-8') +
                             amount.encode('utf-8')).hexdigest()
        op.id = digest

        day_month_year = [int(x) for x in textdate.split('/')]
        op.parse(date=date(*reversed(day_month_year)), raw=textraw)

        # Force the use of the website category (set after parse()).
        op.category = unicode(tr.find('td[@class="op_type"]').text)
        op.amount = Decimal(amount)
        yield op
def get_transactions(self):
    """Yield a ``Transaction`` for each ``tr`` of the first ``tbody``.

    Each row supplies four cells: ``td.op_date`` (date text),
    ``td.op_label`` (raw label, stripped), ``td.op_amount`` (amount text,
    passed through ``Transaction.clean_amount``) and ``td.op_type``
    (category). The date text is split on ``/`` and the integer parts are
    given to ``date`` in reverse order, so it is read as day/month/year.

    The transaction id is the MD5 hex digest of the UTF-8 encoded date
    text, label and cleaned amount.

    Raises:
        IndexError: if the document has no ``tbody``.
        AttributeError: if a row lacks one of the expected cells.
    """
    table = self.document.findall('//tbody')[0]
    for tr in table.xpath('tr'):
        textdate = tr.find('td[@class="op_date"]').text_content()
        textraw = tr.find('td[@class="op_label"]').text_content().strip()

        # The id will be rewritten; 1 is only a placeholder.
        op = Transaction(1)
        amount = op.clean_amount(
            tr.find('td[@class="op_amount"]').text_content())

        # All three parts are encoded before being joined; concatenating
        # the unencoded date with byte strings breaks on non-ASCII labels.
        hashed_parts = (textdate.encode('utf-8') +
                        textraw.encode('utf-8') +
                        amount.encode('utf-8'))
        op.id = hashlib.md5(hashed_parts).hexdigest()

        op.parse(date=date(*reversed([int(x) for x in textdate.split('/')])),
                 raw=textraw)

        # Force the use of the website category (set after parse()).
        op.category = unicode(tr.find('td[@class="op_type"]').text)
        op.amount = Decimal(amount)
        yield op
def get_transactions(self, index):
    """Yield a ``Transaction`` for each dated table after the first ``index``.

    Every ``table`` of the document that contains a ``td.date`` cell counts
    as one transaction; tables without such a cell are ignored and do not
    count towards ``index``.

    Args:
        index: number of leading transactions to skip (already parsed).

    The date text is either ``hier`` (yesterday), ``aujourd'hui`` (today)
    or a ``<day> <french month> <year>``-like string whose second word is
    looked up in ``self.monthvalue``; in all cases it is normalised to
    ``dd/mm/yyyy`` before being hashed and parsed.

    The category is the part of the ``td.picto > span`` class name before
    the first ``-``, lower-cased, translated through ``self.catvalue`` when
    a mapping exists and used verbatim otherwise.

    Raises:
        KeyError: if the month word is not in ``self.monthvalue``.
    """
    skipped = 0
    for table in self.document.xpath('//table'):
        try:
            textdate = table.find('.//td[@class="date"]').text_content()
        except AttributeError:
            # No date cell: this table is not a transaction.
            continue

        # Do not parse transactions already parsed.
        if skipped < index:
            skipped += 1
            continue

        if textdate == 'hier':
            textdate = (date.today() - timedelta(days=1)).strftime('%d/%m/%Y')
        elif textdate == "aujourd'hui":
            textdate = date.today().strftime('%d/%m/%Y')
        else:
            # Replace the month word by "/<value>/" and drop the spaces.
            frenchmonth = textdate.split(' ')[1]
            month = self.monthvalue[frenchmonth]
            textdate = textdate.replace(' ', '')
            textdate = textdate.replace(frenchmonth, '/%s/' % month)

        # We use lower for compatibility with old website.
        textraw = table.find('.//td[@class="lbl"]').text_content().strip().lower()

        # The id will be rewritten; 1 is only a placeholder.
        op = Transaction(1)
        amount_cell = table.xpath('.//td[starts-with(@class, "amount")]')[0]
        amount = op.clean_amount(amount_cell.text_content())

        op.id = hashlib.md5(textdate.encode('utf-8') +
                            textraw.encode('utf-8') +
                            amount.encode('utf-8')).hexdigest()

        # textdate is dd/mm/yyyy here; reversed it gives (year, month, day).
        op.parse(date=date(*reversed([int(x) for x in textdate.split('/')])),
                 raw=textraw)

        category = table.find('.//td[@class="picto"]/span')
        category = unicode(category.attrib['class'].split('-')[0].lower())
        try:
            op.category = self.catvalue[category]
        except KeyError:
            # Unknown category: keep the raw class-name prefix.
            op.category = category

        op.amount = Decimal(amount)
        yield op