class item(Transaction.TransactionElement):
    """Item element built on ``Transaction.TransactionElement``.

    A row is only processed when it holds at least four ``td`` cells.
    """

    # NOTE(review): obj_vdate reads the same "date" column as obj_date, so
    # both fields are built from identical filters -- confirm that this is
    # intended and that no dedicated value-date column should be used.
    obj_date = DateGuesser(CleanText(TableCell("date")), Env("date_guesser"))
    obj_vdate = DateGuesser(CleanText(TableCell("date")), Env("date_guesser"))

    def condition(self):
        """Return True when the current element has four ``td`` cells or more."""
        cells = self.el.xpath('./td')
        return len(cells) >= 4
def parse(self, obj):
    """Fill ``self.env`` with the values of the current history row.

    The keys 'date', 'vdate', 'raw' and 'amount' are set. Which cells are
    read depends on the layout of the "Historique des opérations" table:
    rows with 4, 5, 6 or 7 ``td`` cells are supported. The layout checks use
    document-wide XPaths (they start with ``//``), so the layout is detected
    from the whole document rather than from the current row only.

    :param obj: object being filled; not used by this method.
    :raises AssertionError: when none of the supported layouts is found.
    """
    history_table = (
        '//table[@class="ca-table"]'
        '[caption[span[b[text()="Historique des opérations"]]]]'
    )

    def has_rows_with(td_count):
        # Text of the history-table rows holding exactly `td_count` cells;
        # empty (falsy) when the table has no such row.
        return CleanText(history_table + '//tr[count(td) = %d]' % td_count)(self)

    def guess_date(xpath):
        return DateGuesser(CleanText(xpath), Env('date_guesser'))(self)

    def raw_label(xpath):
        return CleanText(xpath, children=False)(self)

    def last_cell_amount():
        return CleanDecimal.French('./td[last()]')(self)

    self.env['date'] = guess_date('./td[1]')
    self.env['vdate'] = NotAvailable

    if has_rows_with(4):
        # History table with 4 columns
        self.env['raw'] = raw_label('./td[2]')
        self.env['amount'] = last_cell_amount()
    elif has_rows_with(5):
        # History table with 5 columns
        self.env['raw'] = raw_label('./td[3]')
        self.env['amount'] = last_cell_amount()
    elif has_rows_with(6):
        # History table with 6 columns (contains vdate)
        self.env['raw'] = raw_label('./td[4]')
        self.env['vdate'] = guess_date('./td[2]')
        self.env['amount'] = last_cell_amount()
    elif has_rows_with(7):
        # History table with 7 columns: the sixth cell is read with a forced
        # negative sign, and the seventh cell is used when the sixth yields
        # no value.
        self.env['amount'] = Coalesce(
            CleanDecimal.French('./td[6]', sign=lambda x: -1, default=None),
            CleanDecimal.French('./td[7]', default=None)
        )(self)
        if CleanText(history_table + '//th[a[contains(text(), "Valeur")]]')(self):
            # With vdate column ('Valeur')
            self.env['raw'] = raw_label('./td[4]')
            self.env['vdate'] = guess_date('./td[2]')
        else:
            # Without any vdate column
            self.env['raw'] = raw_label('./td[3]')
    else:
        # Raised explicitly instead of `assert False`: assert statements are
        # removed when Python runs with -O, which would let an unsupported
        # layout go through silently with 'raw' and 'amount' left unset.
        raise AssertionError('This type of history table is not handled yet!')
def obj_date(self):
    """Return the debit date of the transaction.

    The day/month pair is extracted from the nearest preceding sibling row
    whose link text contains "Opérations débitées le" (for instance
    'Opérations débitées le 05/07'), and the year is guessed through the
    'date_guesser' environment value.
    """
    summary_rows = self.xpath(
        './preceding-sibling::tr[.//a[contains(text(), "Opérations débitées le")]][1]'
    )
    day_month = Regexp(CleanText(summary_rows), r'(\d{2}/\d{2})')
    debit_date = DateGuesser(day_month, Env("date_guesser"))(self)

    # A debit date guessed before the rdate is moved to the following year:
    # this happens when the debit falls in January while the rdate is in
    # December.
    rdate = Field('rdate')(self)
    if debit_date < rdate:
        debit_date = debit_date.replace(year=debit_date.year + 1)
    return debit_date
def get_card_transactions(self, latest_date, ongoing_coming):
    """Yield the card transactions found in the second "ca-table" table.

    Rows are of two kinds. A row with bold text in its second cell is a
    summary carrying the date applied to the transactions that follow it;
    any other row is a real transaction.

    :param latest_date: date of the last summary already handled, or a
        falsy value when there is none; it is updated while iterating.
    :param ongoing_coming: summaries dated strictly before this date are
        themselves turned into a TYPE_CARD_SUMMARY transaction.
    :return: generator of FrenchTransaction objects.
    """
    for row in self.doc.xpath('//table[@class="ca-table"][2]//tr[td]'):
        if not CleanText('./td[2]/b')(row):
            # This node is a real transaction.
            # Its 'date' is the date of the most recently encountered summary node.
            operation = FrenchTransaction()
            operation.date = latest_date
            guesser = LinearDateGuesser(latest_date)
            operation.rdate = operation.bdate = DateGuesser(
                CleanText('./td[1]//text()'), date_guesser=guesser)(row)
            operation.label = operation.raw = CleanText('./td[2]')(row)
            operation.amount = CleanDecimal.French('./td[last()]')(row)
            operation.type = FrenchTransaction.TYPE_DEFERRED_CARD
            yield operation
            continue

        # This node is a summary containing the 'date' for all following transactions.
        raw_date = Regexp(CleanText('./td[2]/b/text()'), r'le (.*) :')(row)
        summary_date = parse_french_date(raw_date).date()
        if latest_date and summary_date > latest_date:
            # This summary has already been fetched
            continue

        latest_date = summary_date
        if not latest_date < ongoing_coming:
            continue

        # This summary is anterior to the ongoing_coming so we create a transaction from it
        summary = FrenchTransaction()
        summary.date = summary.rdate = latest_date
        summary.raw = summary.label = CleanText('./td[2]/b/text()')(row)
        summary.amount = -CleanDecimal.French('./td[position()=last()]')(row)
        summary.type = FrenchTransaction.TYPE_CARD_SUMMARY
        yield summary
def obj_date(self):
    """Return the date read from the first cell of the row.

    A 10-character text is parsed as a day-first date; a 5-character text
    has no year, which is guessed through the 'date_guesser' environment
    value. Any other length yields None.
    """
    font_text_xpath = './td[1]/font//text()'
    text_length = len(CleanText(font_text_xpath)(self))

    if text_length == 5:
        # Date has no indicated year.
        return DateGuesser(CleanText('./td[1]//text()'), Env('date_guesser'))(self)
    if text_length == 10:
        return Date(CleanText(font_text_xpath), dayfirst=True)(self)
    return None
def obj_date(self):
    """Return the date found in the first column of the row.

    Returns None when the cell text is neither 10 nor 5 characters long.
    """
    # Dates in the first column may appear as '12/01/2019' or '12/01'
    cell_text = CleanText('./td[1]/font//text()')(self)

    if len(cell_text) == 10:
        full_date = Date(CleanText('./td[1]/font//text()'), dayfirst=True)
        return full_date(self)

    if len(cell_text) == 5:
        # Date has no indicated year.
        yearless_date = DateGuesser(CleanText('./td[1]//text()'), Env('date_guesser'))
        return yearless_date(self)

    return None
class item(ItemElement):
    """Item element producing ``Transaction`` objects."""

    klass = Transaction

    # The LinearDateGuesser is created once, when the class body is executed,
    # with a maximum date bump of 45 days.
    obj_date = DateGuesser(
        CleanText('.//span[contains(., "/")]'),
        LinearDateGuesser(date_max_bump=timedelta(45))
    )
    obj_label = CleanText('.//h3/strong')
    obj_amount = MyDecimal('./td[@class="al-r"]/div/span[has-class("badge")]')

    def obj_type(self):
        """Return TYPE_CARD for a negative amount, TYPE_TRANSFER otherwise."""
        is_negative = Field('amount')(self) < 0
        return Transaction.TYPE_CARD if is_negative else Transaction.TYPE_TRANSFER
def obj_date(self):
    """Return the date shared by the page, read from its table.

    The 'dd/dd' pattern is extracted from the first cell of the second row
    of the document's tables, and the year is guessed through the
    'date_guesser' environment value.
    """
    date_cells = self.page.doc.xpath('//table/tr[2]/td[1]')
    day_month = Regexp(CleanText(date_cells), r'(\d{2}/\d{2})')
    guesser = DateGuesser(day_month, Env("date_guesser"))
    return guesser(self)
def date(selector):
    """Build the date filter for ``selector``.

    A ``DateGuesser`` over the cleaned text of ``selector`` is combined,
    through the ``|`` operator, with ``Transaction.Date(selector)``.
    """
    # NOTE(review): `|` presumably makes the second filter the fallback of
    # the first one -- confirm against the filter library.
    guessed = DateGuesser(CleanText(selector), Env('date_guesser'))
    explicit = Transaction.Date(selector)
    return guessed | explicit