class item(ItemElement):
    """Build one ``Account`` from a row of the accounts table."""

    klass = Account

    def condition(self):
        """Delegate the keep/skip decision for this row to the page."""
        return self.page.accounts_list_condition(self.el)

    class Type(Filter):
        """Translate an account label into an account type."""

        def filter(self, label):
            """Return the type of the first ``AccountsPage.TYPES`` pattern
            contained in ``label``, or ``Account.TYPE_UNKNOWN`` if none is."""
            candidates = (
                account_type
                for needle, account_type in AccountsPage.TYPES.items()
                if needle in label
            )
            return next(candidates, Account.TYPE_UNKNOWN)

    # The label is the concatenation of the second and third "ColonneLibelle" cells.
    obj__title = CleanText('td[@class="ColonneLibelle"][2]')
    obj__nature = CleanText('td[@class="ColonneLibelle"][3]')
    obj_label = Format('%s %s', Field('_title'), Field('_nature'))
    obj_currency = FrenchTransaction.Currency('./td[@class="ColonneCode"]')
    # The first "ColonneLibelle" cell carries both the id and the optional link.
    obj_id = CleanText('td[@class="ColonneLibelle"][1]')
    obj__link = Link('td[@class="ColonneLibelle"][1]/a', default=NotAvailable)
    obj__rib_link = Link('.//a[contains(@href, "rib.jsp")]')
    obj_type = Type(Field('label'))
    obj_balance = CleanDecimal('td[@class="ColonneNumerique"]/nobr',
                               replace_dots=True)
def obj_type(self):
    """Guess the account type from its label, then from its section title.

    The label is tried first. Only when no pattern matches it is the text
    of the preceding "avoirs" block read and tried against the same
    patterns. Returns ``Account.TYPE_UNKNOWN`` when neither matches.
    """
    type_patterns = {
        'comptes? bancaires?': Account.TYPE_CHECKING,
        'livrets?': Account.TYPE_SAVINGS,
        'epargnes? logement': Account.TYPE_SAVINGS,
        "autres produits d'epargne": Account.TYPE_SAVINGS,
        'comptes? titres? et pea': Account.TYPE_MARKET,
        'compte-titres': Account.TYPE_MARKET,
        'assurances? vie et retraite': Account.TYPE_LIFE_INSURANCE,
        u'prêt': Account.TYPE_LOAN,
        u'crédits?': Account.TYPE_LOAN,
        'plan d\'epargne en actions': Account.TYPE_PEA,
    }

    def match_type(text):
        # Keys are regular expressions so that both singular and plural
        # spellings match; the comparison is done on the lowercased text.
        lowered = text.lower()
        for pattern, account_type in type_patterns.items():
            if re.search(pattern, lowered):
                return account_type
        return None

    # First try to match with the label.
    found = match_type(Field('label')(self))
    if found is not None:
        return found

    # Then with the section title (evaluated only when the label failed).
    section = Regexp(
        CleanText('../../preceding-sibling::div[@class="avoirs"][1]/span[1]'),
        r'(\d+) (.*)', '\\2')(self)
    found = match_type(section)
    if found is not None:
        return found

    return Account.TYPE_UNKNOWN
class item(ItemElement):
    # Main account cards are all deferred and their
    # coming is already displayed with a '-' sign.
    klass = Account

    def condition(self):
        """Keep every card except those whose ``codeSituationCarte`` is 5.

        Any value other than 5 or 7 is kept as well, but a warning is logged.
        """
        situation = Dict('codeSituationCarte')(self)
        # Cards with codeSituationCarte equal to 7 are active and present on the website
        # Cards with codeSituationCarte equal to 5 are absent on the website, we skip them
        is_known_situation = situation in (5, 7)
        if not is_known_situation:
            self.logger.warning(
                'codeSituationCarte unknown, Check if the %s card is present on the website',
                Field('id')(self))
        return situation != 5

    # The card id is "idCarte" with its spaces removed; it doubles as the number.
    obj_id = CleanText(Dict('idCarte'), replace=[(' ', '')])
    obj_number = Field('id')
    obj_label = Format('Carte %s %s', Field('id'), CleanText(Dict('titulaire')))
    obj_type = Account.TYPE_CARD
    obj_coming = Eval(float_to_decimal, Dict('encoursCarteM'))
    obj_balance = Decimal(0)
    obj__index = Dict('index')
    obj__id_element_contrat = None
def obj_type(self):
    """Classify a card transaction.

    A raw label listed in the browser's ``SUMMARY_CARD_LABEL`` is a card
    summary; otherwise a strictly positive amount is an order and anything
    else is a deferred card transaction.
    """
    summary_labels = self.page.browser.SUMMARY_CARD_LABEL
    if Field('raw')(self) in summary_labels:
        return Transaction.TYPE_CARD_SUMMARY
    if Field('amount')(self) > 0:
        return Transaction.TYPE_ORDER
    return Transaction.TYPE_DEFERRED_CARD
def obj__redacted_card(self):
    """Return the redacted card number read from the async details page.

    Only labels starting with 'FACTURE CARTE' and not containing
    ' SUIVANT RELEVE DU ' are looked up; every other label yields ``None``.
    """
    label = Field('raw')(self)
    wants_details = (label.startswith('FACTURE CARTE')
                     and ' SUIVANT RELEVE DU ' not in label)
    if wants_details:
        details_page = Async('details').loaded_page(self)
        return details_page.get_redacted_card()
    return None
class item(ItemElement):
    """Build one ``Transaction`` from a history table row."""

    klass = Transaction

    obj_id = None  # will be overwritten by the browser
    obj_amount = CleanDecimal('.//td[starts-with(@class, "amount")]',
                              replace_dots=True)
    obj_date = INGDate(CleanText('.//td[@class="date"]'), dayfirst=True)
    obj_rdate = Field('date')
    obj__hash = PreHashmd5(Field('date'), Field('raw'), Field('amount'))
    obj_category = INGCategory(Attr('.//td[@class="picto"]/span', 'class'))

    def obj_raw(self):
        """Return the parsed raw label, or "<date> <amount>" when it is empty."""
        # we use lower for compatibility with the old website
        parsed = Transaction.Raw(Lower('.//td[@class="lbl"]'))(self)
        if parsed:
            return parsed
        return Format('%s %s', Field('date'), Field('amount'))(self)

    def condition(self):
        """Skip rows without a date cell and rows located before ``env['index']``.

        ``self.page.i`` is incremented for every row skipped because of the index.
        """
        if self.el.find('.//td[@class="date"]') is None:
            return False
        if 'index' not in self.env:
            return True
        start_index = self.env['index']
        if start_index > 0 and self.page.i < start_index:
            self.page.i += 1
            return False
        return True
def obj_url(self):
    """Return the URL of the stream to use for this item.

    ``self.el['request']['files']`` maps a streaming method to its streams.
    The method stored in ``self.obj._method`` is used when available,
    otherwise the first available one. An 'hls' entry is a single mapping
    holding a 'url'; any other entry is indexed by ``self.obj._quality``,
    falling back to index 0 when the quality is out of range.

    Raises:
        ParseError: when no streaming method is listed at all.
        ValueError: when the selected method has no stream.
    """
    files = self.el['request']['files']
    if not files:
        # list() works whatever falsy container was received and prints the
        # same way on Python 2 and 3.
        raise ParseError(
            'Unable to detect any stream method for id: %r (available: %s)'
            % (int(Field('id')(self)), list(files)))

    # Chosen method is not available, we choose another one.
    method = self.obj._method
    if method not in files:
        # dict.keys() is a view on Python 3 and cannot be indexed.
        method = next(iter(files))

    streams = files[method]
    if not streams:
        raise ValueError('There is no url available for id: %r'
                         % (int(Field('id')(self))))

    # stream is single for hls, just return the url
    stream = streams['url'] if method == 'hls' else None

    # ...but a list for progressive
    # we assume the list is sorted by quality with best first
    if not stream:
        quality = self.obj._quality
        if quality < len(streams):
            stream = streams[quality]['url']
        else:
            stream = streams[0]['url']
    return stream
def parse(self, el):
    """Compute ``env['id']`` for the row being parsed.

    The id concatenates the two word groups captured from the 'id' cell,
    the label with its spaces removed, and the position of ``el`` among
    the rows selected by the XPath below (0 when it is not among them).
    """
    label = Field('label')(self)
    rows = self.xpath('//td[contains(text(), $label)]/ancestor::tr[1] | ./ancestor::table[1]/tbody/tr', label=label)
    # Index of the first selected row equal to the current element.
    position = next((idx for idx, row in enumerate(rows) if el == row), 0)
    compact_label = label.replace(' ', '')
    id_prefix = Regexp(CleanText(TableCell('id')), r'(\w+)\s-\s(\w+)', r'\1\2')(self)
    self.env['id'] = "%s%s%s" % (id_prefix, compact_label, position)
class TransactionElement(ItemElement):
    """Build one ``Transaction`` and the investments listed under it.

    Date and amount are read from the ``h3`` header preceding the enclosing
    block; the label combines the captured word of the preceding
    "Historique Des ..." title with the date.
    """

    klass = Transaction

    obj_label = Format('%s du %s', Field('_labeltype'), Field('date'))
    obj_date = Date(
        Regexp(
            CleanText(
                './ancestor::div[@class="onerow" or starts-with(@id, "term") or has-class("grid")]/'
                'preceding-sibling::h3[1]//div[contains(text(), "Date")]'),
            r'(\d{2}\/\d{2}\/\d{4})'),
        dayfirst=True)
    obj_type = Transaction.TYPE_BANK
    obj_amount = MyDecimal(
        './ancestor::div[@class="onerow" or starts-with(@id, "term") or has-class("grid")]/'
        'preceding-sibling::h3[1]//div[has-class("montant-mobile")]',
        default=NotAvailable)
    # Raw string: "\s" and "\w" are regex escapes, not string escapes.
    obj__labeltype = Regexp(Capitalize('./preceding::h2[@class="feature"][1]'),
                            r'Historique Des\s+(\w+)')

    def obj_investments(self):
        """Return the investments found under this transaction as a list."""
        return list(self.iter_investments(self.page, parent=self))

    @method
    class iter_investments(ListElement):
        item_xpath = './div[@class="line"]'

        class item(InvestmentElement):
            pass

    def parse(self, el):
        """Store the transaction date in the environment."""
        self.env['date'] = Field('date')(self)
class item(ItemElement):
    """Build one ``Bill`` from an order block."""

    klass = Bill

    obj__simple_id = CleanText(
        './/div[has-class("actions")]//span[has-class("value")]')
    obj_id = Format('%s_%s', Env('subid'), Field('_simple_id'))
    obj_url = Format(
        '/gp/css/summary/print.html/ref=oh_aui_ajax_pi?ie=UTF8&orderID=%s',
        Field('_simple_id'))
    obj_format = 'html'
    obj_label = Format('Facture %s', Field('_simple_id'))
    obj_type = 'bill'

    def obj_date(self):
        """Parse the date from the left-column value that lacks the currency."""
        date_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and not(contains(., "%s"))]'
            % Env('currency')(self))
        return parse_french_date(CleanText(date_xpath)(self))

    def obj_price(self):
        """Parse the price from the left-column value containing the currency.

        Dots are replaced only when the currency is EUR.
        """
        currency = Env('currency')(self)
        price_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
            % currency)
        euro_format = currency == u'EUR'
        return CleanDecimal(price_xpath, replace_dots=euro_format)(self)

    def obj_currency(self):
        """Read the currency from the same value used for the price."""
        price_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
            % Env('currency')(self))
        return Currency(price_xpath)(self)
def obj_type(self):
    """Look up the account type from a keyword of the label.

    The keyword is the second word when the label starts with 'ETOILE',
    otherwise the first one; it is upper-cased and looked up in
    ``self.page.TYPES``. ``Account.TYPE_UNKNOWN`` is returned when the
    keyword is unknown or when the label has too few words to hold one.
    """
    label = Field('label')(self)
    words = label.split()
    keyword_index = 1 if label.startswith('ETOILE') else 0
    if len(words) <= keyword_index:
        # Empty label, or 'ETOILE' with nothing after it.
        return Account.TYPE_UNKNOWN
    return self.page.TYPES.get(words[keyword_index].upper(),
                               Account.TYPE_UNKNOWN)
def condition(self):
    """Keep only rows marked "Validé" whose label is a real transaction."""
    # We do not scrape "Arrêté annuel" transactions since it is just a yearly synthesis of the contract,
    # nor "Fusion-absorption" transactions because they have no amount.
    if "Validé" not in CleanText('./td[3]')(self):
        return False
    if "Arrêté annuel" in Field('label')(self):
        return False
    return "Fusion-absorption" not in Field('label')(self)
def obj_amount(self):
    """Return the transaction amount.

    The ``else`` branch returns the decimal parsed from ``./td[5]`` minus
    the decimal parsed from ``./td[6]``.
    """
    # NOTE(review): ``Field('obj_commission')`` is built here but never applied
    # to ``self``, so the test is made on the filter object, not on a field
    # value - presumably always truthy, which would make the first branch
    # unreachable. Likewise the first branch returns a ``Field`` object rather
    # than a value. The intended field names are not visible here; confirm
    # against the enclosing class before changing anything.
    if not Field('obj_commission'):
        return Field('_obj_amnt')
    else:
        return CleanDecimal(replace_dots=False).filter(
            self.el.xpath('./td[5]')) - CleanDecimal(
                replace_dots=False).filter(
                    self.el.xpath('./td[6]'))
def obj_url(self):
    """Return the document URL.

    When the '_url_base' field is set, it is HTML-unescaped and returned;
    otherwise the link is read from the ``<a>`` found in the table cell
    named by the '_cell' field (``NotAvailable`` when there is none).
    """
    if not Field('_url_base')(self):
        cell = TableCell(Field('_cell')(self))(self)[0]
        return Link(cell.xpath('./a'), default=NotAvailable)(self)
    # URL won't work if HTML is not unescaped
    escaped_url = str(Field('_url_base')(self))
    return HTMLParser().unescape(escaped_url)
class item(ItemElement):
    """Build the ``Subscription`` from the welcome text and balance spans."""

    klass = Subscription

    # The id is the bold text inside the "welcome-text" span.
    obj_id = CleanText('//span[@class="welcome-text"]/b')
    # Balance parsed without replacing dots.
    obj__balance = CleanDecimal(CleanText('//span[@class="balance"]'), replace_dots=False)
    # The label shows both the id and the balance.
    obj_label = Format(u"Poivy - %s - %s €", Field('id'), Field('_balance'))
def parse(self, el):
    """Store a one-element investment list in ``env['investments']``.

    The investment takes its label, valuation and date from the fields of
    the current item, and its code and quantity from the table cells.
    """
    investment = Investment()
    # Evaluated in this order: label, code, quantity, valuation, vdate.
    sources = (
        ('label', Field('label')),
        ('code', CleanText(TableCell('code'))),
        ('quantity', MyDecimal(TableCell('quantity'))),
        ('valuation', Field('amount')),
        ('vdate', Field('date')),
    )
    for attribute, selector in sources:
        setattr(investment, attribute, selector(self))
    self.env['investments'] = [investment]
def parse(self, el):
    """Attach this item's link to an already-known account, if any.

    When an account of ``env['accounts']`` has the same id as this item,
    the '_link_id' field is appended to the ``_card_links`` of the first
    such account and the item is skipped.

    Raises:
        SkipItem: when a matching account was found.
    """
    same_id_accounts = [
        known for known in self.env['accounts']
        if known.id == Field('id')(self)
    ]
    if not same_id_accounts:
        return
    first_match = same_id_accounts[0]
    first_match._card_links.append(Field('_link_id')(self))
    raise SkipItem()
def obj_label(self):
    """Return the document label.

    When the '_label_base' field is set, it is HTML-unescaped and returned;
    otherwise the text of the "ec_visually_hidden" span inside the table
    cell named by the '_cell' field is used.
    """
    if not Field('_label_base')(self):
        cell = TableCell(Field('_cell')(self))(self)[0]
        hidden_spans = cell.xpath('.//span[@class="ec_visually_hidden"]')
        return CleanText(hidden_spans)(self)
    escaped_label = str(Field('_label_base')(self))
    return HTMLParser.HTMLParser().unescape(escaped_label)
def obj_gross_amount(self):
    """Return the gross amount embedded in the label, when there is a commission.

    ``NotAvailable`` is returned when the commission is empty or when the
    label does not contain an amount followed by 'E COM'.
    """
    if empty(Field('commission')(self)):
        return NotAvailable
    # gross_amount can be scraped from labels like 'REMISE CB /14/08 XXXXXX YYYYYYYYYYY ZZ 105,00E COM 0,84E'
    # NOTE(review): the leading [...] is a character class, so it also accepts
    # the literal characters '{', '}', ',' and '?'.
    amount_text = Regexp(
        Field('label'),
        r' ([\d{1,3}\s?]*\d{1,3},\d{2})E COM',
        default='')
    return CleanDecimal.French(amount_text, default=NotAvailable)(self)
class item(ItemElement):
    """Build one ``Bill``; its download URL comes from an async details page."""

    klass = Bill
    load_details = Field('_pre_url') & AsyncLoad

    obj__simple_id = CleanText(
        './/span[contains(text(), "N° de commande")]/following-sibling::span'
    )
    obj_id = Format('%s_%s', Env('subid'), Field('_simple_id'))
    obj__pre_url = Format(
        '/gp/shared-cs/ajax/invoice/invoice.html?orderId=%s&relatedRequestId=%s&isADriveSubscription=&isHFC=',
        Field('_simple_id'), Env('request_id'))
    obj_label = Format('Facture %s', Field('_simple_id'))
    obj_type = DocumentTypes.BILL

    def obj_date(self):
        """Parse the order date, trying the "a-span4" block then the "a-span3" one."""
        first_try = Date(
            CleanText(
                './/div[has-class("a-span4") and not(has-class("recipient"))]/div[2]'
            ),
            parse_func=parse_french_date, dayfirst=True,
            default=NotAvailable)(self)
        if first_try is not NotAvailable:
            return first_try
        # No default here: a date missing from both blocks is an error.
        return Date(
            CleanText(
                './/div[has-class("a-span3") and not(has-class("recipient"))]/div[2]'
            ),
            parse_func=parse_french_date, dayfirst=True)(self)

    def obj_price(self):
        """Parse the price from the left-column value containing the currency.

        Dots are replaced only when the currency is EUR.
        """
        currency = Env('currency')(self)
        price_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
            % currency)
        euro_format = currency == u'EUR'
        return CleanDecimal(price_xpath, replace_dots=euro_format)(self)

    def obj_currency(self):
        """Read the currency from the same value used for the price."""
        price_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
            % Env('currency')(self))
        return Currency(price_xpath)(self)

    def obj_url(self):
        """Return the download link of the details page, else its print-summary link."""
        details_doc = Async('details').loaded_page(self).doc
        download_url = Link(
            '//a[contains(@href, "download")]|//a[contains(@href, "generated_invoices")]',
            default=NotAvailable)(details_doc)
        if download_url:
            return download_url
        return Link(
            '//a[contains(text(), "Imprimer un récapitulatif de commande")]'
        )(details_doc)

    def obj_format(self):
        """Return 'html' when the URL contains 'summary', 'pdf' otherwise."""
        return 'html' if 'summary' in Field('url')(self) else 'pdf'
def obj_id(self):
    """Build the id as "<subid>_<ddmmYYYY><amount>".

    The amount is the 'price' field, or the '_ht' field when the price is
    ``NotAvailable``.
    """
    price_missing = Field('price')(self) is NotAvailable
    amount_field = Field('_ht') if price_missing else Field('price')
    return '%s_%s%s' % (
        Env('subid')(self),
        Field('date')(self).strftime('%d%m%Y'),
        amount_field(self),
    )
def obj_rdate(self):
    """Return the real date embedded in the raw label, else the 'date' field.

    The text extracted from the raw label has its slashes removed and is
    then split as two digits, two digits and the rest, and parsed
    day-first.
    """
    # Raw string: "\d" is a regex escape, not a string escape.
    s = Regexp(Field('raw'),
               r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
               default=NotAvailable)(self)
    if not s:
        return Field('date')(self)
    digits = s.replace('/', '')
    return Date(dayfirst=True).filter(
        '%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
def obj__is_json_histo(self):
    """Return ``True`` for the account kinds listed below, ``None`` otherwise."""
    account_type = Field('type')(self)

    # For TYPE_REVOLVING_CREDIT, to get transaction
    if account_type == Account.TYPE_REVOLVING_CREDIT:
        if Dict('produit')(self) not in ('COMPTE_ALTERNA', 'AVANCE_PATRIMOINE'):
            return True

    # PLAN_EPARGNE_POPULAIRE account type history is not in json yet
    if account_type == Account.TYPE_SAVINGS:
        if Dict('produit')(self) not in ('PLAN_EPARGNE_POPULAIRE', ):
            return True

    return None
class item(ItemElement):
    """Build one ``Investment`` from a table row."""

    klass = Investment

    obj_label = CleanText(TableCell('label'))
    obj_code = CleanText(TableCell('code'))
    obj_unitvalue = CleanDecimal(TableCell('unitvalue'), replace_dots=True)
    obj_quantity = CleanDecimal(TableCell('quantity'), replace_dots=True)
    # The valuation is computed as quantity * unit value, not read from the table.
    obj_valuation = Eval(lambda x, y: x * y, Field('quantity'), Field('unitvalue'))
    obj_vdate = Date(CleanText(TableCell('vdate')), dayfirst=True)
class item(ItemElement):
    """Build one ``Account`` from a table row."""

    klass = Account

    # The second cell holds the id, which is reused as number and in the label.
    obj_id = CleanText('./td[position()=2]')
    # The sixth cell holds the balance.
    obj_balance = CleanDecimal('./td[position()=6]', replace_dots=True)
    obj_label = Format('Millésime %s', Field('id'))
    obj_number = Field('id')
    # The currency is hard-coded.
    obj_currency = 'EUR'
    # "name" attribute of the input found in the row.
    obj__page = Attr('./td//input', 'name')
def parse(self, el):
    """Fill ``env['investments']`` with zero or one investment.

    The list is empty when the 'code' cell has no text. Otherwise the
    investment code is the stripped last text node of that cell.
    """
    if not CleanText(TableCell('code'))(self):
        self.env['investments'] = []
        return

    investment = Investment()
    investment.label = Field('label')(self)
    code_cell = TableCell('code')(self)[0]
    last_text_node = code_cell.xpath('./text()[last()]')[0]
    investment.code = unicode(last_text_node).strip()
    investment.quantity = MyDecimal(TableCell('quantity'))(self)
    investment.valuation = Field('amount')(self)
    investment.vdate = Field('date')(self)
    self.env['investments'] = [investment]
def parse(self, el):
    """Populate ``env`` with the label, number, id and balance of the account.

    The balance is the negated amount due, read from the "somme-due"
    paragraph or, failing that, from the "total-sommes-dues" block.
    """
    env = self.env
    env['label'] = CleanText('./h3/a')(self) or 'Carte Oney'

    # The number is the value of the <option> whose text contains the label
    # without its 'Ma ' part.
    short_label = Field('label')(self).replace('Ma ', '')
    option_xpath = '%s%s%s' % ('//option[contains(text(), "', short_label, '")]')
    env['_num'] = Attr(option_xpath, 'value', default='')(self)

    # The id is the browser username followed by that number.
    raw_id = '%s%s' % (self.page.browser.username, Field('_num')(self))
    env['id'] = Format(raw_id)(self)

    # On the multiple accounts page, decimals are separated with dots, and separated with commas on single account page.
    amount_due = CleanDecimal(
        './p[@class = "somme-due"]/span[@class = "synthese-montant"]',
        default=None)(self)
    if amount_due is None:
        amount_due = CleanDecimal(
            './div[@id = "total-sommes-dues"]/p[contains(text(), "sommes dues")]/span[@class = "montant"]',
            replace_dots=True)(self)
    env['balance'] = -amount_due
def obj_rdate(self):
    """Return the real date of the transaction.

    An ``rdate`` already present on the object wins. Otherwise the date
    embedded in the raw label is used, falling back to the 'date' field.
    The extracted text has its slashes removed and is then split as two
    digits, two digits and the rest, and parsed day-first.
    """
    if self.obj.rdate:
        # Transaction.Raw may have already set it
        return self.obj.rdate

    # Raw string: "\d" is a regex escape, not a string escape.
    s = Regexp(Field('raw'),
               r' (\d{2}/\d{2}/\d{2}) | (?!NUM) (\d{6}) ',
               default=NotAvailable)(self)
    if not s:
        return Field('date')(self)
    digits = s.replace('/', '')
    return Date(dayfirst=True).filter(
        '%s-%s-%s' % (digits[:2], digits[2:4], digits[4:]))
class item(ItemElement):
    """Build one ``Account`` from the spans of an account entry."""

    klass = Account

    # The currency is hard-coded.
    obj_currency = u'EUR'
    obj__id = CleanText('span[@class="account-number"]')
    obj_label = CleanText('span[@class="title"]')
    # Id and type are both derived from the label through helper filters.
    obj_id = AddPref(Field('_id'), Field('label'))
    obj_type = AddType(Field('label'))
    obj_balance = CleanDecimal('span[@class="solde"]/label', replace_dots=True)
    obj_coming = NotAvailable
    # Value of the page's "javax.faces.ViewState" input (absolute XPath).
    obj__jid = Attr('//input[@name="javax.faces.ViewState"]', 'value')
def load_details(self):
    """Open the transaction detail page asynchronously for card invoices.

    Nothing is requested (``None`` is returned) unless the raw label
    starts with 'FACTURE CARTE'.
    """
    if Field('raw')(self).startswith('FACTURE CARTE'):
        browser = self.page.browser
        detail_url = browser.transaction_detail.build()
        payload = {
            'type_mvt': self.detail_type_mvt,
            'numero_mvt': Field('_trid')(self),
        }
        return browser.open(detail_url, is_async=True, data=payload)
    return None
def obj_type(self):
    """Return the transaction type, detecting card summaries from the raw label.

    The base type comes from ``self.page.TYPES`` keyed by
    'nature/codefamille'. Card and unknown transactions whose raw label
    matches the corresponding pattern are reported as card summaries.
    """
    family_code = Dict('nature/codefamille')(self)
    base_type = self.page.TYPES.get(family_code, Transaction.TYPE_UNKNOWN)

    if base_type == Transaction.TYPE_CARD:
        summary_pattern = r' RELEVE DU \d+\.'
    elif base_type == Transaction.TYPE_UNKNOWN:
        summary_pattern = r'FACTURE CARTE AFFAIRES \w{16} SUIVANT RELEVE DU \d{2}.\d{2}.\d{4}'
    else:
        # Other types are never card summaries; the raw label is not read.
        return base_type

    if re.search(summary_pattern, Field('raw')(self)):
        return Transaction.TYPE_CARD_SUMMARY
    return base_type
def obj_investments(self):
    """Return the investment matching this transaction, as a list.

    The lookup key is (isin, action, datetime). The list is empty when the
    isin or the action is missing, or when no investment has that key.
    """
    investments_by_key = Env('transaction_investments')(self).v
    isin = Field('_isin')(self)
    action = Field('_action')(self)
    if not (isin and action):
        return []

    lookup_key = (isin, action, Field('_datetime')(self))
    try:
        matching = investments_by_key[lookup_key]
    except KeyError:
        return []
    return [matching]