def iter_subscription_list(self):
    """Yield one Subscription for the main block, then one per ``bloc_infos`` block.

    The first subscription is identified by the digits found in the third
    ``<li>`` of the ``bloc_contenu_masituation`` block; its label is the block
    heading with the ``Titulaire du compte : `` prefix removed.

    Every following ``bloc_infos`` block gets a synthetic identifier
    ``AFFILIE<n>``, numbered from 1 in document order.
    """
    holder_heading = '//div[@id="bloc_contenu_masituation"]/h3'
    # Captures the run of characters between a tab and the next non-breaking
    # space, provided the character after that space is not a colon.
    firstname_pattern = '\t([^\xa0\t]+)\xa0[^:]'

    fullname = CleanText(holder_heading, replace=[('Titulaire du compte : ', '')])(self.doc)
    # Raw string: ``\d`` is a regex class here, not a Python string escape.
    number = re.sub(
        r'[^\d]+', '',
        self.doc.xpath('//div[@id="bloc_contenu_masituation"]/ul/li')[2].text)
    sub = Subscription(number)
    sub._id = number
    sub.label = unicode(fullname)
    firstname = Regexp(RawText(holder_heading), firstname_pattern)(self.doc)
    sub.subscriber = unicode(firstname)
    yield sub

    # enumerate() replaces the hand-maintained counter; numbering starts at 1.
    for index, child in enumerate(self.doc.xpath('//div[@class="bloc_infos"]'), 1):
        fullname = CleanText('.//h3[1]')(child)
        number = "AFFILIE" + str(index)
        sub = Subscription(number)
        sub._id = number
        sub.label = unicode(fullname)
        firstname = Regexp(RawText('./h3'), firstname_pattern)(child)
        sub.subscriber = unicode(firstname)
        yield sub
def get_balance(self, account_type):
    """Return the raw balance text read from the ``valorisation_compte`` table.

    For ``Account.TYPE_MARKET`` the first row containing ``Évaluation Titres``
    is used and the scan stops there. For any other type, the last row
    containing ``Valorisation totale`` is used. Returns ``None`` when no row
    matches.
    """
    found = None
    rows = self.doc.xpath('//div[@id="valorisation_compte"]//table/tr')
    for row in rows:
        if account_type == Account.TYPE_MARKET:
            row_text = CleanText('.')(row)
            if u'Évaluation Titres' not in row_text:
                continue
            found = RawText('./td[2]')(row)
            break
        # Non-market accounts: no early exit, so a later matching row wins.
        row_text = CleanText('.')(row)
        if 'Valorisation totale' in row_text:
            found = RawText('./td[2]')(row)
    return found
class get_torrent(ItemElement):
    """Item description that fills a ``Torrent`` from the current page."""

    klass = Torrent

    def obj_id(self):
        # The identifier is whatever follows the last '/' of the page URL.
        return self.page.url.rsplit('/', 1)[-1]

    def obj_url(self):
        return NotAvailable

    obj_name = CleanText('//div[@id="title"]')
    obj_magnet = CleanText('//div[@class="download"]/a[starts-with(@href, "magnet:")]/@href')

    # Each field below reads the <dd> that directly follows the <dt> carrying
    # the matching caption inside the "details" block.
    obj_date = Date(CleanText('//div[@id="details"]//dt[.="Uploaded:"]/following-sibling::dd[1]'))
    obj_size = Type(
        Regexp(
            CleanText('//div[@id="details"]//dt[.="Size:"]/following-sibling::dd[1]'),
            r'\((\d+) Bytes\)',  # keep only the number given in bytes
            '\\1'),
        type=float)
    obj_seeders = Type(
        CleanText('//div[@id="details"]//dt[.="Seeders:"]/following-sibling::dd[1]'),
        type=int)
    obj_leechers = Type(
        CleanText('//div[@id="details"]//dt[.="Leechers:"]/following-sibling::dd[1]'),
        type=int)

    obj_description = RawText('//div[@class="nfo"]/pre', children=True)
class fill_paste(ItemElement):
    """Item description that fills a ``PastealaconPaste`` from the current page."""

    klass = PastealaconPaste

    obj_id = Env('id')
    # The <h3> header reads "Posted by <title> on <date> (": the two regexes
    # below pick one capture each out of the same text.
    obj_title = Regexp(CleanText('id("content")/h3'), r'Posted by (.+) on .+ \(')
    obj__date = DateTime(Regexp(CleanText('id("content")/h3'), r'Posted by .+ on (.+) \('))
    obj_contents = RawText('//textarea[@id="code"]')

    def parse(self, el):
        """Raise ``PasteNotFound`` unless exactly one syntax listing is present."""
        # there is no 404, try to detect if there really is a content
        listings = el.xpath('id("content")/div[@class="syntax"]//ol')
        if len(listings) == 1:
            return
        raise PasteNotFound()
class get_video(ItemElement):
    """Item description that fills a ``BaseVideo`` from the current page.

    Duration and URL come from the JSON object assigned to ``window.initials``
    in an inline ``<script>``; the decoded object is kept in ``_props``.
    """

    klass = BaseVideo

    obj_nsfw = True
    obj_ext = 'mp4'
    obj_title = Attr('//meta[@property="og:title"]', 'content')
    obj_id = Env('id')

    # The dot is escaped so the pattern matches the literal "window.initials"
    # rather than "window" + any character + "initials".
    obj__props = Eval(
        json.loads,
        Regexp(RawText('//script[contains(text(),"window.initials =")]'),
               r'window\.initials = (.*);\n'))
    obj_duration = Base(Field('_props'), Dict('videoModel/duration'))
    obj_url = Base(Field('_props'), Dict('videoModel/mp4File'))

    def obj__page(self):
        return self.page.url
class fill_paste(ItemElement):
    """Item description that fills a ``PastebinPaste`` from the current page."""

    klass = PastebinPaste

    def parse(self, el):
        # Store the paste info box in the environment under 'header' so the
        # fields below can be evaluated relative to it.
        info_box = el.find('//div[@id="content_left"]//div[@class="paste_box_info"]')
        self.env['header'] = info_box

    obj_id = Env('id')
    obj_title = Base(
        Env('header'),
        CleanText('.//div[@class="paste_box_line1"]//h1'))
    obj_contents = RawText('//textarea[@id="paste_code"]')
    obj_public = Base(
        Env('header'),
        CleanVisibility(Attr('.//div[@class="paste_box_line1"]//img', 'title')))
    obj__date = Base(
        Env('header'),
        DateTime(Attr('.//div[@class="paste_box_line2"]/span[1]', 'title')))
class item(ItemElement):
    """Item description that fills a card ``Account`` from one table row."""

    klass = Account

    obj_id = CleanText('./td[2]')

    # Only the ends of the raw label are stripped: some account names contain
    # inner spaces, and the history search fails if those are removed
    # (eg: `NAME SURNAME` = `NAME++SURNAME` in the history search).
    obj_label = Eval(lambda raw_label: raw_label.strip(), RawText('./td[1]'))

    obj_type = Account.TYPE_CARD
    obj__rib = Env('rib')

    # this field is something used to make the module work, not something
    # meant to be displayed to end users
    obj__company = Env('company', default=None)

    obj_currency = 'EUR'
    obj_number = CleanText('./td[2]', replace=[(' ', '')])
    obj_url = AbsoluteLink('./td[2]/a')

    # "<id>:<label>", built from the two filters declared above.
    obj__completeid = Format('%s:%s', obj_id, obj_label)
class get_video(ItemElement):
    """Item description that fills a ``BaseVideo`` from the current page.

    Duration and URL come from a JSON argument of the ``XPlayerTPL2(`` call
    found in an inline ``<script>``; the decoded object is kept in ``_props``.
    """

    klass = BaseVideo

    obj_nsfw = True
    obj_ext = 'mp4'
    obj_title = Attr('//meta[@property="og:title"]', 'content')
    obj_id = Env('id')

    # The regex skips the line right after "XPlayerTPL2(" and captures the
    # following line up to its trailing comma.
    obj__props = Eval(
        json.loads,
        Regexp(
            RawText('//script[contains(text(),"XPlayerTPL2")]'),
            r'XPlayerTPL2\(\n[^\n]+\n(.*),\n'))
    obj_duration = Base(Field('_props'), Dict('duration'))
    obj_url = Base(Field('_props'), Dict('sources/mp4/0/url'))

    def obj__page(self):
        return self.page.url
def get_list(self):
    """Build and return the list of accounts described on this page.

    Each ``div`` whose class contains `` compte`` (but not
    ``compte_selected``) is turned into an ``Account``, or a ``Loan`` when its
    history link opens a ``LoanPage``. Balance, currency and, for PEA/market
    accounts, investments are read from pages opened through the browser.

    Raises ``ActionNeeded`` or ``BrowserUnavailable`` when, while the result
    list is still empty, the opened history page carries one of the known
    blocking messages.

    An account is skipped when an account with the same
    ``(label, id, balance)`` is already in the list.
    """
    # NOTE(review): block nesting was reconstructed from a flattened source;
    # confirm against the upstream file before relying on it.
    accounts = []
    for cpt in self.doc.xpath(
            '//div[contains(@class, " compte") and not(contains(@class, "compte_selected"))]'
    ):
        # ignore auto assurance accounts
        if 'aut' in cpt.get('class'):
            continue

        account = Account()
        account._history_link = Link(
            './ul/li/a[contains(@id, "consulter_solde") '
            'or contains(@id, "historique") '
            'or contains(@id, "contrat") '
            'or contains(@id, "assurance_vie_operations")]')(cpt)

        # this is to test if access to the accounts info is blocked for different reasons
        page = self.browser.open(account._history_link).page
        if isinstance(page, LoanPage):
            # The history link leads to a loan page: use a Loan object instead.
            account = Loan()

        # Set the link again: `account` may now be a fresh Loan instance.
        account._history_link = Link(
            './ul/li/a[contains(@id, "consulter_solde") '
            'or contains(@id, "historique") '
            'or contains(@id, "contrat") '
            'or contains(@id, "assurance_vie_operations")]')(cpt)
        if isinstance(page, LoanPage):
            # Both id and label are read from the same element.
            account.id = CleanText(
                '(//p[@id="c_montantEmprunte"]//span[@class="valStatic"]//strong)[1]'
            )(cpt)
            account.label = CleanText(
                '(//p[@id="c_montantEmprunte"]//span[@class="valStatic"]//strong)[1]'
            )(cpt)
            account.type = Account.TYPE_LOAN
            # The loan details are read from a new fetch of the history link.
            account_history_page = self.browser.open(
                account._history_link).page
            account.total_amount = account_history_page.get_total_amount()
            account.next_payment_amount = account_history_page.get_next_payment_amount()
            account.next_payment_date = account_history_page.get_next_payment_date()
            account.account_label = account_history_page.get_account_label()
            account.subscription_date = account_history_page.get_subscription_date()
            account.maturity_date = account_history_page.get_maturity_date()

        # Blocking messages are only looked for while no account has been
        # collected yet.
        if len(accounts) == 0:
            global_error_message = page.doc.xpath(
                '//div[@id="as_renouvellementMIFID.do_"]/div[contains(text(), "Bonjour")] '
                '| //div[@id="as_afficherMessageBloquantMigration.do_"]//div[@class="content_message"] '
                '| //p[contains(text(), "Et si vous faisiez de Fortuneo votre banque principale")] '
                '| //div[@id="as_renouvellementMotDePasse.do_"]//p[contains(text(), "votre mot de passe")]'
                '| //div[@id="as_afficherSecuriteForteOTPIdentification.do_"]//span[contains(text(), "Pour valider ")]'
            )
            if global_error_message:
                raise ActionNeeded(CleanText('.')(global_error_message[0]))
            local_error_message = page.doc.xpath(
                '//div[@id="error"]/p[@class="erreur_texte1"]')
            if local_error_message:
                raise BrowserUnavailable(
                    CleanText('.')(local_error_message[0]))

        # The account number is the link text with its "N°" prefix removed.
        # This overwrites the id set in the loan branch above.
        number = RawText('./a[contains(@class, "numero_compte")]')(
            cpt).replace(u'N° ', '')
        account.id = CleanText(None).filter(number).replace(u'N°', '')

        account._card_links = []
        card_link = Link('./ul/li/a[contains(text(), "Carte bancaire")]',
                         default='')(cpt)
        if len(card_link) > 0:
            account._card_links.append(card_link)

        # This also overwrites the label set in the loan branch above.
        account.label = CleanText(
            './a[contains(@class, "numero_compte")]/@title')(cpt)

        # The first ACCOUNT_TYPES pattern found in the history link sets the type.
        for pattern, type in self.ACCOUNT_TYPES.items():
            if pattern in account._history_link:
                account.type = type
                break

        if account.type in {
                Account.TYPE_PEA, Account.TYPE_MARKET,
                Account.TYPE_LIFE_INSURANCE
        }:
            # For these types the balance is read from the "portefeuille" page.
            account._investment_link = Link(
                './ul/li/a[contains(@id, "portefeuille")]')(cpt)
            balance = self.browser.open(
                account._investment_link).page.get_balance(account.type)
            if account.type in {Account.TYPE_PEA, Account.TYPE_MARKET}:
                # Investments are stored on the browser, keyed by account id.
                self.browser.investments[account.id] = list(
                    self.browser.open(
                        account._investment_link).page.get_investments(
                            account))
        else:
            balance = self.browser.open(
                account._history_link).page.get_balance()
            if account.type is not Account.TYPE_LOAN:
                account.coming = self.browser.open(
                    account._history_link).page.get_coming()

        if account.type in {Account.TYPE_PEA, Account.TYPE_MARKET}:
            account.currency = self.browser.open(
                account._investment_link).page.get_currency()
        else:
            account.currency = account.get_currency(balance)
        account.balance = CleanDecimal(None,
                                       replace_dots=True).filter(balance)

        if account.type in (Account.TYPE_CHECKING, Account.TYPE_SAVINGS):
            # Need a token sent by SMS to customers
            account.iban = NotAvailable

        # Skip an account already collected with the same label, id and balance.
        if (account.label, account.id, account.balance) not in [
                (a.label, a.id, a.balance) for a in accounts
        ]:
            accounts.append(account)
    return accounts
def get_balance(self, account_type):
    """Return the raw balance text of the first ``Valorisation`` block.

    ``account_type`` is accepted but not used. Returns ``None`` when no block
    of the ``synthese_vie`` section contains the word ``Valorisation``.
    """
    candidates = self.doc.xpath('//div[@class="block synthese_vie"]/div/div/div')
    for block in candidates:
        if 'Valorisation' not in CleanText('.')(block):
            continue
        return RawText('./p/strong')(block)
    return None
def test_first_node_is_element_recursive(self):
    """With ``children=True``, expect the nested and trailing text joined together."""
    doc = fromstring('<html><body><p><span>229,90</span> EUR</p></body></html>')
    extract = RawText('//p', default="foo", children=True)
    self.assertEqual("229,90 EUR", extract(doc))
def test_first_node_has_no_recursion(self):
    """Without ``children``, expect the default for a ``<p>`` that starts with an element."""
    doc = fromstring('<html><body><p><span>229,90</span> EUR</p></body></html>')
    extract = RawText('//p', default="foo")
    self.assertEqual("foo", extract(doc))
def test_first_node_is_text(self):
    """Expect only the text that precedes the first child element."""
    doc = fromstring('<html><body><p>blah: <span>229,90</span> EUR</p></body></html>')
    extract = RawText('//p', default="foo")
    self.assertEqual("blah: ", extract(doc))
def test_first_node_is_element(self):
    """Expect the default when the matched ``<p>`` is empty."""
    doc = fromstring('<html><body><p></p></body></html>')
    extract = RawText('//p', default="foo")
    self.assertEqual("foo", extract(doc))
def get_list(self):
    """Build the accounts described on this page and return an iterator over them.

    Each ``div`` whose class contains `` compte`` (but not
    ``compte_selected``) is turned into an ``Account``. Balance, currency and,
    for PEA/market accounts, investments are read from pages opened through
    the browser.

    Raises ``ActionNeeded`` when, while the result list is still empty, the
    opened history page contains the ``as_renouvellementMIFID.do_`` block.

    An account is skipped when an account with the same
    ``(label, id, balance)`` has already been collected.
    """
    # NOTE(review): block nesting was reconstructed from a flattened source;
    # confirm against the upstream file before relying on it.
    accounts = []
    for cpt in self.doc.xpath(
            '//div[contains(@class, " compte") and not(contains(@class, "compte_selected"))]'
    ):
        # ignore auto assurance accounts
        if 'aut' in cpt.get('class'):
            continue

        account = Account()
        account._history_link = Link(
            './ul/li/a[contains(@id, "consulter_solde") '
            'or contains(@id, "historique") '
            'or contains(@id, "assurance_vie_operations")]')(cpt)

        # this is to test if there is a redirection to a form for recently
        # created profiles; only done while no account has been collected yet
        if not accounts:
            message = self.browser.open(
                account._history_link).page.doc.xpath(
                    '//div[@id="as_renouvellementMIFID.do_"]')
            if message:
                raise ActionNeeded(
                    CleanText('./div[contains(text(), "Bonjour")]')(
                        message[0]))

        # The account number is the link text with its "N°" prefix removed.
        number = RawText('./a[contains(@class, "numero_compte")]')(
            cpt).replace(u'N° ', '')
        account.id = CleanText(None).filter(number).replace(u'N°', '')

        account._card_links = []
        card_link = Link('./ul/li/a[contains(text(), "Carte bancaire")]',
                         default='')(cpt)
        if card_link:
            account._card_links.append(card_link)

        account.label = CleanText(
            './a[contains(@class, "numero_compte")]/@title')(cpt)

        # The first ACCOUNT_TYPES pattern found in the history link sets the
        # type. ``items()`` works on Python 2 and 3 (``iteritems()`` is
        # Python-2-only); the loop variable no longer shadows ``type``.
        for pattern, account_type in self.ACCOUNT_TYPES.items():
            if pattern in account._history_link:
                account.type = account_type
                break

        if account.type in {
                Account.TYPE_PEA, Account.TYPE_MARKET,
                Account.TYPE_LIFE_INSURANCE
        }:
            # For these types the balance is read from the "portefeuille" page.
            account._investment_link = Link(
                './ul/li/a[contains(@id, "portefeuille")]')(cpt)
            balance = self.browser.open(
                account._investment_link).page.get_balance(account.type)
            if account.type in {Account.TYPE_PEA, Account.TYPE_MARKET}:
                # Investments are cached on the browser, keyed by account id.
                self.browser.cache["investments"][account.id] = list(
                    self.browser.open(
                        account._investment_link).page.get_investments(
                            account))
        else:
            balance = self.browser.open(
                account._history_link).page.get_balance()

        if account.type in {Account.TYPE_PEA, Account.TYPE_MARKET}:
            account.currency = self.browser.open(
                account._investment_link).page.get_currency()
        else:
            account.currency = account.get_currency(balance)
        account.balance = CleanDecimal(None,
                                       replace_dots=True).filter(balance)

        if account.type in (Account.TYPE_CHECKING, Account.TYPE_SAVINGS):
            # Need a token sent by SMS to customers
            account.iban = NotAvailable

        # Skip an account already collected with the same label, id and balance.
        if (account.label, account.id, account.balance) not in [
                (a.label, a.id, a.balance) for a in accounts
        ]:
            accounts.append(account)
    return iter(accounts)