class TrainlineBrowser(LoginBrowser):
    """Browser for the Trainline JSON API: sign-in, subscription and documents."""

    BASEURL = 'https://www.trainline.fr'

    signin = URL(r'/api/v5/account/signin', SigninPage)
    user_page = URL(r'/api/v5/user', UserPage)
    documents_page = URL(r'/api/v5/pnrs', DocumentsPage)

    def __init__(self, login, password, *args, **kwargs):
        super(TrainlineBrowser, self).__init__(login, password, *args, **kwargs)
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'

    def do_login(self):
        """Sign in and store the returned token in the ``Authorization`` header.

        Raises:
            BrowserIncorrectPassword: when the sign-in request fails with a
                ``ClientError``; the message is the first ``errors.email``
                entry of the JSON response, or ``None`` if there is none.
        """
        try:
            self.signin.go(data={
                'email': self.username,
                'password': self.password
            })
        except ClientError as error:
            json_response = error.response.json()
            error_list = json_response.get('errors', {}).get('email', [])
            error_message = error_list[0] if error_list else None
            raise BrowserIncorrectPassword(error_message)

        self.session.headers['Authorization'] = 'Token token="%s"' % self.page.get_token()

    @need_login
    def get_subscription_list(self):
        """Yield the single subscription found on the user page."""
        yield self.user_page.go().get_subscription()

    @need_login
    def iter_documents(self, subscription):
        """Return the documents of ``subscription``, most recent first.

        At most 10 requests are sent. Each request asks for the window ending
        at the month of ``min_date``; the window is moved back when a request
        brings nothing new.
        """
        min_date = date.today()
        docs = {}

        for _ in range(10):
            params = {'date': min_date.strftime('%Y-%m-01')}
            # The date parameter has a very silly behavior:
            # * the day seems to be useless (but we have to send it anyway)
            # * the server returns the last 3 months from the date (including
            #   the month we give)
            #   ex: date = 2019-09-01 => returns bills from 2019-07-01 to 2019-09-30
            # * this date range behavior seems not to apply to old bills: we
            #   can get a bill from 2017 even with date=2019-06-01
            new_doc = False
            try:
                self.documents_page.go(params=params)
            except ClientError as error:
                # CAUTION: if we perform too many requests we can get a 429
                # response status code
                if error.response.status_code != 429:
                    raise
                # Wait 2 seconds and retry the SAME window. Without this
                # `continue`, the stale self.page would be parsed again and the
                # window could be wrongly moved 3 months back. The attempt
                # still counts, so the loop stays bounded.
                sleep(2)
                continue

            for doc in self.page.iter_documents(subid=subscription.id):
                if doc.id not in docs:
                    new_doc = True
                    docs[doc.id] = doc

                if min_date > doc.date:
                    min_date = doc.date

            if not new_doc:
                min_date -= relativedelta(months=3)

        return sorted(docs.values(), key=lambda doc: doc.date, reverse=True)
class Fortuneo(LoginBrowser, StatesMixin):
    """Browser for the Fortuneo website: accounts, history, transfers, profile.

    ``need_reload_state`` and ``add_recipient_form`` are persisted through
    ``__states__`` so that the add-recipient flow can be resumed once the user
    has supplied the SMS code.
    """

    BASEURL = 'https://mabanque.fortuneo.fr'
    STATE_DURATION = 5

    login_page = URL(r'.*identification\.jsp.*', LoginPage)

    accounts_page = URL(r'/fr/prive/default.jsp\?ANav=1',
                        r'.*prive/default\.jsp.*',
                        r'.*/prive/mes-comptes/synthese-mes-comptes\.jsp',
                        AccountsList)
    account_history = URL(
        r'.*/prive/mes-comptes/livret/consulter-situation/consulter-solde\.jsp.*',
        r'.*/prive/mes-comptes/compte-courant/consulter-situation/consulter-solde\.jsp.*',
        r'.*/prive/mes-comptes/compte-especes.*',
        AccountHistoryPage)
    card_history = URL(
        r'.*/prive/mes-comptes/compte-courant/carte-bancaire/encours-debit-differe\.jsp.*',
        CardHistoryPage)
    pea_history = URL(r'.*/prive/mes-comptes/pea/.*',
                      r'.*/prive/mes-comptes/compte-titres-pea/.*',
                      r'.*/prive/mes-comptes/ppe/.*',
                      PeaHistoryPage)
    invest_history = URL(r'.*/prive/mes-comptes/assurance-vie/.*', InvestmentHistoryPage)
    loan_contract = URL(
        r'/fr/prive/mes-comptes/credit-immo/contrat-credit-immo/contrat-pret-immobilier.jsp.*',
        LoanPage)
    unavailable = URL(r'/customError/indispo.html', UnavailablePage)
    security_page = URL(r'/fr/prive/identification-carte-securite-forte.jsp.*', SecurityPage)

    # transfer
    recipients = URL(
        r'/fr/prive/mes-comptes/compte-courant/realiser-operations/gerer-comptes-externes/consulter-comptes-externes.jsp',
        r'/fr/prive/verifier-compte-externe.jsp',
        r'fr/prive/mes-comptes/compte-courant/.*/gestion-comptes-externes.jsp',
        RecipientsPage)
    recipient_sms = URL(
        r'/fr/prive/appel-securite-forte-otp-bankone.jsp',
        r'/fr/prive/mes-comptes/compte-courant/.*/confirmer-ajout-compte-externe.jsp',
        RecipientSMSPage)
    register_transfer = URL(
        r'/fr/prive/mes-comptes/compte-courant/realiser-operations/saisie-virement.jsp\?ca=(?P<ca>)',
        RegisterTransferPage)
    validate_transfer = URL(
        r'/fr/prive/mes-comptes/compte-courant/.*/verifier-saisie-virement.jsp',
        ValidateTransferPage)
    confirm_transfer = URL(
        r'fr/prive/mes-comptes/compte-courant/.*/init-confirmer-saisie-virement.jsp',
        r'/fr/prive/mes-comptes/compte-courant/.*/confirmer-saisie-virement.jsp',
        ConfirmTransferPage)
    fake_action_page = URL(
        r'fr/prive/mes-comptes/synthese-globale/synthese-mes-comptes.jsp',
        FakeActionPage)
    profile = URL(r'/fr/prive/informations-client.jsp', ProfilePage)
    profile_csv = URL(r'/PdfStruts\?*', ProfilePageCSV)

    need_reload_state = None

    __states__ = ['need_reload_state', 'add_recipient_form']

    def __init__(self, *args, **kwargs):
        LoginBrowser.__init__(self, *args, **kwargs)
        self.investments = {}
        self.action_needed_processed = False
        self.add_recipient_form = None

    def do_login(self):
        """Log in with username/password.

        Raises:
            BrowserIncorrectPassword: when still on the login page after
                submitting the credentials.
            AuthMethodNotImplemented: when the accounts page asks for an SMS.
        """
        if not self.login_page.is_here():
            self.location('/fr/identification.jsp')

        self.page.login(self.username, self.password)

        if self.login_page.is_here():
            self.page.check_is_blocked()
            raise BrowserIncorrectPassword()

        self.location('/fr/prive/default.jsp?ANav=1')
        if self.accounts_page.is_here() and self.page.need_sms():
            raise AuthMethodNotImplemented(
                'Authentification with sms is not supported')

    def load_state(self, state):
        # reload state only for new recipient feature
        if state.get('need_reload_state'):
            # don't use locate browser for add recipient step
            state.pop('url', None)
            super(Fortuneo, self).load_state(state)

    @need_login
    def get_investments(self, account):
        """Return the investments of ``account``, or ``[]`` if it has no investment link."""
        if hasattr(account, '_investment_link'):
            if account.id in self.investments:
                return self.investments[account.id]
            else:
                self.location(account._investment_link)
                return self.page.get_investments(account)
        return []

    @need_login
    def get_history(self, account):
        """Return the sorted operations of ``account``.

        Returns ``[]`` for loans or when no period can be selected.
        """
        self.location(account._history_link)
        if account.type != Account.TYPE_LOAN:
            if self.page.select_period():
                return sorted_transactions(self.page.get_operations())

        return []

    @need_login
    def get_coming(self, account):
        """Yield the sorted operations of each card link of ``account``."""
        for cb_link in account._card_links:
            # the page may still be loading: try up to 3 times
            for _ in range(3):
                self.location(cb_link)
                if not self.page.is_loading():
                    break
                time.sleep(1)

            for tr in sorted_transactions(self.page.get_operations()):
                yield tr

    @need_login
    def get_accounts_list(self):
        """Return the account list, handling pending "action needed" screens first."""
        self.accounts_page.go()

        # Note: if you want to debug process_action_needed() here,
        # you must first set self.action_needed_processed to False
        # otherwise it might not enter the "if" block here below.
        if not self.action_needed_processed:
            self.process_action_needed()

        assert self.accounts_page.is_here()
        accounts_list = self.page.get_list()
        if self.fake_action_page.is_here():
            # A false action needed is present, it's a choice to make Fortuneo your main bank.
            # To avoid it, we need to first detect it on the account_page
            # Then make a post request to mimic the click on choice 'later'
            # And to finish we must reload the page with a POST to get the accounts
            # before going on the accounts_page, which will have the data.
            self.location(self.absurl('ReloadContext?action=1&', base=True), method='POST')
            self.accounts_page.go()
            accounts_list = self.page.get_list()
        return accounts_list

    def process_action_needed(self):
        """Follow the accounts-page iframe and skip skippable CGUs.

        Raises:
            ActionNeeded: when the iframe leads to the strong-security page.
        """
        # we have to go in an iframe to know if there are CGUs
        url = self.page.get_iframe_url()
        if url:
            # beware, the landing page might vary according to the referer page.
            # So far I didn't figure out how the landing page is chosen.
            self.location(self.absurl(url, base=True))

            if self.security_page.is_here():
                # Some connections require reinforced security and we cannot bypass the OTP in order
                # to get to the account information. Users have to provide a phone number in order to
                # validate an OTP, so we must raise an ActionNeeded with the appropriate message.
                raise ActionNeeded(
                    'Cette opération sensible doit être validée par un code sécurité envoyé par SMS ou serveur vocal. '
                    'Veuillez contacter le Service Clients pour renseigner vos coordonnées téléphoniques.'
                )

            # if there are skippable CGUs, skip them
            if self.accounts_page.is_here() and self.page.has_action_needed():
                # Look for the request in the event listener registered to the button
                # can be hardcoded, no variable part. It is a POST request without data.
                self.location(self.absurl('ReloadContext?action=1&', base=True), method='POST')
            self.accounts_page.go()  # go back to the accounts page whenever there was an iframe or not

        self.action_needed_processed = True

    @need_login
    def iter_recipients(self, origin_account):
        """Yield internal recipients (if the account is transferable), then external ones."""
        self.register_transfer.go(ca=origin_account._ca)
        if self.page.is_account_transferable(origin_account):
            for internal_recipient in self.page.iter_internal_recipients(
                    origin_account_id=origin_account.id):
                yield internal_recipient

            self.recipients.go()
            for external_recipients in self.page.iter_external_recipients():
                yield external_recipients

    def copy_recipient(self, recipient):
        """Build a new ``Recipient`` from ``recipient``, enabled one day from now."""
        rcpt = Recipient()
        rcpt.iban = recipient.iban
        rcpt.id = recipient.iban
        rcpt.label = recipient.label
        rcpt.category = recipient.category
        rcpt.enabled_at = datetime.now().replace(microsecond=0) + timedelta(days=1)
        rcpt.currency = u'EUR'
        return rcpt

    def new_recipient(self, recipient, **params):
        """Add a recipient; with ``code`` in ``params``, validate the pending OTP.

        Raises:
            AddRecipientStep: when a (new) code must be supplied by the user.
            AssertionError: when the code was sent and the page reports neither
                success nor an expired code.
        """
        if 'code' in params:
            # to drop and use self.add_recipient_form instead in send_code()
            recipient_form = json.loads(self.add_recipient_form)
            self.send_code(recipient_form, params['code'])
            if self.page.rcpt_after_sms():
                self.need_reload_state = None
                return self.copy_recipient(recipient)
            elif self.page.is_code_expired():
                self.need_reload_state = True
                raise AddRecipientStep(
                    recipient,
                    Value(
                        'code',
                        label='Le code sécurité est expiré. Veuillez saisir le nouveau code reçu qui sera valable 5 minutes.'
                    ))
            # Raised explicitly instead of `assert False`: under `python -O`
            # the assert would be stripped and execution would fall through
            # to new_recipient_before_otp(), starting a new SMS flow.
            raise AssertionError(self.page.get_error())
        return self.new_recipient_before_otp(recipient, **params)

    @need_login
    def new_recipient_before_otp(self, recipient, **params):
        """Fill the recipient form, trigger the SMS and always raise ``AddRecipientStep``."""
        self.recipients.go()
        self.page.check_external_iban_form(recipient)
        self.page.check_recipient_iban()

        # fill form
        self.page.fill_recipient_form(recipient)
        rcpt = self.page.get_new_recipient(recipient)

        # get first part of confirm form
        send_code_form = self.page.get_send_code_form()

        data = {
            'appelAjax': 'true',
            'domicileUpdated': 'false',
            'numeroSelectionne.value': '',
            'portableUpdated': 'false',
            'proUpdated': 'false',
            'typeOperationSensible': 'AJOUT_BENEFICIAIRE'
        }
        # this sends the sms to the user
        self.location(self.absurl(
            '/fr/prive/appel-securite-forte-otp-bankone.jsp', base=True), data=data)
        # get second part of confirm form
        send_code_form.update(self.page.get_send_code_form_input())

        # save form value and url for statesmixin
        self.add_recipient_form = dict(send_code_form)
        self.add_recipient_form.update({'url': send_code_form.url})

        # storage can't handle dict with '.' in key
        # to drop when dict with '.' in key is handled
        self.add_recipient_form = json.dumps(self.add_recipient_form)

        self.need_reload_state = True
        raise AddRecipientStep(
            rcpt, Value('code', label='Veuillez saisir le code reçu.'))

    def send_code(self, form_data, code):
        """POST ``form_data`` plus the OTP to the URL stored under its ``'url'`` key.

        ``form_data`` is modified in place: ``'otp'`` is added and ``'url'`` removed.
        """
        form_url = form_data.pop('url')
        form_data['otp'] = code
        self.location(self.absurl(form_url, base=True), data=form_data)

    @need_login
    def init_transfer(self, account, recipient, amount, label, exec_date):
        self.register_transfer.go(ca=account._ca)
        self.page.fill_transfer_form(account, recipient, amount, label, exec_date)
        return self.page.handle_response(account, recipient, amount, label, exec_date)

    @need_login
    def execute_transfer(self, transfer):
        self.page.validate_transfer()
        self.page.confirm_transfer()
        return self.page.transfer_confirmation(transfer)

    @need_login
    def get_profile(self):
        """Return the profile, from the CSV export when a CSV link is available."""
        self.profile.go()
        csv_link = self.page.get_csv_link()
        if csv_link:
            self.location(csv_link)
            return self.page.get_profile()
        # The person's name is in a menu not returned in the ProfilePage, so
        # we have to go back to the AccountsPage (which is the main page for the website)
        # to get the info
        person = self.page.get_profile()
        self.accounts_page.go()
        self.page.fill_person_name(obj=person)
        return person
class BilletreducBrowser(PagesBrowser):
    """Browser for billetreduc.com event search and event details."""

    BASEURL = 'http://www.billetreduc.com'

    search = URL(r'/recherche.htm', SearchPage)
    results = URL(r'/search.htm', ResultsPage)
    event = URL(r'/(?P<id>\d+)/evt.htm', EventPage)
    event_dates = URL(r'/(?P<id>\d+)/evtbook.htm',
                      r'https://www.billetreduc.com/(?P<id>\d+)/evtbook.htm',
                      EventDatesPage)
    book = URL(r'/evtBook.htm\?idevt=(?P<id>\d+)&dh=(?P<ymd>\d+-\d+-\d+)\+(?P<hm>\d+:\d+)')

    def set_id_end(self, event):
        """Derive ``event.id`` from site id and start date; end is start + 1 hour."""
        stamp = event.start_date.strftime('%Y-%m-%d.%H:%M')
        event.id = '%s.%s' % (event.siteid, stamp)
        event.end_date = event.start_date + timedelta(seconds=3600)

    def search_events(self, q):
        """Yield one event per listed hour for each day of the query range.

        The range defaults to seven days from now. Occurrences starting
        before the requested start are not yielded.
        """
        earliest = q.start_date or datetime.now()
        q = q.copy()

        first_day = (q.start_date or datetime.now()).replace(second=0, microsecond=0)
        last_day = q.end_date or first_day + timedelta(days=7)

        for day in iter_days(first_day, last_day):
            q.start_date = day
            self.search.go()
            self.page.search(q)

            for event in self.page.iter_events(date=day):
                occurrence = event
                for hour, minute in event._date_hours:
                    # each occurrence is a copy of the previous one
                    occurrence = occurrence.copy()
                    occurrence.start_date = occurrence.start_date.replace(hour=hour, minute=minute)
                    self.set_id_end(occurrence)

                    if occurrence.start_date < earliest:
                        continue
                    yield occurrence

    def get_event(self, _id):
        """Fetch an event from ``<siteid>`` or ``<siteid>.<ymd>.<hm>``."""
        parts = _id.split('.')
        if len(parts) != 3:
            return self.get_event_first(_id)

        eid, ymd, hm = parts
        return self.get_event_by_date(eid, ymd, hm)

    def get_event_first(self, eid):
        """Fetch event ``eid`` and let the dates page fill its first date."""
        self.event.go(id=eid)
        found = self.page.get_event()

        self.event_dates.go(id=eid)
        self.page.get_first(found)

        self.set_id_end(found)
        return found

    def get_event_by_date(self, eid, ymd, hm):
        """Fetch event ``eid`` for the occurrence at ``ymd`` (Y-m-d) ``hm`` (H:M)."""
        self.event.go(id=eid)
        found = self.page.get_event()

        found.start_date = datetime.strptime('%sT%s' % (ymd, hm), '%Y-%m-%dT%H:%M')
        found.end_date = found.start_date + timedelta(seconds=3600)

        self.event_dates.go(id=eid)
        self.page.fetch_by_date(found, ymd, hm)

        self.set_id_end(found)
        return found
class AmeliProBrowser(LoginBrowser):
    """Browser for the Ameli health-professional portal (espacepro.ameli.fr)."""

    BASEURL = 'https://espacepro.ameli.fr'

    # Raw strings: these are regular expressions and `\?` is not a valid
    # Python string escape.
    loginp = URL(
        r'/PortailPS/appmanager/portailps/professionnelsante\?_nfpb=true&_pageLabel=vp_login_page',
        LoginPage)
    homep = URL(
        r'/PortailPS/appmanager/portailps/professionnelsante\?_nfpb=true&_pageLabel=vp_accueil_page',
        HomePage)
    accountp = URL(
        r'/PortailPS/appmanager/portailps/professionnelsante\?_nfpb=true&_pageLabel=vp_coordonnees_infos_perso_page',
        AccountPage)
    billsp = URL(
        r'/PortailPS/appmanager/portailps/professionnelsante\?_nfpb=true&_pageLabel=vp_releves_mensuels_page',
        BillsPage)
    searchp = URL(
        r'/PortailPS/appmanager/portailps/professionnelsante\?_nfpb=true&_pageLabel=vp_recherche_par_date_paiements_page',
        SearchPage)
    historyp = URL(
        r'/PortailPS/appmanager/portailps/professionnelsante\?_nfpb=true&_windowLabel=vp_recherche_paiement_tiers_payant_portlet_1&vp_recherche_paiement_tiers_payant_portlet_1_actionOverride=%2Fportlets%2Fpaiements%2Frecherche&_pageLabel=vp_recherche_par_date_paiements_page',
        HistoryPage)

    logged = False

    def do_login(self):
        """Log in unless already logged.

        Raises:
            BrowserIncorrectPassword: when the home page is not reached after
                submitting the credentials.
        """
        self.logger.debug('call Browser.do_login')
        if self.logged:
            return True

        self.loginp.stay_or_go()
        if self.homep.is_here():
            self.logged = True
            return True

        self.page.login(self.username, self.password)

        if not self.homep.is_here():
            raise BrowserIncorrectPassword()

        self.logged = True

    @need_login
    def get_subscription_list(self):
        self.logger.debug('call Browser.get_subscription_list')
        self.accountp.stay_or_go()
        return self.page.iter_subscription_list()

    @need_login
    def get_subscription(self, id):
        # NOTE(review): `id` is ignored and the whole list is returned;
        # confirm against callers before changing.
        return self.get_subscription_list()

    @need_login
    def iter_history(self, subscription):
        """Search payments over the page's default date range.

        Returns the history page's iterator, or an empty iterator when the
        request did not land on the history page (previously ``None``, which
        broke callers iterating over the result).
        """
        self.searchp.stay_or_go()

        # reuse the default start/end dates proposed by the search form
        date_deb = self.page.doc.xpath(
            '//input[@name="vp_recherche_paiement_tiers_payant_portlet_1dateDebutRecherche"]'
        )[0].value
        date_fin = self.page.doc.xpath(
            '//input[@name="vp_recherche_paiement_tiers_payant_portlet_1dateFinRecherche"]'
        )[0].value

        data = {
            'vp_recherche_paiement_tiers_payant_portlet_1dateDebutRecherche': date_deb,
            'vp_recherche_paiement_tiers_payant_portlet_1dateFinRecherche': date_fin,
            'vp_recherche_paiement_tiers_payant_portlet_1codeOrganisme': 'null',
            'vp_recherche_paiement_tiers_payant_portlet_1actionEvt': 'rechercheParDate',
            'vp_recherche_paiement_tiers_payant_portlet_1codeRegime': '01',
        }

        # the header is set on the session, so it persists for later requests
        self.session.headers.update(
            {'Content-Type': 'application/x-www-form-urlencoded'})
        self.historyp.go(data=urlencode(data))
        if self.historyp.is_here():
            return self.page.iter_history()
        return iter([])

    @need_login
    def get_details(self, sub):
        """Build a ``Detail`` mirroring ``sub`` with empty infos and a zero price."""
        det = Detail()
        det.id = sub.id
        det.label = sub.label
        det.infos = ''
        det.price = Decimal('0.0')
        return det

    @need_login
    def iter_documents(self):
        self.billsp.stay_or_go()
        return self.page.iter_documents()

    @need_login
    def get_document(self, id):
        """Return the document whose id equals ``id``, or ``None``."""
        for b in self.iter_documents():
            if id == b.id:
                return b
        return None

    @need_login
    def download_document(self, bill):
        """POST ``bill._data`` to ``bill.url`` and return the response content."""
        request = self.open(bill.url, data=bill._data, stream=True)
        return request.content
class PastebinBrowser(LoginBrowser):
    """Browser for pastebin.com, using either the website or the HTTP API."""

    BASEURL = 'https://pastebin.com/'

    # Raw strings: these are regular expressions (`\.`, `\?`, `\d`, `\w` are
    # not valid Python string escapes).
    warning = URL(r'warning\.php\?p=(?P<id>\d+)', WarningPage)
    api = URL(r'api/api_post\.php', RawPage)
    apilogin = URL(r'api/api_login\.php', RawPage)
    login = URL(r'login', LoginPage)
    userprofile = URL(r'u/(?P<username>.+)', UserPage)
    postpage = URL(r'$', PostPage)
    paste = URL(r'(?P<id>\w+)', PastePage)
    raw = URL(r'raw\.php\?i=(?P<id>\w+)', RawPage)

    def __init__(self, api_key, *args, **kwargs):
        super(PastebinBrowser, self).__init__(*args, **kwargs)
        self.api_key = api_key
        self.user_key = None

        # being connected is optional at the module level, so require
        # login only if a username is configured
        if self.username:
            self.post = need_login(self.post_paste)

    def fill_paste(self, paste):
        """
        Get as much information as possible from the paste page.

        Raises PasteNotFound when the page does not exist.
        """
        try:
            return self.paste.stay_or_go(id=paste.id).fill_paste(paste)
        except BrowserHTTPNotFound:
            raise PasteNotFound()

    @paste.id2url
    def get_paste(self, url):
        """Return a PastebinPaste for ``url``, or None if it is not a paste URL."""
        m = self.paste.match(url)
        if m:
            return PastebinPaste(m.groupdict()['id'])

    def get_contents(self, _id):
        """
        Get the contents from the raw URL.

        This is the fastest and safest method if you only want the content.
        Returns unicode. Raises PasteNotFound when the paste does not exist.
        """
        try:
            return self.raw.open(id=_id).response.text
        except BrowserHTTPNotFound:
            raise PasteNotFound()

    def post_paste(self, paste, expiration=None):
        """Post ``paste`` through the website form and set its id and url."""
        self.postpage.stay_or_go().post(paste, expiration=expiration)
        # We cannot call fill_paste because we often have a captcha
        # anti-spam page, and do not detect it.
        paste.id = self.page.params['id']
        paste.url = self.paste.build(id=paste.id)

    def api_post_paste(self, paste, expiration=None):
        """Post ``paste`` through the API and set its id and url.

        Raises BadAPIRequest when the API answers with an error.
        """
        data = {
            'api_dev_key': self.api_key,
            'api_option': 'paste',
            'api_paste_code': paste.contents
        }
        if self.password:
            data['api_user_key'] = self.api_login()
        # `public` may be neither True nor False, in which case nothing is sent
        if paste.public is True:
            data['api_paste_private'] = '0'
        elif paste.public is False:
            data['api_paste_private'] = '1'
        if paste.title:
            data['api_paste_name'] = paste.title
        if expiration:
            data['api_paste_expire_date'] = expiration
        res = self.open(self.api.build(), data=data, data_encoding='utf-8').text
        self._validate_api_response(res)
        paste.id = self.paste.match(res).groupdict()['id']
        paste.url = self.paste.build(id=paste.id)

    def api_login(self):
        """Return the API user key, requesting it once and caching it.

        Raises BrowserIncorrectPassword on an 'invalid login' API error;
        other BadAPIRequest errors are re-raised unchanged.
        """
        # "The api_user_key does not expire."
        # TODO store it on disk
        if self.user_key:
            return self.user_key

        data = {
            'api_dev_key': self.api_key,
            'api_user_name': self.username,
            'api_user_password': self.password
        }
        res = self.open(self.apilogin.build(), data=data, data_encoding='utf-8').text
        try:
            self._validate_api_response(res)
        except BadAPIRequest as e:
            if str(e) == 'invalid login':
                raise BrowserIncorrectPassword()
            else:
                # bare raise keeps the original traceback
                raise
        self.user_key = res
        return res

    # TODO make it into a Page?
    def _validate_api_response(self, res):
        """Raise BadAPIRequest if ``res`` starts with 'Bad API request, '."""
        matches = re.match(r'Bad API request, (?P<error>.+)', res)
        if matches:
            raise BadAPIRequest(matches.groupdict().get('error'))

    def do_login(self):
        # NOTE(review): login() is called once without arguments on the page
        # returned by stay_or_go(), then again with credentials; confirm
        # against LoginPage whether the first call is intended.
        self.login.stay_or_go().login()
        self.page.login(self.username, self.password)
        if not self.page.logged:
            raise BrowserIncorrectPassword()
class BNPCompany(LoginBrowser):
    """Browser for the BNP Paribas company website (accounts and operations)."""

    BASEURL = 'https://secure1.entreprises.bnpparibas.net'

    login = URL('/sommaire/jsp/identification.jsp', LoginPage)
    accounts = URL('/NCCPresentationWeb/e10_soldes/liste_soldes.do', AccountsPage)
    history = URL('/NCCPresentationWeb/e11_releve_op/listeOperations.do', HistoryPage)

    def do_login(self):
        """Check credential types, then submit them on the login page."""
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)
        assert self.password.isdigit()

        # the login page is requested twice before being used
        for _ in range(2):
            self.login.go()
        assert self.login.is_here()

        self.page.login(self.username, self.password)

    @need_login
    def get_accounts_list(self):
        """Return the accounts iterator of the accounts page."""
        self.accounts.go()
        return self.page.iter_accounts()

    @need_login
    def get_account(self, _id):
        """Return the account with id ``_id``; ``AccountNotFound`` is the error passed to ``find_object``."""
        accounts = self.get_accounts_list()
        return find_object(accounts, id=_id, error=AccountNotFound)

    def get_transactions(self, id_account, typeReleve, dateMin, dateMax='null'):
        """Query the operations of ``id_account`` between ``dateMin`` and ``dateMax``.

        ``typeReleve`` is sent unchanged as the statement type.
        """
        # the init page is opened before the actual query
        self.open(
            'https://secure1.entreprises.bnpparibas.net/NCCPresentationWeb/e11_releve_op/init.do?e10=true'
        )

        params = {
            'identifiant': id_account,
            'typeSole': 'C',
            'typeReleve': typeReleve,
            'typeDate': 'O',
            'ajax': 'true',
            'dateMin': dateMin,
            'dateMax': dateMax,
        }
        self.history.go(params=params)
        return self.page.iter_history()

    @need_login
    def iter_history(self, account):
        """Return the 'Comptable' operations of the last 90 days."""
        since = (date.today() - timedelta(days=90)).strftime('%Y%m%d')
        until = date.today().strftime('%Y%m%d')
        return self.get_transactions(account.id, 'Comptable', since, until)

    @need_login
    def iter_coming_operations(self, account):
        """Return the 'Previsionnel' operations from today onwards."""
        today = date.today().strftime('%Y%m%d')
        return self.get_transactions(account.id, 'Previsionnel', today)

    @need_login
    def iter_investment(self, account):
        raise NotImplementedError()

    @need_login
    def get_transfer_accounts(self):
        raise NotImplementedError()

    @need_login
    def transfer(self, account, to, amount, reason):
        raise NotImplementedError()

    @need_login
    def iter_threads(self):
        raise NotImplementedError()

    @need_login
    def get_thread(self, thread):
        raise NotImplementedError()
class IngBrowser(LoginBrowser):
    """Browser for ING Direct: accounts, history, investments, transfers, documents, profile.

    ``self.where`` records which part of the site the browser was last
    driven to ("start", "history", "titre", "asv") and is persisted through
    ``__states__``.
    """

    BASEURL = 'https://secure.ingdirect.fr'
    TIMEOUT = 60.0
    DEFERRED_CB = 'deferred'
    IMMEDIATE_CB = 'immediate'

    # Patterns below are raw strings: they are regular expressions and
    # `\?` / `\w` are not valid Python string escapes.

    # avoid relogin every time
    lifeback = URL(r'https://ingdirectvie.ingdirect.fr/b2b2c/entreesite/EntAccExit', ReturnPage)

    # Login and error
    loginpage = URL(r'/public/displayLogin.jsf.*', LoginPage)
    errorpage = URL(r'.*displayCoordonneesCommand.*', StopPage)
    actioneeded = URL(r'/general\?command=displayTRAlertMessage',
                      r'/protected/pages/common/eco1/moveMoneyForbidden.jsf', ActionNeededPage)

    # CapBank
    accountspage = URL(r'/protected/pages/index.jsf',
                       r'/protected/pages/asv/contract/(?P<asvpage>.*).jsf', AccountsList)
    titredetails = URL(r'/general\?command=display.*', TitreDetails)
    ibanpage = URL(r'/protected/pages/common/rib/initialRib.jsf', IbanPage)

    # CapBank-Market
    netissima = URL(r'/data/asv/fiches-fonds/fonds-netissima.html', NetissimaPage)
    starttitre = URL(r'/general\?command=goToAccount&zone=COMPTE', TitrePage)
    titrepage = URL(r'https://bourse.ingdirect.fr/priv/portefeuille-TR.php', TitrePage)
    titrehistory = URL(r'https://bourse.ingdirect.fr/priv/compte.php\?ong=3', TitreHistory)
    titrerealtime = URL(r'https://bourse.ingdirect.fr/streaming/compteTempsReelCK.php', TitrePage)
    titrevalue = URL(r'https://bourse.ingdirect.fr/priv/fiche-valeur.php\?val=(?P<val>.*)&pl=(?P<pl>.*)&popup=1', TitreValuePage)
    asv_history = URL(r'https://ingdirectvie.ingdirect.fr/b2b2c/epargne/CoeLisMvt',
                      r'https://ingdirectvie.ingdirect.fr/b2b2c/epargne/CoeDetMvt', ASVHistory)
    asv_invest = URL(r'https://ingdirectvie.ingdirect.fr/b2b2c/epargne/CoeDetCon', ASVInvest)
    detailfonds = URL(r'https://ingdirectvie.ingdirect.fr/b2b2c/fonds/PerDesFac\?codeFonds=(.*)', DetailFondsPage)

    # CapDocument
    billpage = URL(r'/protected/pages/common/estatement/eStatement.jsf', BillsPage)

    # CapProfile
    profile = URL(r'/protected/pages/common/profil/(?P<page>\w+).jsf', ProfilePage)

    transfer = URL(r'/protected/pages/common/virement/index.jsf', TransferPage)

    __states__ = ['where']

    def __init__(self, *args, **kwargs):
        # `birthday` is a mandatory keyword argument (KeyError if missing)
        self.birthday = kwargs.pop('birthday')
        self.where = None
        LoginBrowser.__init__(self, *args, **kwargs)
        self.cache = {}
        self.cache["investments_data"] = {}
        # account._id -> True when every card of the account is deferred
        self.only_deferred_cards = {}

    def do_login(self):
        """Log out, then log in with username, birthday and numeric password.

        Raises:
            BrowserIncorrectPassword: when the login page reports an error or
                the error page is reached.
        """
        assert self.password.isdigit()
        assert self.birthday.isdigit()

        self.do_logout()
        self.loginpage.go()

        self.page.prelogin(self.username, self.birthday)
        self.page.login(self.password)
        if self.page.error():
            raise BrowserIncorrectPassword()
        if self.errorpage.is_here():
            raise BrowserIncorrectPassword('Please login on website to fill the form and retry')
        self.page.check_for_action_needed()

    @start_with_main_site
    def get_market_balance(self, account):
        """Update ``account.balance`` from the real-time market page and cache its document."""
        if self.where != "start":
            self.accountspage.go()
            self.where = "start"

        data = self.get_investments_data(account)
        # retry with exponential back-off until the accounts page has the link
        for i in range(5):
            if i > 0:
                self.logger.debug('Can\'t get market balance, retrying in %s seconds...', (2**i))
                time.sleep(2**i)
            if self.accountspage.go(data=data).has_link():
                break

        self.starttitre.go()
        self.where = u"titre"
        self.titrepage.go()
        self.titrerealtime.go()
        account.balance = self.page.get_balance() or account.balance
        self.cache["investments_data"][account.id] = self.page.doc or None

    @need_login
    @start_with_main_site
    def get_accounts_list(self, get_iban=True):
        """Yield accounts; with ``get_iban``, also fetch IBANs and market balances."""
        self.accountspage.go()
        self.where = "start"

        for acc in self.page.get_list():
            if get_iban and acc.type in [Account.TYPE_CHECKING, Account.TYPE_SAVINGS]:
                self.go_account_page(acc)
                acc.iban = self.ibanpage.go().get_iban()

            if get_iban and acc.type in (Account.TYPE_MARKET, Account.TYPE_PEA):
                self.get_market_balance(acc)

            yield acc

    def get_account(self, _id):
        return find_object(self.get_accounts_list(get_iban=False), id=_id, error=AccountNotFound)

    def go_account_page(self, account):
        """Select ``account`` on the accounts page and record its card kinds."""
        # same AJAX payload as the one used for investments
        data = self.get_investments_data(account)
        self.accountspage.go(data=data)
        card_list = self.page.get_card_list()
        if card_list:
            self.only_deferred_cards[account._id] = all(
                card['kind'] == self.DEFERRED_CB for card in card_list
            )
        self.where = "history"

    @need_login
    @start_with_main_site
    def get_coming(self, account):
        """Return coming operations for checking/savings accounts (None if no history)."""
        if account.type != Account.TYPE_CHECKING and\
                account.type != Account.TYPE_SAVINGS:
            raise NotImplementedError()
        account = self.get_account(account.id)
        self.go_account_page(account)
        jid = self.page.get_history_jid()
        if jid is None:
            self.logger.info('There is no history for this account')
            return
        return self.page.get_coming()

    @need_login
    @start_with_main_site
    def get_history(self, account):
        """Yield the transactions of ``account``, paging through "more transactions"."""
        if account.type in (Account.TYPE_MARKET, Account.TYPE_PEA, Account.TYPE_LIFE_INSURANCE):
            for result in self.get_history_titre(account):
                yield result
            return

        elif account.type != Account.TYPE_CHECKING and\
                account.type != Account.TYPE_SAVINGS:
            raise NotImplementedError()

        account = self.get_account(account.id)
        self.go_account_page(account)
        jid = self.page.get_history_jid()
        only_deferred_cb = self.only_deferred_cards.get(account._id)

        if jid is None:
            self.logger.info('There is no history for this account')
            return

        if account.type == Account.TYPE_CHECKING:
            history_function = AccountsList.get_transactions_cc
            index = -1  # disable the index. It works without it on CC
        else:
            history_function = AccountsList.get_transactions_others
            index = 0
        hashlist = set()
        while True:
            i = index
            for transaction in history_function(self.page, index=index):
                if only_deferred_cb and transaction.type == FrenchTransaction.TYPE_CARD:
                    transaction.type = FrenchTransaction.TYPE_DEFERRED_CARD

                # ids must be unique: re-hash until the id was not seen before
                transaction.id = hashlib.md5(transaction._hash).hexdigest()
                while transaction.id in hashlist:
                    transaction.id = hashlib.md5((transaction.id + "1").encode('ascii')).hexdigest()
                hashlist.add(transaction.id)
                i += 1
                yield transaction
            # if there is no more transactions, it is useless to continue
            if self.page.islast() or i == index:
                return
            if index >= 0:
                index = i
            data = {"AJAX:EVENTS_COUNT": 1,
                    "AJAXREQUEST": "_viewRoot",
                    "autoScroll": "",
                    "index": "index",
                    "index:%s:moreTransactions" % jid: "index:%s:moreTransactions" % jid,
                    "javax.faces.ViewState": account._jid
                    }
            self.accountspage.go(data=data)

    @need_login
    @start_with_main_site
    def iter_recipients(self, account):
        self.transfer.go()
        if not self.page.able_to_transfer(account):
            return iter([])
        self.page.go_to_recipient_selection(account)
        return self.page.get_recipients(origin=account)

    @need_login
    @start_with_main_site
    def init_transfer(self, account, recipient, transfer):
        self.transfer.go()
        self.page.do_transfer(account, recipient, transfer)
        return self.page.recap(account, recipient, transfer)

    @need_login
    @start_with_main_site
    def execute_transfer(self, transfer):
        self.page.confirm(self.password)
        return transfer

    def go_on_asv_detail(self, account, link):
        """Go to the life-insurance partner site then to ``link``.

        Returns True on success, False when an SSLError occurs.
        """
        try:
            if self.page.asv_is_other:
                jid = self.page.get_asv_jid()
                data = {'index': "index", 'javax.faces.ViewState': jid, 'index:j_idcl': "index:asvInclude:goToAsvPartner"}
                self.accountspage.go(data=data)
            else:
                self.accountspage.go(asvpage="manageASVContract")
                self.page.submit()
            self.page.submit()
            self.location(link)

            return True
        except SSLError:
            return False

    def get_investments_data(self, account):
        """Return the AJAX form payload that selects ``account`` on the accounts page."""
        return {"AJAX:EVENTS_COUNT": 1,
                "AJAXREQUEST": "_viewRoot",
                "ajaxSingle": "index:setAccount",
                "autoScroll": "",
                "index": "index",
                "index:setAccount": "index:setAccount",
                "javax.faces.ViewState": account._jid,
                "cptnbr": account._id
                }

    def go_investments(self, account):
        """Select ``account`` and, unless it is a life insurance, go to the market site."""
        account = self.get_account(account.id)
        data = self.get_investments_data(account)

        # On ASV pages, data maybe not available.
        for i in range(5):
            if i > 0:
                self.logger.debug('Investments list empty, retrying in %s seconds...', (2**i))
                time.sleep(2**i)

                if i > 1:
                    # from the third attempt on, log in again and refresh the
                    # account identifiers used in the payload
                    self.do_logout()
                    self.do_login()
                    account = self.get_account(account.id)
                    data['cptnbr'] = account._id
                    data['javax.faces.ViewState'] = account._jid

            self.accountspage.go(data=data)

            if not self.page.has_error():
                break

        else:
            self.logger.warning("Unable to get investments list...")

        if self.page.is_asv:
            return

        self.starttitre.go()
        self.where = u"titre"
        self.titrepage.go()

    @need_login
    @start_with_main_site
    def get_investments(self, account):
        """Yield the investments of a market, PEA or life-insurance account."""
        if account.type not in (Account.TYPE_MARKET, Account.TYPE_PEA, Account.TYPE_LIFE_INSURANCE):
            raise NotImplementedError()
        self.go_investments(account)

        if self.where == u'titre':
            if self.cache["investments_data"].get(account.id) is None:
                self.titrerealtime.go()
            for inv in self.page.iter_investments(account):
                yield inv
        elif self.page.asv_has_detail or account._jid:
            self.accountspage.stay_or_go()
            # portfolio shares are read here, before going to the detail page
            shares = {}
            for asv_investments in self.page.iter_asv_investments():
                shares[asv_investments.label] = asv_investments.portfolio_share
            if self.go_on_asv_detail(account, '/b2b2c/epargne/CoeDetCon') is not False:
                self.where = u"asv"
                for inv in self.page.iter_investments():
                    inv.portfolio_share = shares[inv.label]
                    yield inv

    def get_history_titre(self, account):
        """Return an iterator over the history of a market/life-insurance account."""
        self.go_investments(account)

        if self.where == u'titre':
            self.titrehistory.go()
        elif self.page.asv_has_detail or account._jid:
            if self.go_on_asv_detail(account, '/b2b2c/epargne/CoeLisMvt') is False:
                return iter([])
        else:
            return iter([])

        transactions = list()
        for tr in self.page.iter_history():
            transactions.append(tr)
        if self.asv_history.is_here():
            for tr in transactions:
                page = tr._detail.result().page if tr._detail else None
                tr.investments = list(page.get_investments()) if page and 'numMvt' in page.url else []
            self.lifeback.go()
        return iter(transactions)

    ############# CapDocument #############
    @start_with_main_site
    @need_login
    def get_subscriptions(self):
        self.billpage.go()
        if self.loginpage.is_here():
            # redirected to the login page: log in again and retry
            self.do_login()
            return self.billpage.go().iter_account()
        else:
            return self.page.iter_account()

    @need_login
    def get_documents(self, subscription):
        self.billpage.go()
        data = {"AJAXREQUEST": "_viewRoot",
                "accountsel_form": "accountsel_form",
                subscription._formid: subscription._formid,
                "autoScroll": "",
                "javax.faces.ViewState": subscription._javax,
                "transfer_issuer_radio": subscription.id
                }
        self.billpage.go(data=data)
        return self.page.iter_documents(subid=subscription.id)

    def predownload(self, bill):
        self.page.postpredown(bill._localid)

    ############# CapProfile #############
    @start_with_main_site
    @need_login
    def get_profile(self):
        profile = self.profile.go(page='coordonnees').get_profile()
        self.profile.go(page='infosperso').update_profile(profile)
        return profile
class EntreparticuliersBrowser(PagesBrowser):
    """Browser for the JSON endpoints of entreparticuliers.com.

    Provides city autocompletion, housing search and housing detail lookup.
    """
    BASEURL = 'http://www.entreparticuliers.com'

    cities = URL(
        r'/HTTPHandlers/LocalisationsAutocompleteHandler.ashx\?q=(?P<pattern>.*)',
        CitiesPage)
    form_item = URL('/Default.aspx/GetElementsMoteur')
    search = URL('/default.aspx/CreateSearchParams')
    search_result = URL('/default.aspx/GetAnnonces', SearchPage)
    housing = URL('/default.aspx/GetAnnonceDetail', HousingPage)

    def search_city(self, pattern):
        """Return the cities yielded by the autocomplete page for `pattern`."""
        return self.cities.open(pattern=pattern).iter_cities()

    # Query type -> value sent as 'rubrique'.
    TYPES = {Query.TYPE_RENT: "1", Query.TYPE_SALE: "4"}

    # Query type -> house type -> 'Idchoix' value expected in the form items.
    # An empty string means there is no id for that combination.
    RET = {
        Query.TYPE_RENT: {
            Query.HOUSE_TYPES.HOUSE: '2',
            Query.HOUSE_TYPES.APART: '1',
            Query.HOUSE_TYPES.LAND: '',
            Query.HOUSE_TYPES.PARKING: '4',
            Query.HOUSE_TYPES.OTHER: '6'
        },
        Query.TYPE_SALE: {
            Query.HOUSE_TYPES.HOUSE: '2',
            Query.HOUSE_TYPES.APART: '1',
            Query.HOUSE_TYPES.LAND: '5',
            Query.HOUSE_TYPES.PARKING: '6',
            Query.HOUSE_TYPES.OTHER: '9'
        }
    }

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types):
        """Yield the housings of the first result page for each house type.

        One search is registered and fetched per entry of `house_types`.
        House types that have no id for the requested query type (empty
        string in RET) are skipped.

        :param type: query type, must be a key of TYPES
        :param cities: iterable of city ids (strings), joined with commas
        :param nb_rooms: minimum number of rooms, or a falsy value for any
        :param area_min: minimum surface, or a falsy value for none
        :param area_max: accepted but not used by this implementation
        :param cost_min: minimum price, ignored unless > 0
        :param cost_max: maximum price, ignored unless > 0
        :param house_types: iterable of Query.HOUSE_TYPES values
        :raises TypeNotSupported: when `type` is not in TYPES (raised on
            first iteration, since this is a generator)
        """
        if type not in self.TYPES:
            raise TypeNotSupported

        self.update_header()
        rubrique = self.TYPES.get(type)
        result = self.form_item.open(data="{'rubrique': '%s'}" % rubrique)
        # The payload is a JSON document whose 'd' member is itself JSON text.
        biens = json.loads(json.loads(result.content)['d'])

        for house_type in house_types:
            id_type = self.RET[type].get(house_type, '1')
            if not id_type:
                # No id for this combination (e.g. land for rent):
                # int('') would raise ValueError, so skip the search.
                continue
            wanted_choice = int(id_type)

            data = {}
            data['rubrique'] = rubrique
            data['ach_id'] = None
            data['FromMoteur'] = "true"

            # No break on purpose: the last matching item wins, as before.
            for bien in biens:
                if bien['Idchoix'] == wanted_choice:
                    data['lstSSTbien'] = bien['SsTypebien']
                    data['lstTbien'] = bien['TypeBien']
                    data['Caracteristique'] = bien['Idchoix']

            data['OrigineAlerte'] = "SaveSearchMoteurHome"
            data['pays'] = "fra"
            data['prix_min'] = cost_min if cost_min and cost_min > 0 else None
            data['prix_max'] = cost_max if cost_max and cost_max > 0 else None
            data['lstThemes'] = ""

            min_rooms = nb_rooms if nb_rooms else None
            if not min_rooms:
                data['lstNbPieces'] = 0
            else:
                data['lstNbPieces'] = ','.join(
                    '%s' % n for n in range(min_rooms, 6))

            data['lstNbChambres'] = None
            data['surface_min'] = area_min if area_min else None
            # var localisationType = { "all": -1, "ville": 5, "region": 2,
            #     "departement": 4, "pays": 1, "regionUsuelle": 3 };
            data['localisationType'] = 5
            data['reference'] = ''
            # With a radius of 0 the closest results come first, then ever
            # more distant ones without any limit. So the search is
            # arbitrarily limited to 100km around the chosen city.
            data['rayon'] = 100
            data['localisation_id_rayon'] = None
            data['lstLocalisationId'] = ','.join(cities)
            data['photos'] = 0
            data['colocation'] = ''
            data['meuble'] = ''
            data['pageNumber'] = 1
            data['order_by'] = 1
            data['sort_order'] = 1
            data['top'] = 25
            data['SaveSearch'] = "false"
            data['EmailUser'] = ""
            data['GSMUser'] = ""

            # First register the search parameters, then fetch the results.
            self.search.go(
                data="{'p_SearchParams':'%s', 'forcealerte':'0'}" % json.dumps(data))

            data = '{pageIndex: 1,source:"undefined",latmin:"0",latmax:"0",lngmin:"0",lngmax:"0"}'
            for item in self.search_result.go(data=data).iter_housings():
                yield item

    def get_housing(self, _id, obj=None):
        """Fetch one housing.

        :param _id: '#'-separated id; part 0 is sent as `idannonce`,
            part 1 as `rubrique` and part 2 as `source`
        :param obj: optional object passed through to the page's get_housing
        :return: the housing object, with its `id` set to `_id`
        """
        self.update_header()
        splitted_id = _id.split('#')
        data = '{idannonce: %s,source:"%s",rubrique:%s}' % (
            splitted_id[0], splitted_id[2], splitted_id[1])
        obj = self.housing.go(data=data).get_housing(obj=obj)
        obj.id = _id
        return obj

    def update_header(self):
        """Set the AJAX/JSON request headers on the session."""
        self.session.headers.update({
            "X-Requested-With": "XMLHttpRequest",
            "Content-Type": "application/json; charset=utf-8",
            "Accept": "application/json, text/javascript, */*; q=0.01"
        })
class NobleartBrowser(LoginBrowser):
    """Browser for the members area of membres.temple-nobleart.fr.

    Lists lessons, registers to a lesson (retrying while it is not
    bookable) and prints the lessons a partner or a coach appears in.
    """
    BASEURL = 'https://membres.temple-nobleart.fr'

    # Seconds to wait between two registration attempts.
    RETRY_DELAY = 5

    login = URL('/login', '/sessions', LoginPage)
    account = URL('/account', AccountPage)
    lesson = URL(r'/lessons/(?P<lesson_id>\d+)', '/lessons.json', LessonPage)

    def location(self, *args, **kwargs):
        """Same as the parent `location`, but prints the URL reached."""
        r = super(NobleartBrowser, self).location(*args, **kwargs)
        print("# Now on %s" % self.url)
        return r

    def do_login(self):
        """Log in and install the CSRF/AJAX headers on the session.

        :raises BrowserIncorrectPassword: if still on the login page
            after submitting the credentials
        """
        token = self.login.go().login(self.username, self.password)
        if self.login.is_here():
            raise BrowserIncorrectPassword()
        self.session.headers['x-csrf-token'] = token
        self.session.headers['x-requested-with'] = "XMLHttpRequest"

    def get_lessons(self):
        """Return the decoded JSON list of lessons for the next 5 days."""
        now = datetime.now()
        params = {
            'start': now.strftime('%Y-%m-%d'),
            'end': (now + relativedelta(days=5)).strftime('%Y-%m-%d'),
            # Millisecond timestamp sent as the '_' query parameter.
            '_': int(round(time.time() * 1000)),
            'timezone': u"Europe/Paris"
        }
        return json.loads(self.location('lessons.json', params=params).content)

    def is_full(self, lesson_id):
        """Return the 'full_lesson' flag of a lesson.

        Returns None when the lesson does not appear exactly once in the
        current lesson list.
        """
        html_id = "lesson_%s" % lesson_id
        lesson = [x for x in self.get_lessons() if x['html_id'] == html_id]
        return None if len(lesson) != 1 else lesson[0]['full_lesson']

    def check_timeslot(self):
        """Between 03:00 and 05:59, wait 10 minutes then exit with status 1."""
        hour = datetime.now().hour
        if 2 < hour < 6:
            print(colored("# Night time ! Trying again in 10 minutes...", 'blue'))
            time.sleep(600)
            sys.exit(1)

    @need_login
    def _try_register(self, lesson_id):
        """Make one registration attempt; return True if it was submitted."""
        self.check_timeslot()
        if self.is_full(lesson_id) is False:
            self.lesson.go(lesson_id=lesson_id)
            self.page.register()
            return True
        return False

    def register(self, lesson_id):
        """Register to a lesson, retrying until it is reported as not full.

        Implemented as a loop rather than recursion so that a long wait
        cannot hit the interpreter recursion limit. Login is still checked
        before every attempt through `_try_register`.
        """
        while not self._try_register(lesson_id):
            print(colored(
                "# Lesson is full ! Trying again in %d seconds..." % self.RETRY_DELAY,
                'yellow'))
            time.sleep(self.RETRY_DELAY)

    @need_login
    def show_lessons(self, name, is_coach=False):
        """Print the lessons in which `name` appears.

        :param name: partner name (checked on each lesson page), or coach
            name when `is_coach` is true (substring of the parsed coach)
        :param is_coach: look `name` up among coaches instead of attendees
        """
        found = []
        for lesson in self.get_lessons():
            lesson_id = lesson['html_id'].split('_')[-1]
            coach = re.findall(r'Prof[^>]+.([\w\s-]+)', lesson['description'])[0]
            # The last two numbers of the description are unpacked as
            # reserved and total places; only the first is used here.
            places_reserved, _places_total = re.findall(
                r'\d+', lesson['description'])[-2:]

            if not is_coach:
                if int(places_reserved) == 0:
                    # Nobody registered: the partner cannot be there.
                    continue
                self.lesson.go(lesson_id=lesson_id)

            if (not is_coach and self.page.is_registered(name)) or (is_coach and name in coach):
                date = lesson['end'].split('T')[0]
                schedule = re.findall(
                    r'(?<=>)[^\d]+(.+)',
                    lesson['formatted_title'])[-1].replace('-', 'to')
                txt = "%s - %s - %s at %s" % (lesson['title'], coach, date, schedule)
                found.append({'id': lesson_id, 'txt': txt})

        name = "%s %s" % ("Coach" if is_coach else "Partner", name)
        if found:
            print(colored('# %s found in :' % name, 'green'))
            for lesson in found:
                print(colored(
                    '# Lesson #%s : %s' % (lesson['id'], lesson['txt']), 'cyan'))
        else:
            print(colored('# %s not registered in any lesson.' % name, 'red'))
class ArteBrowser(PagesBrowser):
    """Browser for arte.tv video listings, programs and player configs."""
    BASEURL = 'http://arte.tv/'

    webservice = URL(
        r'papi/tvguide/(?P<class_name>.*)/(?P<method_name>.*)/(?P<parameters>.*).json',
        r'http://(?P<__site>.*).arte.tv/(?P<_lang>\w{2})/player/(?P<_id>.*)',
        r'https://api.arte.tv/api/player/v1/config/(?P<__lang>\w{2})/(?P<vid>.*)\?vector=(?P<___site>.*)',
        ArteJsonPage)
    videos_list = URL(
        r'http://(?P<site>.*).arte.tv/(?P<lang>\w{2})/?(?P<cat>.*?)',
        VideosListPage)
    video_page = URL(r'http://(?P<_site>.*).arte.tv/(?P<id>.+)', VideoPage)

    def __init__(self, lang, quality, order, format, version, *args, **kwargs):
        """Resolve the language, version and quality settings.

        `LANG.items`, `VERSION_VIDEO.items` and `QUALITY.items` are iterated
        as attributes, so they are presumably sequences of (key, value)
        pairs -- confirm against their definition.

        :raises StopIteration: when a key has no matching entry
        :raises UserError: when the language label is not in the version
        """
        self.order = order
        # next(gen) instead of gen.next(): works on Python 2 and 3.
        self.lang = next(value for key, value in LANG.items if key == lang)
        self.version = next(
            value for key, value in VERSION_VIDEO.items
            if self.lang.get('label') in value.keys() and version == key)
        self.quality = next(
            value for key, value in QUALITY.items if key == quality)
        self.format = format

        if self.lang.get('label') not in self.version.keys():
            raise UserError('%s is not available for %s' %
                            (self.lang.get('label'), version))

        PagesBrowser.__init__(self, *args, **kwargs)

    def search_videos(self, pattern):
        """Return the videos of the 'videos/plus7' search for `pattern`."""
        class_name = 'videos/plus7'
        method_name = 'search'
        parameters = '/'.join([
            self.lang.get('webservice'), 'L1', pattern, 'ALL', 'ALL', '-1',
            self.order, '10', '0'
        ])
        return self.webservice.go(class_name=class_name,
                                  method_name=method_name,
                                  parameters=parameters).iter_videos()

    def get_video(self, id, video=None):
        """Fetch a video from the webservice and set its `ext` and `url`."""
        class_name = 'videos'
        method_name = 'stream/player'
        parameters = '/'.join([self.lang.get('webservice'), id, 'ALL', 'ALL'])
        video = self.webservice.go(class_name=class_name,
                                   method_name=method_name,
                                   parameters=parameters).get_video(obj=video)
        video.ext, video.url = self.get_url()
        return video

    def get_url(self):
        """Return `(ext, url)` for the video described by the current page.

        For the HLS format the playlist is resolved to one stream link
        through `get_m3u8_link`; any other format is returned as mp4.
        """
        url = self.page.get_video_url(self.quality, self.format,
                                      self.version.get(self.lang.get('label')),
                                      self.lang.get('version'))
        # Compare the configured format. The bare name `format` is the
        # builtin function, so the HLS branch could never be taken.
        if self.format == FORMATS.HLS:
            return u'm3u8', self.get_m3u8_link(url)
        return u'mp4', url

    def get_m3u8_link(self, url):
        """Pick one stream link out of an m3u8 playlist.

        Every non-comment line of the playlist is turned into a link
        relative to the playlist location. The link at index
        `self.quality[1]` is returned, falling back to the first link when
        that lookup fails, or NotAvailable when there is no link at all.
        """
        # NOTE(review): `openurl` is not defined in the visible code;
        # confirm the parent browser provides it, since this method is now
        # reachable for the HLS format.
        r = self.openurl(url)
        baseurl = url.rpartition('/')[0]

        links_by_quality = [
            u'%s/%s' % (baseurl, line.replace('\n', ''))
            for line in r.readlines() if not line.startswith('#')
        ]

        if links_by_quality:
            try:
                return links_by_quality[self.quality[1]]
            except (IndexError, KeyError, TypeError):
                return links_by_quality[0]
        return NotAvailable

    def get_video_from_program_id(self, _id):
        """Return the video of a program, or None if the program has none."""
        class_name = 'epg'
        method_name = 'program'
        parameters = '/'.join([self.lang.get('webservice'), 'L2', _id])
        video = self.webservice.go(
            class_name=class_name,
            method_name=method_name,
            parameters=parameters).get_program_video()
        if video:
            return self.get_video(video.id, video)

    def latest_videos(self):
        """Return the videos of the 'videos/plus7' listing."""
        class_name = 'videos'
        method_name = 'plus7'
        parameters = '/'.join([
            self.lang.get('webservice'), 'L1', 'ALL', 'ALL', '-1',
            self.order, '10', '0'
        ])
        return self.webservice.go(class_name=class_name,
                                  method_name=method_name,
                                  parameters=parameters).iter_videos()

    def get_arte_programs(self):
        """Return the programs of the 'epg/clusters' listing."""
        class_name = 'epg'
        method_name = 'clusters'
        parameters = '/'.join([self.lang.get('webservice'), '0', 'ALL'])
        return self.webservice.go(
            class_name=class_name,
            method_name=method_name,
            parameters=parameters).iter_programs(title=self.lang.get('title'))

    def get_arte_program_videos(self, program):
        """Yield the available videos of a program.

        :param program: sequence whose last element is the cluster id
        """
        class_name = 'epg'
        method_name = 'cluster'
        parameters = '/'.join([self.lang.get('webservice'), program[-1]])
        available_videos = self.webservice.go(
            class_name=class_name,
            method_name=method_name,
            parameters=parameters).iter_program_videos()
        for item in available_videos:
            video = self.get_video_from_program_id(item.id)
            if video:
                yield video

    def get_arte_concert_categories(self):
        """Return the categories listed on the concert site."""
        return self.videos_list.go(
            site=SITE.CONCERT.get('id'),
            lang=self.lang.get('site'),
            cat='').iter_arte_concert_categories()

    def get_arte_concert_videos(self, cat):
        """Return the concert videos of the category `cat[-1]`."""
        return self.videos_list.go(
            site=SITE.CONCERT.get('id'),
            lang=self.lang.get('site'),
            cat='').iter_arte_concert_videos(cat=cat[-1])

    def get_arte_concert_video(self, id, video=None):
        """Return a concert video, or None if its player URL is not recognized."""
        json_url = self.video_page.go(_site=SITE.CONCERT.get('id'),
                                      id=id).get_json_url()
        m = re.search(
            r'http://(?P<__site>.*).arte.tv/(?P<_lang>\w{2})/player/(?P<_id>.*)',
            json_url)
        if m:
            video = self.webservice.go(
                __site=m.group('__site'),
                _lang=m.group('_lang'),
                _id=m.group('_id')).get_arte_concert_video(obj=video)
            video.id = u'%s.%s' % (video._site, id)
            video.ext, video.url = self.get_url()
            return video

    def get_arte_cinema_categories(self, cat=[]):
        """Return the cinema categories found under the path `cat`.

        The menu paths are folded into a nested dict whose leaves are kept
        under the "end" key. `cat` is only read, never mutated; its first
        element is dropped before walking the tree.
        """
        menu = self.videos_list.go(site=SITE.CINEMA.get('id'),
                                   lang=self.lang.get('site'),
                                   cat='').get_arte_cinema_menu()

        result = {}
        for record in (entry.split("/")[2:] for entry in menu):
            here = result
            for item in record[:-1]:
                if item not in here:
                    here[item] = {}
                here = here[item]
            if "end" not in here:
                here["end"] = []
            here["end"].append(record[-1])

        cat = cat if not cat else cat[1:]
        for el in cat:
            result = result.get(el)

        if "end" in result.keys():
            return self.page.iter_arte_cinema_categories(cat='/'.join(cat))

        return [
            Collection([SITE.CINEMA.get('id'), unicode(item)], unicode(item))
            for item in result.keys()
        ]

    def get_arte_cinema_videos(self, cat):
        """Return the cinema videos under the path `cat[1:]`."""
        return self.videos_list.go(
            site=SITE.CINEMA.get('id'),
            lang=self.lang.get('site'),
            cat='/%s' % '/'.join(cat[1:])).get_arte_cinema_videos()

    def get_arte_cinema_video(self, id, video=None):
        """Return a cinema video, or None if its config URL is not recognized."""
        json_url = self.video_page.go(_site=SITE.CINEMA.get('id'),
                                      id=id).get_json_url()
        m = re.search(
            r'https://api.arte.tv/api/player/v1/config/(\w{2})/(.*)\?vector=(.*)\&.*',
            json_url)
        if m:
            video = self.webservice.go(
                __lang=m.group(1),
                vid=m.group(2),
                ___site=m.group(3)).get_arte_cinema_video(obj=video)
            video.ext, video.url = self.get_url()
            video.id = id
            return video
class EdfBrowser(LoginBrowser, StatesMixin):
    """Browser for the EDF "particulier" customer area.

    Handles the multi-step JSON login (username, then one-time password or
    password), then exposes subscriptions, bills, bill download and profile.
    """
    BASEURL = 'https://particulier.edf.fr'

    home = URL('/fr/accueil/contrat-et-conso/mon-compte-edf.html', HomePage)
    authenticate = URL(r'https://espace-client.edf.fr/sso/json/authenticate', AuthenticatePage)
    authorize = URL(
        r'https://espace-client.edf.fr/sso/oauth2/INTERNET/authorize',
        AuthorizePage)
    wrong_password = URL(
        r'https://espace-client.edf.fr/connexion/mon-espace-client/templates/openam/authn/PasswordAuth2.html',
        WrongPasswordPage)
    check_authenticate = URL('/services/rest/openid/checkAuthenticate', CheckAuthenticatePage)
    user_status = URL('/services/rest/checkuserstatus/getUserStatus')
    not_connected = URL('/fr/accueil/connexion/mon-espace-client.html', UnLoggedPage)
    connected = URL('/fr/accueil/espace-client/tableau-de-bord.html', WelcomePage)
    profil = URL('/services/rest/authenticate/getListContracts', ProfilPage)
    csrf_token = URL(r'/services/rest/init/initPage\?_=(?P<timestamp>.*)', ProfilPage)
    documents = URL('/services/rest/edoc/getMyDocuments', DocumentsPage)
    bills = URL('/services/rest/edoc/getBillsDocuments', DocumentsPage)
    bill_informations = URL('/services/rest/document/dataUserDocumentGetX', DocumentsPage)
    bill_download = URL(
        r'/services/rest/document/getDocumentGetXByData'
        r'\?csrfToken=(?P<csrf_token>.*)&dn=(?P<dn>.*)&pn=(?P<pn>.*)'
        r'&di=(?P<di>.*)&bn=(?P<bn>.*)&an=(?P<an>.*)', BillDownload)
    profile = URL('/services/rest/context/getCustomerContext', ProfilePage)

    # Attributes named here are presumably saved/restored by StatesMixin
    # between sessions -- confirm against the mixin.
    __states__ = ['id_token1', 'otp_data']

    def __init__(self, config, *args, **kwargs):
        """Store `config` and forward its login/password to the parent.

        :param config: mapping with 'login', 'password' and 'otp' entries,
            each exposing a `.get()` method
        """
        self.config = config
        # JSON payload of the pending OTP challenge, set by do_login.
        self.otp_data = None
        # Token read from the response to an OTP submission, reused on
        # later logins in place of a new OTP.
        self.id_token1 = None
        kwargs['username'] = self.config['login'].get()
        kwargs['password'] = self.config['password'].get()
        super(EdfBrowser, self).__init__(*args, **kwargs)

    def locate_browser(self, state):
        """Do nothing.

        NOTE(review): presumably overrides a StatesMixin hook so the last
        visited URL is not reopened when state is loaded -- confirm.
        """
        pass

    def do_login(self):
        """Run the EDF login exchange.

        With an 'otp' config value, the stored OTP challenge is answered and
        `id_token1` is read from the response. Otherwise the username is
        sent, then `id_token1` (or a blank) and finally either an OTP is
        requested or the password is sent. Both paths finish by re-posting
        the last response, storing the 'ivoiream' cookie and calling the
        user-status and check-authenticate services.

        :raises BrowserIncorrectPassword: on 'UsernameAuth2' stage, or when
            the stage is still 'PasswordAuth2' after sending the password
        :raises BrowserQuestion: when the site asks for an OTP
        """
        # ********** admire how login works on edf par website **********
        # login part on edf particulier website is very tricky
        # FIRST time we connect we have an otp, BUT not password, we can't know if it is wrong at this moment
        # SECOND time we use password, and not otp
        auth_params = {'realm': '/INTERNET'}

        if self.config['otp'].get():
            # Answer the challenge saved in otp_data by the previous attempt.
            self.otp_data['callbacks'][0]['input'][0]['value'] = self.config[
                'otp'].get()
            headers = {
                'X-Requested-With': 'XMLHttpRequest',
            }
            self.authenticate.go(json=self.otp_data, params=auth_params, headers=headers)
            self.id_token1 = self.page.get_data(
            )['callbacks'][1]['output'][0]['value']
            # id_token1 is VERY important, we keep it indefinitely, without it edf will ask again otp
        else:
            self.location('/bin/edf_rc/servlets/sasServlet', params={'processus': 'TDB'})
            if self.connected.is_here():
                # we are already logged
                # sometimes even if password is wrong, you can be logged if you retry
                self.logger.info('already logged')
                return

            # Step 1: fetch the empty form, fill in the username, send it.
            self.authenticate.go(method='POST', params=auth_params)
            data = self.page.get_data()
            data['callbacks'][0]['input'][0]['value'] = self.username

            self.authenticate.go(json=data, params=auth_params)
            data = self.page.get_data(
            )  # yes, we have to get response and send it again, beautiful isn't it ?
            if data['stage'] == 'UsernameAuth2':
                # username is wrong
                raise BrowserIncorrectPassword(
                    data['callbacks'][1]['output'][0]['value'])

            # Step 2: send id_token1 when we have one.
            if self.id_token1:
                data['callbacks'][0]['input'][0]['value'] = self.id_token1
            else:
                # the FIRST time we connect, we don't have id_token1, we have no choice, we'll receive an otp
                data['callbacks'][0]['input'][0]['value'] = ' '

            self.authenticate.go(json=data, params=auth_params)
            data = self.page.get_data()

            assert data['stage'] in (
                'HOTPcust3', 'PasswordAuth2'), 'stage is %s' % data['stage']

            if data['stage'] == 'HOTPcust3':  # OTP part
                if self.id_token1:
                    # this shouldn't happen except if id_token1 expire one day, who knows...
                    self.logger.warning(
                        'id_token1 is not null but edf ask again for otp')

                # a legend say this url is the answer to life the universe and everything, because it is use EVERYWHERE in login
                self.authenticate.go(json=self.page.get_data(), params=auth_params)
                # Keep the challenge so the next do_login can answer it.
                self.otp_data = self.page.get_data()
                label = self.otp_data['callbacks'][0]['output'][0]['value']
                raise BrowserQuestion(Value('otp', label=label))

            if data['stage'] == 'PasswordAuth2':  # password part
                data['callbacks'][0]['input'][0]['value'] = self.password
                # NOTE(review): the password is written into `data` but
                # `self.page.get_data()` is what gets sent; this only works
                # if get_data() returns that same object -- confirm.
                self.authenticate.go(json=self.page.get_data(), params=auth_params)

                # should be SetPasAuth2 if password is ok
                if self.page.get_data()['stage'] == 'PasswordAuth2':
                    attempt_number = self.page.get_data(
                    )['callbacks'][1]['output'][0]['value']
                    # attempt_number is the number of wrong password
                    msg = self.wrong_password.go().get_wrongpass_message(
                        attempt_number)
                    raise BrowserIncorrectPassword(msg)

        data = self.page.get_data()
        # yes, send previous data again, i know i know
        self.authenticate.go(json=data, params=auth_params)
        self.session.cookies['ivoiream'] = self.page.get_token()

        self.user_status.go()
        """ call check_authenticate url before get subscription in profil, or we'll get an error 'invalid session' we do nothing with this response (which contains false btw) but edf website expect we call it before or will reject us """
        self.check_authenticate.go()

    def get_csrf_token(self):
        """Return the token of the init page, requested with the current time."""
        return self.csrf_token.go(timestamp=int(time())).get_token()

    @need_login
    def get_subscription_list(self):
        """Return the subscriptions listed on the contracts page."""
        return self.profil.stay_or_go().iter_subscriptions()

    @need_login
    def iter_documents(self, subscription):
        """Return the bills of `subscription` (matched by its id)."""
        self.documents.go(
        )  # go to docs before, else we get an error, thanks EDF
        return self.bills.go().iter_bills(subid=subscription.id)

    @retry(BrokenPageError, tries=2, delay=4)
    @need_login
    def download_document(self, document):
        """Download a bill and return the content of the download page.

        :param document: object exposing `_bp`, `_doc_number`, `_num_acc`
            and `_par_number`
        :raises BrokenPageError: when the response is not the download page
            (the decorator retries on this error)
        """
        token = self.get_csrf_token()

        # Ask for the bill information first; the download URL needs the
        # 'bpNumber' and 'numAcc' values of that response.
        bills_informations = self.bill_informations.go(
            headers={
                'Content-Type': 'application/json;charset=UTF-8',
                'Accept': 'application/json, text/plain, */*'
            },
            data=json.dumps({
                'bpNumber': document._bp,
                'csrfToken': token,
                'docId': document._doc_number,
                'docName': 'FACTURE',
                'numAcc': document._num_acc,
                'parNumber': document._par_number
            })).get_bills_informations()

        self.bill_download.go(csrf_token=token,
                              dn='FACTURE',
                              pn=document._par_number,
                              di=document._doc_number,
                              bn=bills_informations.get('bpNumber'),
                              an=bills_informations.get('numAcc'))

        # sometimes we land to another page that tell us, this document doesn't exist, but just sometimes...
        # make sure this page is the right one to avoid return a html page as document
        if not self.bill_download.is_here():
            raise BrokenPageError()

        return self.page.content

    @need_login
    def get_profile(self):
        """Return the profile read from the customer context page."""
        self.profile.go()
        return self.page.get_profile()
class HSBC(LoginBrowser):
    """Browser for the HSBC France customer site (accounts and history)."""
    BASEURL = 'https://client.hsbc.fr'
    # Flag telling get_history that account links must be refreshed.
    app_gone = False

    connection = URL(r'https://www.hsbc.fr/1/2/hsbc-france/particuliers/connexion', LoginPage)
    login = URL(r'https://www.hsbc.fr/1/*', LoginPage)
    cptPage = URL(r'/cgi-bin/emcgi.*\&Cpt=.*',
                  r'/cgi-bin/emcgi.*\&Epa=.*',
                  r'/cgi-bin/emcgi.*\&CPT_IdPrestation.*',
                  r'/cgi-bin/emcgi.*\&Ass_IdPrestation.*',
                  CPTOperationPage)
    cbPage = URL(r'/cgi-bin/emcgi.*\&Cb=.*',
                 r'/cgi-bin/emcgi.*\&CB_IdPrestation.*',
                 CBOperationPage)
    appGone = URL(r'/.*_absente.html',
                  r'/pm_absent_inter.html',
                  AppGonePage)
    accounts = URL(r'/cgi-bin/emcgi', AccountsPage)

    def __init__(self, username, password, secret, *args, **kwargs):
        """Keep the secret answer and start with an empty account cache."""
        self.accounts_list = {}
        self.secret = secret
        LoginBrowser.__init__(self, username, password, *args, **kwargs)

    def load_state(self, state):
        """Ignore any saved state."""
        return

    def prepare_request(self, req):
        """Prepare the request and pin its HTTPS connection to TLSv1."""
        prepared = super(HSBC, self).prepare_request(req)
        https_adapter = self.session.adapters['https://']
        connection = https_adapter.get_connection(prepared.url)
        connection.ssl_version = ssl.PROTOCOL_TLSv1
        return prepared

    def do_login(self):
        """Log in with the username, then the password and secret.

        :raises BrowserIncorrectPassword: when the "no secure key" link or
            the home frame is missing, or the page is not logged
        """
        self.connection.go()
        self.page.login(self.username)

        secure_link = self.page.get_no_secure_key()
        if not secure_link:
            raise BrowserIncorrectPassword()

        self.location(secure_link)
        self.page.login_w_secure(self.password, self.secret)

        # Up to two intermediate forms may have to be submitted.
        for _attempt in range(2):
            if self.login.is_here():
                self.page.useless_form()

        frame_url = self.page.get_frame()
        if not frame_url or not self.page.logged:
            raise BrowserIncorrectPassword()

        self.location(frame_url)

    @need_login
    def get_accounts_list(self):
        """Refresh the account cache, then yield every cached account."""
        self.update_accounts_list()
        for account in self.accounts_list.values():
            yield account

    @need_login
    def update_accounts_list(self):
        """Add new accounts to the cache and refresh links of known ones."""
        for fresh in list(self.accounts.stay_or_go().iter_accounts()):
            link = fresh._link_id
            if fresh.id in self.accounts_list:
                self.accounts_list[fresh.id]._link_id = link
            else:
                self.accounts_list[fresh.id] = fresh

    @need_login
    def get_history(self, account, coming=False):
        """Return the transactions of `account`.

        :param coming: select transactions dated after today instead of
            those dated today or earlier
        :return: None when the account has no link or no page was loaded,
            a list for card pages, a generator otherwise
        :raises NotImplementedError: for javascript/'&Crd=' links, and for
            coming transactions outside card pages
        """
        if account._link_id is None:
            return

        unsupported = (account._link_id.startswith('javascript')
                       or '&Crd=' in account._link_id)
        if unsupported:
            raise NotImplementedError()

        self.location(self.accounts_list[account.id]._link_id)

        # If we relogin on hsbc, all links have changed
        if self.app_gone:
            self.app_gone = False
            self.update_accounts_list()
            self.location(self.accounts_list[account.id]._link_id)

        if self.page is None:
            return

        if not self.cbPage.is_here():
            if coming:
                raise NotImplementedError()
            return self._get_history()

        guesser = LinearDateGuesser(date_max_bump=timedelta(45))
        selected = []
        for tr in self.page.get_history(date_guesser=guesser):
            if coming:
                wanted = tr.date > date.today()
            else:
                wanted = tr.date <= date.today()
            if wanted:
                selected.append(tr)
        return selected

    def _get_history(self):
        """Yield the transactions of the current page."""
        for transaction in self.page.get_history():
            yield transaction
class AmeliBrowser(LoginBrowser):
    """Browser for the assure.ameli.fr insured-person portal."""
    BASEURL = 'https://assure.ameli.fr'

    loginp = URL(
        'https://assure.ameli.fr/PortailAS/appmanager/PortailAS/assure\?.*_pageLabel=as_login_page',
        LoginPage)
    homep = URL(
        '/PortailAS/appmanager/PortailAS/assure\?_nfpb=true&_pageLabel=as_accueil_page',
        HomePage)
    accountp = URL(
        '/PortailAS/appmanager/PortailAS/assure\?_nfpb=true&_pageLabel=as_info_perso_page',
        AccountPage)
    paymentsp = URL(
        '/PortailAS/appmanager/PortailAS/assure\?_nfpb=true&_pageLabel=as_paiements_page',
        PaymentsPage)
    paymentdetailsp = URL(
        '/PortailAS/paiements.do\?actionEvt=chargerDetailPaiements.*',
        PaymentDetailsPage)
    lastpaymentsp = URL(
        '/PortailAS/paiements.do\?actionEvt=afficherPaiements.*',
        LastPaymentsPage)
    pdf_page = URL(
        r'PortailAS/PDFServletReleveMensuel.dopdf\?PDF.moisRecherche=.*',
        Raw)

    def do_login(self):
        """Log in; return True when the home page is already displayed.

        :raises BrowserIncorrectPassword: when the login page reports an
            error, or the home page cannot be reached afterwards
        """
        self.logger.debug('call Browser.do_login')
        self.loginp.stay_or_go()
        if self.homep.is_here():
            return True

        self.page.login(self.username, self.password)

        login_error = self.page.is_error()
        if login_error:
            raise BrowserIncorrectPassword(login_error)

        # The redirection is not followed by the browser, so go to the
        # home page manually.
        self.homep.stay_or_go()
        if not self.homep.is_here():
            raise BrowserIncorrectPassword()

    def _open_last_payments(self):
        """Open the last-payments page through the link of the payments page."""
        self.paymentsp.stay_or_go()
        last_payments_url = self.page.get_last_payments_url()
        self.location(last_payments_url)
        assert self.lastpaymentsp.is_here()

    @need_login
    def iter_subscription_list(self):
        """Return the subscriptions found on the account page."""
        self.logger.debug('call Browser.iter_subscription_list')
        self.accountp.stay_or_go()
        return self.page.iter_subscription_list()

    @need_login
    def get_subscription(self, id):
        """Return the subscription whose `_id` equals `id`, or None."""
        self.logger.debug('call Browser.get_subscription')
        assert isinstance(id, basestring)
        candidates = self.iter_subscription_list()
        return next((sub for sub in candidates if id == sub._id), None)

    @need_login
    def iter_history(self, sub):
        """Yield the payment details of every recent payment for `sub`."""
        self.logger.debug('call Browser.iter_history')
        self._open_last_payments()
        detail_urls = self.page.iter_last_payments()
        for detail_url in detail_urls:
            self.location(detail_url)
            assert self.paymentdetailsp.is_here()
            for detail in self.page.iter_payment_details(sub):
                yield detail

    @need_login
    def iter_documents(self, sub):
        """Yield the documents of the last-payments page for `sub`."""
        self.logger.debug('call Browser.iter_documents')
        self._open_last_payments()
        for doc in self.page.iter_documents(sub):
            yield doc

    @need_login
    def get_document(self, id):
        """Return the document whose id equals `id`, or False if not found."""
        self.logger.debug('call Browser.get_document')
        assert isinstance(id, basestring)
        subscriptions = self.iter_subscription_list()
        matches = (doc
                   for sub in subscriptions
                   for doc in self.iter_documents(sub)
                   if id == doc.id)
        return next(matches, False)
class ResidentadvisorBrowser(LoginBrowser):
    """Browser for residentadvisor.net event listings and attendance."""
    BASEURL = 'http://www.residentadvisor.net'

    # this ID is used by Resident Advisor
    ALBANIA_ID = 223

    login = URL('https://www.residentadvisor.net/login', LoginPage)
    event = URL(r'/event.aspx\?(?P<id>\d+)', EventPage)
    list_events = URL(
        r'/events.aspx\?ai=(?P<city>\d+)&v=(?P<v>.+)&yr=(?P<year>\d{4})'
        r'&mn=(?P<month>\d\d?)&dy=(?P<day>\d\d?)',
        ListPage)
    # '&section' had been corrupted into the '§' character ('&sect' read
    # as an HTML entity), which broke the query string.
    search_page = URL(
        r'/search.aspx\?searchstr=(?P<query>.+)&section=events&titles=1',
        SearchPage)
    attends = URL('/Output/addhandler.ashx')

    def do_login(self):
        """Log in.

        :raises BrowserIncorrectPassword: if still on the login page after
            submitting the credentials
        """
        self.login.stay_or_go()
        self.page.login(self.username, self.password)

        # in case of successful connection, we are redirected to the home page
        if self.login.is_here():
            raise BrowserIncorrectPassword()

    def get_events(self, city, v='week', date=None):
        """Yield the events listed for a city.

        :param city: city id
        :param v: listing view, 'week' by default
        :param date: listing date; defaults to the current date and time,
            evaluated at call time rather than once at import
        """
        if date is None:
            date = datetime.now()
        self.list_events.go(v=v, year=date.year, month=date.month,
                            day=date.day, city=city)
        assert self.list_events.is_here()

        for event in self.page.get_events():
            yield event

    def get_event(self, _id):
        """Return the event `_id`, or None when its page is not reached."""
        self.event.go(id=_id)
        if not self.event.is_here():
            return None

        event = self.page.get_event()
        event.id = _id
        event.url = self.event.build(id=_id)
        return event

    def search_events_by_summary(self, pattern):
        """Yield the events returned by the site search for `pattern`."""
        self.search_page.go(query=pattern)
        assert self.search_page.is_here()

        for event in self.page.get_events():
            yield event

    def get_country_city_id(self, country, city):
        """Return the id of `city` within `country`, or None if not found.

        The country id is read from the listing page opened with
        ALBANIA_ID, then the city id from that country's listing page.
        """
        now = datetime.now()

        self.list_events.go(v='day', year=now.year, month=now.month,
                            day=now.day, city=self.ALBANIA_ID)
        assert self.list_events.is_here()

        country_id = self.page.get_country_id(country)
        if country_id is None:
            return None

        self.list_events.go(v='day', year=now.year, month=now.month,
                            day=now.day, city=country_id)
        assert self.list_events.is_here()

        # get_city_id already yields None when the city is unknown.
        return self.page.get_city_id(city)

    def get_city_id(self, city):
        """Return the id of `city`, walking the countries one after another.

        Starts at ALBANIA_ID and follows `get_country_id_next_to` until the
        city is found or there is no next country; returns None in the
        latter case.
        """
        now = datetime.now()
        country_id = self.ALBANIA_ID
        city_id = None

        while True:
            self.list_events.go(v='day', year=now.year, month=now.month,
                                day=now.day, city=country_id)
            assert self.list_events.is_here()

            city_id = self.page.get_city_id(city)
            country_id = self.page.get_country_id_next_to(country_id)

            # city_id != None => city found
            # country_id = None => no more country, city not found
            if city_id is not None or country_id is None:
                break

        return city_id

    @need_login
    def attends_event(self, id, is_attending):
        """Save (or delete, when `is_attending` is false) an 'attending' favourite."""
        data = {'type': 'saveFavourite', 'action': 'attending', 'id': id}
        if not is_attending:
            data['type'] = 'deleteFavourite'

        self.attends.open(data=data)
class HSBC(LoginBrowser):
    """Browser for the HSBC France customer site.

    Covers accounts, RIB, history, and the separate life insurance space
    (history and investments).
    """
    BASEURL = 'https://client.hsbc.fr'
    # Set when a re-login happened, so account links get refreshed.
    app_gone = False

    connection = URL(r'https://www.hsbc.fr/1/2/hsbc-france/particuliers/connexion', LoginPage)
    login = URL(r'https://www.hsbc.fr/1/*', LoginPage)
    cptPage = URL(r'/cgi-bin/emcgi.*\&Cpt=.*',
                  r'/cgi-bin/emcgi.*\&Epa=.*',
                  r'/cgi-bin/emcgi.*\&CPT_IdPrestation.*',
                  r'/cgi-bin/emcgi.*\&Ass_IdPrestation.*',
                  CPTOperationPage)
    cbPage = URL(r'/cgi-bin/emcgi.*\&Cb=.*',
                 r'/cgi-bin/emcgi.*\&CB_IdPrestation.*',
                 CBOperationPage)
    appGone = URL(r'/.*_absente.html',
                  r'/pm_absent_inter.html',
                  '/appli_absente_MBEL.html',
                  AppGonePage)
    rib = URL(r'/cgi-bin/emcgi', RibPage)
    accounts = URL(r'/cgi-bin/emcgi', AccountsPage)

    # separated space
    life_insurances = URL('https://assurances.hsbc.fr/navigation', LifeInsurancesPage)

    def __init__(self, username, password, secret, *args, **kwargs):
        """Keep the secret answer and start with an empty account cache."""
        self.accounts_list = dict()
        self.secret = secret
        LoginBrowser.__init__(self, username, password, *args, **kwargs)

    def load_state(self, state):
        """Ignore any saved state."""
        return

    def prepare_request(self, req):
        """Prepare the request and pin its HTTPS connection to TLSv1."""
        preq = super(HSBC, self).prepare_request(req)
        conn = self.session.adapters['https://'].get_connection(preq.url)
        conn.ssl_version = ssl.PROTOCOL_TLSv1
        return preq

    def do_login(self):
        """Log in with the username, then the password and secret.

        Also stores the page's JS url in `self.js_url`, which is used to
        reach the life insurance space.

        :raises BrowserIncorrectPassword: when the "no secure key" link or
            the home frame is missing, or the page is not logged
        """
        self.connection.go()
        self.page.login(self.username)

        no_secure_key_link = self.page.get_no_secure_key()
        if not no_secure_key_link:
            raise BrowserIncorrectPassword()

        self.location(no_secure_key_link)
        self.page.login_w_secure(self.password, self.secret)

        # Up to two intermediate forms may have to be submitted.
        for _ in range(2):
            if self.login.is_here():
                self.page.useless_form()

        self.js_url = self.page.get_js_url()
        home_url = self.page.get_frame()
        if not home_url or not self.page.logged:
            raise BrowserIncorrectPassword()

        self.location(home_url)

    @need_login
    def get_accounts_list(self):
        """Yield the cached accounts, loading them first if the cache is empty."""
        if not self.accounts_list:
            self.update_accounts_list()

        for a in self.accounts_list.values():
            yield a

    @need_login
    def update_accounts_list(self):
        """Refresh account links, add new accounts, then load the RIBs."""
        for a in list(self.accounts.stay_or_go().iter_accounts()):
            try:
                self.accounts_list[a.id]._link_id = a._link_id
            except KeyError:
                self.accounts_list[a.id] = a

        self.location('%s%s' % (self.page.url, '&debr=COMPTES_RIB'))
        self.page.get_rib(self.accounts_list)

    @need_login
    def _quit_li_space(self):
        """Leave the life insurance space, if currently in it."""
        if self.life_insurances.is_here():
            self.page.disconnect_order()

            try:
                self.session.cookies.pop('ErisaSession')
                self.session.cookies.pop('HBFR-INSURANCE-COOKIE-82')
            except KeyError:
                # Cookie already gone: nothing left to clean.
                pass

            home_url = self.page.get_frame()
            self.js_url = self.page.get_js_url()

            self.location(home_url)

    @need_login
    def _go_to_life_insurance(self, lfid):
        """Enter the life insurance space and open the contract `lfid`."""
        self._quit_li_space()

        url = (self.js_url + 'PLACEMENTS_ASS').split('?')
        # Replay the query string as form data, first value of each key.
        data = {k: v[0] for k, v in parse_qs(url[1]).items()}

        self.location(url[0], data=data).page.redirect_li_space()
        self.life_insurances.go(data={'url_suivant': 'PARTIEGENERIQUEB2C'})

        data = {'url_suivant': 'SITUATIONCONTRATB2C', 'strNumAdh': ''}
        data.update(self.page.get_lf_attributes(lfid))

        self.life_insurances.go(data=data)

    def _open_life_insurance(self, account):
        """Open the contract of `account`; return False if unreachable.

        On failure the life insurance space is left before returning.
        """
        try:
            self._go_to_life_insurance(account.id)
        except (XMLSyntaxError, HTTPNotFound, AccountNotFound):
            self._quit_li_space()
            return False
        return True

    @need_login
    def get_history(self, account, coming=False):
        """Return the transactions of `account`.

        :param coming: select transactions dated after today instead of
            those dated today or earlier
        :return: None when the account has no link or no page was loaded,
            an empty iterator when the life insurance contract cannot be
            opened, a list or a generator of transactions otherwise
        :raises NotImplementedError: for javascript/'&Crd=' links, for
            coming transactions of life insurances, and for coming
            transactions outside card pages
        """
        if account._link_id is None:
            return

        if account._link_id.startswith('javascript') or '&Crd=' in account._link_id:
            raise NotImplementedError()

        if account.type == Account.TYPE_LIFE_INSURANCE:
            if coming is True:
                raise NotImplementedError()

            if not self._open_life_insurance(account):
                return iter([])

            self.life_insurances.go(data={'url_suivant': 'HISTORIQUECONTRATB2C',
                                          'strMonnaie': 'EURO'})
            history = list(self.page.iter_history())
            self._quit_li_space()
            return history

        try:
            self.location(self.accounts_list[account.id]._link_id)
        except HTTPNotFound:
            # sometimes going to the hsbc life insurance space logs us out
            self.app_gone = True
            self.do_logout()
            self.do_login()

        # If we relogin on hsbc, all links have changed
        if self.app_gone:
            self.app_gone = False
            self.update_accounts_list()
            self.location(self.accounts_list[account.id]._link_id)

        if self.page is None:
            return

        if self.cbPage.is_here():
            guesser = LinearDateGuesser(date_max_bump=timedelta(45))
            return [tr for tr in self.page.get_history(date_guesser=guesser)
                    if (coming and tr.date > date.today())
                    or (not coming and tr.date <= date.today())]
        elif not coming:
            return self._get_history()
        else:
            raise NotImplementedError()

    def _get_history(self):
        """Yield the transactions of the current page."""
        for tr in self.page.get_history():
            yield tr

    def get_investments(self, account):
        """Return the investments of a life insurance account.

        :return: a list, or an empty iterator when the contract cannot be
            opened
        :raises NotImplementedError: for any other account type
        """
        if account.type != Account.TYPE_LIFE_INSURANCE:
            raise NotImplementedError()

        if not self._open_life_insurance(account):
            return iter([])

        investments = list(self.page.iter_investments())
        self._quit_li_space()
        return investments
class LCLBrowser(LoginBrowser):
    """Browser driving the LCL retail-banking website (see ``BASEURL``).

    Exposes account listing, transaction history, card operations and
    investments by navigating the pages declared below.
    """

    BASEURL = 'https://particuliers.secure.lcl.fr'

    login = URL('/outil/UAUT/Authentication/authenticate',
                '/outil/UAUT\?from=.*',
                '/outil/UWER/Accueil/majicER',
                '/outil/UWER/Enregistrement/forwardAcc',
                LoginPage)
    contracts = URL('/outil/UAUT/Contrat/choixContrat.*',
                    '/outil/UAUT/Contract/getContract.*',
                    '/outil/UAUT/Contract/selectContracts.*',
                    '/outil/UAUT/Accueil/preRoutageLogin',
                    '.*outil/UAUT/Contract/routing',
                    ContractsPage)
    home = URL('/outil/UWHO/Accueil/', HomePage)
    accounts = URL('/outil/UWSP/Synthese', AccountsPage)
    history = URL('/outil/UWLM/ListeMouvements.*/accesListeMouvements.*',
                  '/outil/UWLM/DetailMouvement.*/accesDetailMouvement.*',
                  '/outil/UWLM/Rebond',
                  AccountHistoryPage)
    rib = URL('/outil/UWRI/Accueil/detailRib',
              '/outil/UWRI/Accueil/listeRib',
              RibPage)
    cb_list = URL('/outil/UWCB/UWCBEncours.*/listeCBCompte.*', CBListPage)
    cb_history = URL('/outil/UWCB/UWCBEncours.*/listeOperations.*', CBHistoryPage)
    skip = URL('/outil/UAUT/Contrat/selectionnerContrat.*',
               '/index.html')
    no_perm = URL('/outil/UAUT/SansDroit/affichePageSansDroit.*', NoPermissionPage)

    bourse = URL('https://bourse.secure.lcl.fr/netfinca-titres/servlet/com.netfinca.frontcr.synthesis.HomeSynthesis',
                 'https://bourse.secure.lcl.fr/netfinca-titres/servlet/com.netfinca.frontcr.account.*',
                 '/outil/UWBO.*',
                 BoursePage)
    disc = URL('https://bourse.secure.lcl.fr/netfinca-titres/servlet/com.netfinca.frontcr.login.ContextTransferDisconnect',
               '/outil/UAUT/RetourPartenaire/retourCar',
               DiscPage)

    assurancevie = URL('/outil/UWVI/AssuranceVie/accesSynthese', AVPage)
    avdetail = URL('https://ASSURANCE-VIE-et-prevoyance.secure.lcl.fr.*',
                   'https://assurance-vie-et-prevoyance.secure.lcl.fr.*',
                   '/outil/UWVI/Routage',
                   AVDetailPage)

    loans = URL('/outil/UWCR/SynthesePar/', LoansPage)

    TIMEOUT = 30.0

    def do_login(self):
        """Authenticate with ``self.username`` / ``self.password``.

        The password must be made of digits only. Raises
        ``BrowserIncorrectPassword`` when the login page rejects the
        credentials or reports an error.
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)
        assert self.password.isdigit()

        failure_message = (
            "invalid login/password.\n"
            "If you did not change anything, be sure to check for password renewal request\n"
            "on the original web site.\n"
            "Automatic renewal will be implemented later."
        )

        # Always start from the login page so that logging in also works
        # when the previous session has expired.
        self.login.go()

        if not self.page.login(self.username, self.password):
            raise BrowserIncorrectPassword(failure_message)
        if self.login.is_here() and self.page.is_error():
            raise BrowserIncorrectPassword(failure_message)

        self.accounts.stay_or_go()

    @need_login
    def connexion_bourse(self):
        """Enter the "bourse" (stock market) space.

        Returns False when the site answers with the no-permission page,
        True once the bourse page has been reached.
        """
        self.location('/outil/UWBO/AccesBourse/temporisationCar?codeTicker=TICKERBOURSECLI')
        if self.no_perm.is_here():
            return False

        next_url = self.page.get_next()
        self.location(next_url)
        self.bourse.stay_or_go()
        return True

    def deconnexion_bourse(self):
        """Leave the bourse space; ``come_back()`` is invoked twice on purpose."""
        self.disc.stay_or_go()
        for _ in range(2):
            self.page.come_back()

    @need_login
    def get_accounts_list(self):
        """Yield life insurances, then loans, then the regular accounts.

        Regular accounts get their ``iban`` filled from the RIB page when
        it is reachable, and are passed through the bourse page's
        ``populate()`` when the bourse space is accessible.
        """
        self.assurancevie.stay_or_go()
        if self.no_perm.is_here():
            self.logger.warning('Life insurances are unavailable.')
        else:
            for insurance in self.page.get_list():
                yield insurance

        self.accounts.stay_or_go()
        regular_accounts = []
        for account in self.page.get_list():
            self.location('/outil/UWRI/Accueil/')
            rib_key = '%s/%s/%s' % (account.id[0:5], account.id[5:11], account.id[11:])
            self.rib.go(data={'compte': rib_key})
            if self.rib.is_here():
                account.iban = self.page.get_iban()
            regular_accounts.append(account)

        self.loans.stay_or_go()
        for loan in self.page.get_list():
            yield loan

        if not self.connexion_bourse():
            for account in regular_accounts:
                yield account
            return

        populated = list(self.page.populate(regular_accounts))
        # Disconnect from the bourse portal before handing accounts back,
        # so that the browser is on the bank portal again.
        self.deconnexion_bourse()
        for account in populated:
            yield account

    @need_login
    def get_history(self, account):
        """Yield the account operations, then last month's card operations."""
        has_link = hasattr(account, '_link_id') and account._link_id
        if not has_link:
            return

        self.location(account._link_id)
        for operation in self.page.get_operations():
            yield operation

        for operation in self.get_cb_operations(account, 1):
            yield operation

    @need_login
    def get_cb_operations(self, account, month=0):
        """
        Get CB operations.

        * month=0 : current operations (non debited)
        * month=1 : previous month operations (debited)
        """
        if not hasattr(account, '_coming_links'):
            return

        for coming_link in account._coming_links:
            parts = urlsplit(self.absurl(coming_link))
            query = dict(parse_qsl(parts.query))
            query['MOIS'] = month
            self.location('%s?%s' % (parts.path, urllib.urlencode(query)))

            for operation in self.page.get_operations():
                yield operation

            for card_link in self.page.get_cards():
                self.location(card_link)
                for operation in self.page.get_operations():
                    yield operation

    def disc_from_AV_investment_detail(self):
        """Walk back out of the life-insurance investment detail pages."""
        self.page.come_back()
        self.page.sub()
        self.page.come_back()

    @need_login
    def get_investment(self, account):
        """Yield investments of a life-insurance or market account.

        Accounts matching neither case yield nothing.
        """
        is_life_insurance = account.type == Account.TYPE_LIFE_INSURANCE and account._form
        if is_life_insurance:
            self.assurancevie.stay_or_go()
            account._form.submit()
            self.page.sub()
            self.page.sub()
            for investment in self.page.iter_investment():
                yield investment
            self.disc_from_AV_investment_detail()
        elif hasattr(account, '_market_link') and account._market_link:
            self.connexion_bourse()
            self.location(account._market_link)
            for investment in self.page.iter_investment():
                yield investment
            self.deconnexion_bourse()
class CmsoProBrowser(LoginBrowser):
    """Browser for the professional space of a CMSO-family website.

    The host is supplied at construction time (``website``); accounts
    are spread over one or more "areas" that must be selected before
    the account pages are requested.
    """

    login = URL(r'/banque/assurance/credit-mutuel/pro/accueil\?espace=professionnels',
                LoginPage)
    choice_link = URL('/domiweb/accueil.jsp', ChoiceLinkPage)
    subscription = URL('/domiweb/prive/espacesegment/selectionnerAbonnement/0-selectionnerAbonnement.act',
                       SubscriptionPage)
    accounts = URL('/domiweb/prive/professionnel/situationGlobaleProfessionnel/0-situationGlobaleProfessionnel.act',
                   AccountsPage)
    history = URL('/domiweb/prive/professionnel/situationGlobaleProfessionnel/1-situationGlobaleProfessionnel.act',
                  HistoryPage)
    useless = URL('/domiweb/prive/particulier/modificationMotDePasse/0-expirationMotDePasse.act',
                  UselessPage)

    investment = URL('/domiweb/prive/particulier/portefeuilleSituation/0-situationPortefeuille.act',
                     InvestmentPage)
    invest_account = URL(r'/domiweb/prive/particulier/portefeuilleSituation/2-situationPortefeuille.act\?(?:csrf=[^&]*&)?indiceCompte=(?P<idx>\d+)&idRacine=(?P<idroot>\d+)',
                         InvestmentAccountPage)

    def __init__(self, website, *args, **kwargs):
        """Build the browser for ``website`` (host name without ``www.``)."""
        super(CmsoProBrowser, self).__init__(*args, **kwargs)
        self.BASEURL = "https://www.%s" % website
        # Lazily filled by fetch_areas().
        self.areas = None

    def do_login(self):
        """Log in, then load the list of areas.

        Raises ``BrowserIncorrectPassword`` when the login request fails
        with HTTP 500 or 401; other HTTP errors are propagated.
        """
        self.login.stay_or_go()
        try:
            self.page.login(self.username, self.password)
        except BrowserHTTPError as e:
            # Yes, I know... In the Wild Wild Web, nobody respects nothing
            if e.response.status_code in (500, 401):
                raise BrowserIncorrectPassword()
            else:
                raise
        self.fetch_areas()

    def fetch_areas(self):
        """Populate ``self.areas`` from the subscription page (once)."""
        if self.areas is None:
            self.subscription.go()
            self.areas = list(self.page.get_areas())

    @need_login
    def iter_accounts(self):
        """Yield the accounts of every area, skipping duplicates.

        Each yielded account carries the area it was found in as
        ``_area``. Raises ``BrowserIncorrectPassword`` when the user has
        no area at all.
        """
        self.fetch_areas()

        # Manage multiple areas
        if not self.areas:
            raise BrowserIncorrectPassword("Vous n'avez pas de comptes sur l'espace professionnel de ce site.")

        seen = set()
        for area in self.areas:
            self.subscription.stay_or_go()
            self.location(area)
            try:
                for a in self.accounts.go().iter_accounts():
                    seenkey = (a.id, a._owner)
                    if seenkey in seen:
                        self.logger.warning('skipping seemingly duplicate account %r', a)
                        continue

                    a._area = area
                    seen.add(seenkey)
                    yield a
            except ServerError:
                # The original message read "Area not unavailable.", a double
                # negative stating the opposite of what happened.
                self.logger.warning('Area unavailable.')

    @need_login
    def iter_history(self, account):
        """Return an iterator over the transactions of ``account``.

        The page reached through the account's history URL is iterated
        first, followed by the result of a query covering the last six
        months, in reversed order.
        """
        self.fetch_areas()

        if account._history_url.startswith('javascript:'):
            raise NotImplementedError()

        # Manage multiple areas
        self.subscription.go()
        self.location(account._area)
        self.accounts.go()

        # Query history for 6 last months
        def format_date(d):
            return datetime.date.strftime(d, '%d/%m/%Y')

        today = datetime.date.today()
        period = (today - relativedelta(months=6), today)
        # NOTE(review): both dates are concatenated without a separator;
        # presumably this is the format the site expects - confirm.
        query = {'date': ''.join(map(format_date, period))}

        # Let's go
        self.location(account._history_url)
        first_page = self.page
        rest_page = self.history.go(data=query)
        # A single guesser is shared so both pages infer dates consistently.
        date_guesser = LinearDateGuesser()

        return chain(first_page.iter_history(date_guesser=date_guesser),
                     reversed(list(rest_page.iter_history(date_guesser=date_guesser))))

    @need_login
    def iter_coming(self, account):
        """Coming operations are not supported."""
        raise NotImplementedError()

    @need_login
    def iter_investment(self, account):
        """Return the list of investments held by ``account``.

        An empty list is returned when the account is not listed on the
        investment page, or when selecting it leaves the browser on the
        investment page with an error.
        """
        self.fetch_areas()

        self.subscription.go()
        self.location(account._area)

        self.investment.go()
        assert self.investment.is_here()
        for page_account in self.page.iter_accounts():
            if page_account.id == account.id:
                if page_account._formdata:
                    self.page.go_account(*page_account._formdata)
                else:
                    self.location(page_account.url)
                break
        else:
            # not an investment account
            return []

        if self.investment.is_here():
            assert self.page.has_error()
            self.logger.warning('account %r does not seem to be usable', account)
            return []

        assert self.invest_account.is_here()
        invests = list(self.page.iter_investments())
        assert len(invests) < 2, 'implementation should be checked with more than 1 investment'  # FIXME
        return invests
class CreditDuNordBrowser(LoginBrowser):
    """Browser for a Credit du Nord group website.

    The host is given at construction time. ``account_type`` holds the
    customer space segment used in URLs; it is refreshed from the URL
    reached after a successful login.
    """

    ENCODING = 'UTF-8'

    login = URL('$',
                r'/.*\?.*_pageLabel=page_erreur_connexion',
                LoginPage)
    redirect = URL('/swm/redirectCDN.html', RedirectPage)
    av = URL('/vos-comptes/particuliers/V1_transactional_portal_page_', AVPage)
    accounts = URL('/vos-comptes/particuliers',
                   '/vos-comptes/particuliers/transac_tableau_de_bord',
                   AccountsPage)
    transactions = URL('/vos-comptes/.*/transac/particuliers', TransactionsPage)
    proaccounts = URL('/vos-comptes/(professionnels|entreprises)', ProAccountsPage)
    protransactions = URL('/vos-comptes/.*/transac/(professionnels|entreprises)',
                          ProTransactionsPage)
    loans = URL('/vos-comptes/professionnels/credit_en_cours', ProAccountsPage)
    iban = URL('/vos-comptes/IPT/cdnProxyResource/transacClippe/RIB_impress.asp',
               IbanPage)

    account_type = 'particuliers'

    def __init__(self, website, *args, **kwargs):
        """Build the browser for host ``website``."""
        super(CreditDuNordBrowser, self).__init__(*args, **kwargs)
        self.BASEURL = "https://%s" % website

    def is_logged(self):
        """Tell whether the current page belongs to a logged-in session.

        False when no page is loaded, when the login page is displayed,
        or when the page asks the user to change their confidential code.
        """
        return self.page is not None and not self.login.is_here() and \
            not self.page.doc.xpath(u'//b[contains(text(), "vous devez modifier votre code confidentiel")]')

    def home(self):
        """Go to the "Synthèse" page, logging in first when needed."""
        if self.is_logged():
            self.location('/vos-comptes/%s' % self.account_type)
            self.location(self.page.doc.xpath(u'//a[contains(text(), "Synthèse")]')[0].attrib['href'])
        else:
            self.do_login()

    def do_login(self):
        """Log in and record the account type found in the landing URL.

        Raises ``BrowserIncorrectPassword`` when the login page is still
        displayed afterwards or when the session is not considered
        logged in.
        """
        self.login.go().login(self.username, self.password)
        if self.login.is_here():
            raise BrowserIncorrectPassword(self.page.get_error())

        if not self.is_logged():
            raise BrowserIncorrectPassword()

        m = re.match(r'https://[^/]+/vos-comptes/(\w+).*', self.url)
        if m:
            self.account_type = m.group(1)

    @need_login
    def _iter_accounts(self):
        """Yield life-insurance accounts, then regular accounts, then loans."""
        self.home()
        self.location(self.page.get_av_link())
        if self.av.is_here():
            for a in self.page.get_av_accounts():
                self.location(a._link, data=a._args)
                self.location(a._link.replace("_attente", "_detail_contrat_rep"), data=a._args)
                self.page.fill_diff_currency(a)
                yield a
        self.home()
        for a in self.page.get_list():
            yield a
        self.loans.go()
        for a in self.page.get_list():
            yield a

    @need_login
    def get_accounts_list(self):
        """Return all accounts as a list, with IBANs filled when available."""
        accounts = list(self._iter_accounts())
        self.page.iban_page()
        link = self.page.iban_go()
        if self.page.has_iban():
            for a in accounts:
                if not a._acc_nb:
                    continue
                self.location(link + a._acc_nb)
                a.iban = self.page.get_iban()
        return accounts

    def get_account(self, id):
        """Return the account whose id equals ``id``, or None."""
        for a in self._iter_accounts():
            if a.id == id:
                return a
        return None

    @need_login
    def iter_transactions(self, link, args, acc_type):
        """Yield transactions page after page until no next args remain.

        Nothing is yielded when ``args`` is None.
        """
        while args is not None:
            self.location(link, data=args)

            assert self.transactions.is_here()

            for tr in self.page.get_history(acc_type):
                yield tr

            args = self.page.get_next_args(args)

    @need_login
    def get_history(self, account, coming=False):
        """Return the list of transactions of ``account``.

        Loans have no history, and coming operations only exist for card
        accounts; an empty list is returned in both cases.
        """
        # Type constants are compared by value: the original used
        # ``is`` / ``is not``, which tests object identity and only
        # happens to work for interned values.
        if account.type == Account.TYPE_LOAN:
            return []
        if coming and account.type != Account.TYPE_CARD:
            return []

        return list(self.iter_transactions(account._link, account._args, account.type))

    @need_login
    def get_investment(self, account):
        """Return the investments of a market, PEA or life-insurance account.

        An empty list is returned when ``account._inv`` is falsy or for
        any other account type.
        """
        if not account._inv:
            return []

        investments = []
        if account.type in (Account.TYPE_MARKET, Account.TYPE_PEA):
            self.location(account._link, data=account._args)
            investments = list(self.page.get_market_investment())
        elif account.type == Account.TYPE_LIFE_INSURANCE:
            self.location(account._link, data=account._args)
            self.location(account._link.replace("_attente", "_detail_contrat_rep"), data=account._args)
            investments = list(self.page.get_deposit_investment())
        return investments

    @need_login
    def get_profile(self):
        """Return the profile read from the home page."""
        self.home()
        return self.page.get_profile()
class ApecBrowser(PagesBrowser):
    """Browser for the job-offer web services of cadres.apec.fr."""

    BASEURL = 'https://cadres.apec.fr'
    PROFILE = JsonProfile()

    start = 0
    json_count = URL('/cms/webservices/rechercheOffre/count', IdsPage)
    json_ids = URL('/cms/webservices/rechercheOffre/ids', IdsPage)
    json_offre = URL(r'/cms/webservices/offre/public\?numeroOffre=(?P<_id>.*)', OffrePage)

    def create_parameters(self, pattern='', fonctions='[]', lieux='[]', secteursActivite='[]', typesContrat='[]',
                          typesConvention='[]', niveauxExperience='[]', salaire_min='', salaire_max='',
                          date_publication='', start=0, range=20):
        """Build the JSON request body of a search, as a string.

        ``pattern`` is the free-text keyword; ``None`` is treated as an
        empty keyword. The list-like arguments (``fonctions``,
        ``lieux``, ...) must already be JSON array literals; salary and
        publication filters are only included when non-empty.
        """
        # Local import: the module-level imports are not visible here.
        import json

        # Escape the keyword as a JSON string instead of pasting it between
        # quotes, so quotes or backslashes cannot corrupt the payload, and
        # avoid sending the literal keyword "None" when no pattern is given.
        keywords = json.dumps('' if pattern is None else pattern)

        if date_publication:
            date_publication = ',"anciennetePublication":%s' % (date_publication)

        if salaire_max:
            salaire_max = ',"salaireMaximum":%s' % (salaire_max)

        if salaire_min:
            salaire_min = ',"salaireMinimum":%s' % (salaire_min)

        template = ('{"activeFiltre":true,"motsCles":%s,"fonctions":%s,"lieux":%s,'
                    '"pointGeolocDeReference":{},"secteursActivite":%s,"typesContrat":%s,'
                    '"typesConvention":%s,"niveauxExperience":%s%s%s%s,'
                    '"sorts":[{"type":"SCORE","direction":"DESCENDING"}],'
                    '"pagination":{"startIndex":%s,"range":%s},"typeClient":"CADRE"}')
        return template % (keywords,
                           fonctions,
                           lieux,
                           secteursActivite,
                           typesContrat,
                           typesConvention,
                           niveauxExperience,
                           salaire_min,
                           salaire_max,
                           date_publication,
                           start,
                           range)

    def search_job(self, pattern=None):
        """Iterate over the job adverts matching the keyword ``pattern``."""
        data = self.create_parameters(pattern=pattern).encode('utf-8')
        return self.get_job_adverts(data, pattern=pattern)

    def get_job_adverts(self, data, pattern='', lieux='', fonctions='', secteursActivite='', salaire_min='',
                        salaire_max='', typesContrat='', date_publication='', niveauxExperience='', typesConvention=''):
        """Yield the job adverts matching the request body ``data``.

        The search criteria are passed again to the ids page's
        ``iter_job_adverts``; nothing is yielded when the count service
        reports no advert.
        """
        count = self.json_count.go(data=data).get_adverts_number()
        self.start = 0

        if not count:
            return

        ids = self.json_ids.go(data=data).iter_job_adverts(pattern=pattern,
                                                           fonctions='[%s]' % fonctions,
                                                           lieux='[%s]' % lieux,
                                                           secteursActivite='[%s]' % secteursActivite,
                                                           typesContrat='[%s]' % typesContrat,
                                                           niveauxExperience='[%s]' % niveauxExperience,
                                                           typesConvention='[%s]' % typesConvention,
                                                           salaire_min=salaire_min,
                                                           salaire_max=salaire_max,
                                                           date_publication=date_publication,
                                                           start=self.start,
                                                           count=count,
                                                           range=20)
        for _id in ids:
            yield self.json_offre.go(_id=_id.id).get_job_advert()

    def get_job_advert(self, _id, advert=None):
        """Fetch advert ``_id``, filling ``advert`` when one is given."""
        return self.json_offre.go(_id=_id).get_job_advert(obj=advert)

    def advanced_search_job(self, region='', fonction='', secteur='', salaire='', contrat='', limit_date='', level=''):
        """Iterate over the adverts matching structured criteria.

        ``salaire``, when given, is a ``"min|max"`` string.
        """
        salaire_max = ''
        salaire_min = ''

        if salaire:
            s = salaire.split('|')
            salaire_max = s[1]
            salaire_min = s[0]

        data = self.create_parameters(fonctions='[%s]' % fonction,
                                      lieux='[%s]' % region,
                                      secteursActivite='[%s]' % secteur,
                                      typesContrat='[%s]' % contrat,
                                      niveauxExperience='[%s]' % level,
                                      salaire_min=salaire_min,
                                      salaire_max=salaire_max,
                                      date_publication=limit_date)

        return self.get_job_adverts(data,
                                    fonctions=fonction,
                                    lieux=region,
                                    secteursActivite=secteur,
                                    typesContrat=contrat,
                                    niveauxExperience=level,
                                    salaire_min=salaire_min,
                                    salaire_max=salaire_max,
                                    date_publication=limit_date)
class TwitterBrowser(LoginBrowser):
    """Browser for twitter.com: timelines, threads, search, trends, posting."""

    BASEURL = 'https://twitter.com/'

    # Token returned by the login page, sent back when posting a tweet.
    authenticity_token = None

    thread_page = URL(u'(?P<user>.+)/status/(?P<_id>.+)', ThreadPage)
    login_error = URL(u'login/error.+', LoginErrorPage)
    tweet = URL(u'i/tweet/create', Tweet)
    trends = URL(u'i/trends\?pc=true&show_context=false&src=search-home&k=(?P<token>.*)', TrendsPage)
    search = URL(u'i/search/timeline', SearchTimelinePage)
    search_page = URL(u'search\?q=(?P<pattern>.+)&src=sprv',
                      u'search-home', SearchPage)
    profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/tweets', HomeTimelinePage)
    timeline = URL(u'i/timeline', TimelinePage)
    login = URL(u'', LoginPage)

    def do_login(self):
        """Log in and remember the authenticity token.

        Raises ``BrowserIncorrectPassword`` when the page is not logged
        in afterwards or the login-error page is displayed.
        """
        self.login.stay_or_go()

        if not self.authenticity_token:
            self.authenticity_token = self.page.login(self.username, self.password)

        if not self.page.logged or self.login_error.is_here():
            raise BrowserIncorrectPassword()

    @need_login
    def get_me(self):
        """Return what the login page's ``get_me()`` reports."""
        return self.login.stay_or_go().get_me()

    @need_login
    def iter_threads(self):
        """Iterate over the threads of the logged-in user's timeline."""
        return self.timeline.go().iter_threads()

    def get_trendy_subjects(self):
        """Return the trending subjects, personalised when a username is set."""
        if self.username:
            return self.get_logged_trendy_subject()
        else:
            return self.trends.open(token="").get_trendy_subjects()

    def get_logged_trendy_subject(self):
        """Return the trending subjects for the logged-in user."""
        if not self.authenticity_token:
            self.do_login()

        trends_token = self.search_page.open().get_trends_token()
        return self.trends.open(token=trends_token).get_trendy_subjects()

    @need_login
    def post(self, thread, message):
        """Post ``message``, as a reply when ``thread`` is given."""
        datas = {'place_id': '',
                 'tagged_users': ''}
        datas['authenticity_token'] = self.authenticity_token
        datas['status'] = message
        if thread:
            datas['in_reply_to_status_id'] = thread.id.split('#')[-1]

        self.tweet.open(data=datas)

    def get_thread(self, _id, thread=None, seen=None):
        """Return the thread ``_id`` with its root message and comments.

        ``_id`` has the form ``user#...tweet_id``. ``thread`` may be an
        already-built thread to complete. When ``seen`` is a non-empty
        container, messages whose id is not in it are flagged as unread.
        """
        splitted_id = _id.split('#')
        user = splitted_id[0]
        tweet_id = splitted_id[1].split('.')[-1]

        if not thread:
            thread = self.thread_page.go(_id=tweet_id, user=user).get_thread(obj=thread)

        title_content = thread.title.split('\n\t')[-1]

        thread.root = Message(thread=thread,
                              id=tweet_id,
                              # Slicing already leaves short titles untouched.
                              title=title_content[:50],
                              sender=user,
                              receivers=None,
                              date=thread.date,
                              parent=thread.root,
                              content=title_content,
                              signature=u'',
                              children=[])

        if seen and (_id not in seen):
            thread.root.flags = Message.IS_UNREAD

        comments = self.thread_page.stay_or_go(_id=tweet_id, user=user).iter_comments()
        for comment in comments:
            comment.thread = thread
            comment.parent = thread.root
            # Membership is tested on the container itself, consistently
            # with the root-message check above; ``seen.keys()`` built a
            # list for every comment on Python 2.
            if seen and comment.id not in seen:
                comment.flags = Message.IS_UNREAD

            thread.root.children.append(comment)

        return thread

    def get_tweets_from_profil(self, path):
        """Iterate over the tweets of the profile ``path``."""
        return self.profil.go(path=path).iter_threads()

    def get_tweets_from_hashtag(self, path):
        """Iterate over the tweets for a hashtag (leading ``#`` optional)."""
        return self.get_tweets_from_search(u'#%s' % path if not path.startswith('#') else path)

    def get_tweets_from_search(self, path):
        """Iterate over the tweets matching the search ``path``."""
        min_position = self.search_page.go(pattern=path).get_min_position()
        params = {'q': "%s" % path,
                  'src': 'sprv'}

        return self.search.go(params=params).iter_threads(params=params, min_position=min_position)
class PhpBB(LoginBrowser):
    """Browser for a phpBB forum located at the URL given to the constructor."""

    forum = URL(r'.*index.php',
                r'/$',
                r'.*viewforum.php\?f=(\d+)',
                r'.*search.php\?.*',
                ForumPage)
    topic = URL(r'.*viewtopic.php\?.*', TopicPage)
    posting = URL(r'.*posting.php\?.*', PostingPage)
    login = URL(r'.*ucp.php\?mode=login.*', LoginPage)

    last_board_msg_id = None

    def __init__(self, url, *args, **kwargs):
        """Build the browser for the forum rooted at ``url``."""
        self.BASEURL = url
        super(PhpBB, self).__init__(*args, **kwargs)

    def home(self):
        """Go to the forum root."""
        self.location(self.BASEURL)

    def do_login(self):
        """Submit the login form.

        Raises ``BrowserIncorrectPassword`` (with the page's error
        message) when the resulting page is not logged in.
        """
        data = {'login': '******',
                'username': self.username,
                'password': self.password,
                }
        self.location('ucp.php?mode=login', data=data)

        if not self.page.logged:
            raise BrowserIncorrectPassword(self.page.get_error_message())

    def _open_topic(self, topic):
        """Open a topic given either a full URL or an internal id."""
        if topic.startswith('http'):
            self.location(topic)
        else:
            self.location('%s/%s' % (self.BASEURL, id2url(topic)))
        assert self.topic.is_here()

    @need_login
    def get_root_feed_url(self):
        """Return the feed URL advertised on the home page."""
        self.home()
        return self.page.get_feed_url()

    @need_login
    def iter_links(self, url):
        """Iterate over the links of the forum page ``url`` (home if empty)."""
        if url:
            self.location(url)
        else:
            self.home()

        assert self.forum.is_here()
        return self.page.iter_links()

    @need_login
    def iter_posts(self, id, stop_id=None):
        """Yield the posts of a topic from first to last page.

        Each post's ``parent`` is set to the id of the post before it
        (0 for the first). Iteration stops at the first post whose id is
        greater than or equal to ``stop_id``, when one is given.
        """
        self._open_topic(id)

        parent = 0
        while True:
            for post in self.page.iter_posts():
                if stop_id and post.id >= stop_id:
                    return

                post.parent = parent
                yield post
                parent = post.id

            if self.page.cur_page == self.page.tot_pages:
                return
            self.location(self.page.next_page_url())

    @need_login
    def riter_posts(self, id, stop_id=None):
        """Yield the posts of a topic from newest to oldest.

        A post is emitted once the one before it is known, so that its
        ``parent`` can be set. Iteration stops when a post whose id is
        lower than or equal to ``stop_id`` is reached, when one is given.
        """
        self._open_topic(id)

        child = None
        while True:
            for post in self.page.riter_posts():
                if child:
                    child.parent = post.id
                    yield child

                # Only compare when a stop id was supplied, as iter_posts()
                # does: ordering an int against None raises on Python 3
                # and was always False on Python 2.
                if stop_id and post.id <= stop_id:
                    return
                child = post

            if self.page.cur_page == 1:
                if child:
                    yield child
                return
            self.location(self.page.prev_page_url())

    @need_login
    def get_post(self, id):
        """Return the post designated by ``id`` (URL or internal id), or None.

        When the post is the first of a page other than the first one,
        its parent is looked up as the last post of the previous page.
        """
        self._open_topic(id)
        if id.startswith('http'):
            id = url2id(id)

        post = self.page.get_post(int(id.split('.')[-1]))
        if not post:
            return None

        if post.parent == 0 and self.page.cur_page > 1:
            self.location(self.page.prev_page_url())
            post.parent = self.page.get_last_post_id()

        return post

    @need_login
    def get_forums(self):
        """Return a dict built from the home page's ``iter_all_forums()``."""
        self.home()
        return dict(self.page.iter_all_forums())

    @need_login
    def post_answer(self, forum_id, topic_id, title, content):
        """Create a topic (``topic_id == 0``) or reply to an existing one.

        For a new topic without ``forum_id``, the forum is looked up by
        name from a title formatted as ``"[FORUM] SUBJECT"``. Raises
        ``CantSendMessage`` when the forum cannot be determined or the
        site reports an error.
        """
        if topic_id == 0:
            if not forum_id:
                forums = self.get_forums()
                forums_prompt = 'Forums list:\n%s' % ('\n'.join('\t- %s' % f for f in forums.values()))
                m = re.match(r'\[(.*)\] (.*)', title or '')
                if not m:
                    raise CantSendMessage('Please enter a title formatted like that:\n\t"[FORUM] SUBJECT"\n\n%s' % forums_prompt)

                wanted = m.group(1).lower()
                forum_id = None
                for k, v in forums.items():
                    if v.lower() == wanted:
                        forum_id = k
                        break

                if not forum_id:
                    raise CantSendMessage('Forum "%s" not found.\n\n%s' % (m.group(1), forums_prompt))

            self.location('%s/posting.php?mode=post&f=%d' % (self.BASEURL, forum_id))

            assert self.posting.is_here()
            self.page.post(title, content)

            assert self.posting.is_here()
            error = self.page.get_error_message()
            if error:
                raise CantSendMessage(u'Unable to send message: %s' % error)
        else:
            self.location('%s/%s' % (self.BASEURL, id2url('%s.%s' % (forum_id, topic_id))))
            assert self.topic.is_here()

            self.page.go_reply()
            assert self.posting.is_here()

            # Don't send title because it isn't needed in real use case
            # and with monboob title is something like:
            #   Re: [Forum Name] Re: Topic Name
            if title is not None and title.startswith('Re:'):
                title = None
            self.page.post(title, content)

            assert self.posting.is_here() or self.topic.is_here()
            error = self.page.get_error_message()
            if error:
                raise CantSendMessage(u'Unable to send message: %s' % error)
class DLFP(LoginBrowser):
    """Browser for linuxfr.org: contents, comments, wiki, board and tags."""

    BASEURL = 'https://linuxfr.org/'

    index = URL(r'/?$', IndexPage)
    login = URL(r'/compte/connexion', LoginPage)
    content = URL(r'/news/.+',
                  r'/wiki/(?!nouveau)[^/]+',
                  r'/suivi/[^\.]+',
                  r'/sondages/[^\.]+',
                  r'/users/[^\./]+/journaux/[^\.]+',
                  r'/forums/[^\./]+/posts/[^\.]+',
                  ContentPage)
    wiki_edit = URL(r'/wiki$',
                    r'/wiki/nouveau',
                    r'/wiki/[^\.]+/modifier',
                    WikiEditPage)
    comment = URL(r'/nodes/(\d+)/comments/(\d+)', CommentPage)
    new_comment = URL(r'/nodes/(\d+)/comments/nouveau', NewCommentPage)
    node = URL(r'/nodes/(\d+)/comments', NodePage)
    new_tag = URL(r'/nodes/(\d+)/tags/nouveau', NewTagPage)
    board_index = URL(r'/board/index.xml', BoardIndexPage)
    rss_comment = URL(r'/nodes/(\d+)/comments.atom', RSSComment)

    # Id of the last board message handed out by iter_new_board_messages().
    last_board_msg_id = None
    # Result of the authenticity_token xpath lookup done at login time.
    _token = None

    def parse_id(self, _id):
        """Resolve ``_id`` (internal id or URL) into a ``(url, id)`` pair.

        Comment URLs are returned as ``(url, None)``; values that are
        neither a known id nor a known URL give ``(None, None)``.
        """
        if re.match('^https?://.*linuxfr.org/nodes/\d+/comments/\d+$', _id):
            return _id, None

        url = id2url(_id)
        if url is not None:
            return url, _id

        # Not an id: maybe the caller gave a URL directly.
        if url2id(_id) is None:
            return None, None
        return _id, url2id(_id)

    @need_login
    def get_wiki_content(self, _id):
        """Return the source of wiki page ``_id``.

        None is returned for an invalid name, and an empty string when
        the page does not exist.
        """
        url, _id = self.parse_id('W.%s' % _id)
        if url is None:
            return None

        try:
            self.location('%s/modifier' % url)
        except HTTPNotFound:
            return ''

        assert self.wiki_edit.is_here()
        return self.page.get_body()

    def _go_on_wiki_edit_page(self, name):
        """
        Go on the wiki page named 'name'.

        Return True if this is a new page, or False if
        the page already exist.
        Return None if it isn't a right wiki page name.
        """
        url, _id = self.parse_id('W.%s' % name)
        if url is None:
            return None

        is_new = False
        try:
            self.location('%s/modifier' % url)
        except HTTPNotFound:
            # Unknown page: fall back to the creation form.
            self.location('/wiki/nouveau')
            is_new = True

        assert self.wiki_edit.is_here()
        return is_new

    @need_login
    def set_wiki_content(self, name, content, message):
        """Write ``content`` to wiki page ``name`` with edit note ``message``.

        A title derived from the name is only sent for a new page.
        Returns None in every case.
        """
        is_new = self._go_on_wiki_edit_page(name)
        if is_new is None:
            return None

        title = name.replace('-', ' ') if is_new else None
        self.page.post_content(title, content, message)

    @need_login
    def get_wiki_preview(self, name, content):
        """Return the HTML preview of ``content`` for wiki page ``name``.

        None is returned for an invalid page name, or when the browser
        ends up on neither the edit page nor a content page.
        """
        if self._go_on_wiki_edit_page(name) is None:
            return None

        self.page.post_preview(content)
        if self.wiki_edit.is_here():
            return self.page.get_preview_html()
        if self.content.is_here():
            return self.page.get_article().body

    def get_hash(self, url):
        """Return the MD5 hex digest of the feed at ``url``.

        None is returned when the document has no ``entry`` element.
        """
        self.location(url)
        if not self.page.doc.xpath('//entry'):
            return None

        serialized = lxml.etree.tostring(self.page.doc)
        return hashlib.md5(serialized).hexdigest()

    def get_content(self, _id):
        """Return the article or comment designated by ``_id``, or None.

        Raises ``ParseError`` when the page reached is neither a comment
        nor a content page.
        """
        url, _id = self.parse_id(_id)
        if url is None:
            return None

        self.location(url)

        if self.comment.is_here():
            content = self.page.get_comment()
        elif self.content.is_here():
            anchor = re.match('.*#comment-(\d+)$', url)
            if anchor:
                content = self.page.get_comment(int(anchor.group(1)))
            else:
                content = self.page.get_article()
        else:
            raise ParseError('Not on a content or comment page (%r)' % self.page)

        if _id is not None:
            content.id = _id
        return content

    @need_login
    def post_comment(self, thread, reply_id, title, message):
        """Post a comment on ``thread``, replying to ``reply_id`` when > 0.

        Raises ``CantSendMessage`` when the thread id is invalid, the
        submission fails with an HTTP error, or the site reports errors.
        """
        url = id2url(thread)
        if url is None:
            raise CantSendMessage('%s is not a right ID' % thread)

        self.location(url)
        assert self.content.is_here()

        self.location(self.page.get_post_comment_url())
        assert self.new_comment.is_here()

        form = self.page.get_form(xpath='//form[contains(@action,"comment_new")]')
        if title is not None:
            form['comment[title]'] = title.encode('utf-8')
        form['comment[wiki_body]'] = message.encode('utf-8')
        if int(reply_id) > 0:
            form['comment[parent_id]'] = str(reply_id)
        form['commit'] = 'Poster le commentaire'

        try:
            form.submit()
        except HTTPError as e:
            raise CantSendMessage('Unable to send message to %s.%s: %s' % (thread, reply_id, e))

        if self.node.is_here():
            errors = self.page.get_errors()
            if len(errors) > 0:
                raise CantSendMessage('Unable to send message: %s' % ', '.join(errors))

        return None

    def do_login(self):
        """Log in when a username is configured and store the page token.

        Raises ``BrowserIncorrectPassword`` when the session is not
        logged in after submitting the credentials.
        """
        if self.username is None:
            return

        # Disabled for now: the index page is not visited first, and no
        # authenticity token is sent with the credentials.
        #self.location('/', no_login=True)
        credentials = {
            'account[login]': self.username,
            'account[password]': self.password,
            'account[remember_me]': 1,
            #'authenticity_token': self.page.get_login_token(),
        }
        self.location('/compte/connexion', data=credentials)

        if not self.is_logged():
            raise BrowserIncorrectPassword()

        self._token = self.page.doc.xpath('//input[@name="authenticity_token"]')

    def is_logged(self):
        """Truthy when anonymous (no username) or the current page is logged in."""
        return (self.username is None or (self.page and self.page.logged))

    def close_session(self):
        """Log out, provided a token was captured at login time."""
        if not self._token:
            return

        token_value = self._token[0].attrib['value']
        self.open('/compte/deconnexion', data={'authenticity_token': token_value})

    def plusse(self, url):
        """Vote for the comment at ``url``."""
        return self.relevance(url, 'for')

    def moinse(self, url):
        """Vote against the comment at ``url``."""
        return self.relevance(url, 'against')

    @need_login
    def relevance(self, url, what):
        """Send a relevance vote ``what`` for the comment at ``url``.

        Returns the response content, or False when the comment has no
        relevance token. Raises ``ValueError`` when ``url`` does not
        resolve to any content.
        """
        comment = self.get_content(url)

        if comment is None:
            raise ValueError("The given URL isn't a comment.")

        if comment.relevance_token is None:
            return False

        vote_url = '%s%s' % (comment.relevance_url, what)
        response = self.open(vote_url, data={'authenticity_token': comment.relevance_token})
        return response.content

    def iter_new_board_messages(self):
        """Yield board messages newer than the last one handed out.

        The page's messages are walked in reversed order, and
        ``last_board_msg_id`` is updated before each message is yielded.
        """
        self.location('/board/index.xml')
        assert self.board_index.is_here()

        messages = self.page.get_messages(self.last_board_msg_id)
        for message in reversed(messages):
            self.last_board_msg_id = message.id
            yield message

    @need_login
    def board_post(self, msg):
        """Post ``msg`` on the board."""
        self.open(self.absurl('/board/'),
                  data={'board[message]': msg},
                  headers={'Referer': self.absurl('/')})

    @need_login
    def add_tag(self, _id, tag):
        """Add ``tag`` to the content ``_id``; returns None for an unknown id."""
        url, _id = self.parse_id(_id)
        if url is None:
            return None

        self.location(url)
        assert self.content.is_here()

        self.location(self.page.get_tag_url())
        assert self.new_tag.is_here()

        self.page.tag(tag)
class LCLBrowser(LoginBrowser, StatesMixin):
    """Browser for the LCL website rooted at ``BASEURL``.

    Besides the main banking pages it follows the routing pages towards the
    "bourse" (market) portal, the life-insurance portal and the Calie
    website, and exposes accounts, history, investments, recipients,
    transfers, documents and profile.

    The attributes listed in ``__states__`` (``contracts``,
    ``current_contract``, ``parsed_contracts``) are the ones persisted by
    ``StatesMixin``.
    """

    BASEURL = 'https://particuliers.secure.lcl.fr'
    STATE_DURATION = 15

    login = URL(r'/outil/UAUT\?from=/outil/UWHO/Accueil/',
                r'/outil/UAUT\?from=.*',
                r'/outil/UWER/Accueil/majicER',
                r'/outil/UWER/Enregistrement/forwardAcc',
                LoginPage)
    contracts_page = URL(r'/outil/UAUT/Contrat/choixContrat.*',
                         r'/outil/UAUT/Contract/getContract.*',
                         r'/outil/UAUT/Contract/selectContracts.*',
                         r'/outil/UAUT/Accueil/preRoutageLogin',
                         ContractsPage)
    contracts_choice = URL(r'.*outil/UAUT/Contract/routing', ContractsChoicePage)
    home = URL(r'/outil/UWHO/Accueil/', HomePage)
    accounts = URL(r'/outil/UWSP/Synthese', AccountsPage)
    client = URL(r'/outil/uwho', ClientPage)
    history = URL(r'/outil/UWLM/ListeMouvements.*/acces(ListeMouvements|DetailsMouvement).*',
                  r'/outil/UWLM/DetailMouvement.*/accesDetailMouvement.*',
                  r'/outil/UWLM/Rebond',
                  AccountHistoryPage)
    rib = URL(r'/outil/UWRI/Accueil/detailRib',
              r'/outil/UWRI/Accueil/listeRib',
              RibPage)
    finalrib = URL(r'/outil/UWRI/Accueil/', RibPage)
    cards = URL(r'/outil/UWCB/UWCBEncours.*/listeCBCompte.*',
                r'/outil/UWCB/UWCBEncours.*/listeOperations.*',
                CardsPage)
    skip = URL(r'/outil/UAUT/Contrat/selectionnerContrat.*',
               r'/index.html')
    no_perm = URL(r'/outil/UAUT/SansDroit/affichePageSansDroit.*', NoPermissionPage)

    bourse = URL(r'https://bourse.secure.lcl.fr/netfinca-titres/servlet/com.netfinca.frontcr.synthesis.HomeSynthesis',
                 r'https://bourse.secure.lcl.fr/netfinca-titres/servlet/com.netfinca.frontcr.account.*',
                 r'/outil/UWBO.*',
                 BoursePage)
    disc = URL(r'https://bourse.secure.lcl.fr/netfinca-titres/servlet/com.netfinca.frontcr.login.ContextTransferDisconnect',
               r'https://assurance-vie-et-prevoyance.secure.lcl.fr/filiale/entreeBam\?.*\btypeaction=reroutage_retour\b',
               r'https://assurance-vie-et-prevoyance.secure.lcl.fr/filiale/ServletReroutageCookie',
               r'/outil/UAUT/RetourPartenaire/retourCar',
               DiscPage)

    form2 = URL(r'/outil/UWVI/Routage', Form2Page)
    send_token = URL(r'/outil/UWVI/AssuranceVie/envoyerJeton', SendTokenPage)
    calie_detail = URL(r'https://www.my-calie.fr/FO.HoldersWebSite/Disclaimer/Disclaimer.aspx.*',
                       r'https://www.my-calie.fr/FO.HoldersWebSite/Contract/ContractDetails.aspx.*',
                       r'https://www.my-calie.fr/FO.HoldersWebSite/Contract/ContractOperations.aspx.*',
                       CaliePage)
    calie_contracts = URL(r'https://www.my-calie.fr/FO.HoldersWebSite/Contract/SearchContract.aspx',
                          CalieContractsPage)

    assurancevie = URL(r'/outil/UWVI/AssuranceVie/accesSynthese',
                       r'/outil/UWVI/AssuranceVie/accesDetail.*',
                       AVPage)
    av_list = URL(r'https://assurance-vie-et-prevoyance.secure.lcl.fr/rest/assurance/synthesePartenaire',
                  AVListPage)
    avdetail = URL(r'https://assurance-vie-et-prevoyance.secure.lcl.fr/consultation/epargne',
                   AVDetailPage)
    av_history = URL(r'https://assurance-vie-et-prevoyance.secure.lcl.fr/rest/assurance/historique',
                     AVHistoryPage)
    av_investments = URL(r'https://assurance-vie-et-prevoyance.secure.lcl.fr/rest/detailEpargne/contrat/(?P<life_insurance_id>\w+)',
                         AVInvestmentsPage)

    loans = URL(r'/outil/UWCR/SynthesePar/', LoansPage)
    loans_pro = URL(r'/outil/UWCR/SynthesePro/', LoansProPage)

    transfer_page = URL(r'/outil/UWVS/', TransferPage)
    confirm_transfer = URL(r'/outil/UWVS/Accueil/redirectView', TransferPage)
    recipients = URL(r'/outil/UWBE/Consultation/list', RecipientPage)
    add_recip = URL(r'/outil/UWBE/Creation/creationSaisie', AddRecipientPage)
    recip_confirm = URL(r'/outil/UWBE/Creation/creationConfirmation', RecipConfirmPage)
    # Both patterns are raw strings: '\?' is a regex escape, not a string escape.
    send_sms = URL(r'/outil/UWBE/Otp/envoiCodeOtp\?telChoisi=MOBILE',
                   r'/outil/UWBE/Otp/getValidationCodeOtp\?codeOtp',
                   SmsPage)
    recip_recap = URL(r'/outil/UWBE/Creation/executeCreation', RecipRecapPage)
    documents = URL(r'/outil/UWDM/ConsultationDocument/derniersReleves',
                    r'/outil/UWDM/Recherche/rechercherAll',
                    DocumentsPage)
    documents_plus = URL(r'/outil/UWDM/Recherche/afficherPlus', DocumentsPage)

    profile = URL(r'/outil/UWIP/Accueil/rafraichir', ProfilePage)

    deposit = URL(r'/outil/UWPL/CompteATerme/accesSynthese',
                  r'/outil/UWPL/DetailCompteATerme/accesDetail',
                  DepositPage)

    __states__ = ('contracts', 'current_contract', 'parsed_contracts')

    IDENTIFIANT_ROUTING = 'CLI'

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the parent browser and reset per-session state."""
        super(LCLBrowser, self).__init__(*args, **kwargs)
        self.accounts_list = None
        self.current_contract = None
        self.contracts = []
        self.parsed_contracts = False
        self.owner_type = AccountOwnerType.PRIVATE

    def load_state(self, state):
        """Restore the saved state, then coerce contract ids to plain unicode."""
        super(LCLBrowser, self).load_state(state)

        # lxml _ElementStringResult were put in the state, convert them to plain strs
        # TODO to remove at some point
        if self.contracts:
            self.contracts = [unicode(s) for s in self.contracts]
        if self.current_contract:
            self.current_contract = unicode(self.current_contract)

    def do_login(self):
        """Log in with the numeric password and end on the accounts page.

        Raises BrowserIncorrectPassword when the password is not all digits.
        Only the first 6 digits of the password are kept and submitted.
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)

        if not self.password.isdigit():
            raise BrowserIncorrectPassword()

        # Since a while the virtual keyboard accepts only the first 6 digits of the password
        self.password = self.password[:6]

        # we force the browser to go to login page so it's work even
        # if the session expire
        # Must set the referer to avoid redirection to the home page
        self.login.go(headers={"Referer": "https://www.lcl.fr/"})

        if not self.page.login(self.username, self.password) or self.login.is_here():
            self.page.check_error()

        if not self.contracts and not self.parsed_contracts:
            # On the preRoutageLogin page we gather the list of available contracts for this account
            self.contracts = self.page.get_contracts_list()
            # If there is not multiple contracts then self.contracts will be empty
            if not self.contracts:
                self.page.select_contract()
            self.parsed_contracts = True

        self.accounts_list = None
        self.accounts.stay_or_go()

    @need_login
    def connexion_bourse(self):
        """Enter the market ("bourse") portal.

        Returns False when the no-permission page is shown, True once the
        bourse page has been reached through the intermediate page, and
        None (implicitly) when no intermediate page link is found.
        """
        self.location('/outil/UWBO/AccesBourse/temporisationCar?codeTicker=TICKERBOURSECLI')
        if self.no_perm.is_here():
            return False
        next_page = self.page.get_next()
        if next_page:
            # go on a intermediate page to get a session cookie (jsessionid)
            self.location(next_page)
            # go to bourse page
            self.bourse.stay_or_go()
            return True

    def deconnexion_bourse(self):
        """Go to the disconnect page of the market portal unless already there."""
        self.disc.stay_or_go()

    @need_login
    def go_life_insurance_website(self):
        """Follow the routing URL (if any) to the life-insurance list page."""
        self.assurancevie.stay_or_go()
        life_insurance_routage_url = self.page.get_routage_url()
        if life_insurance_routage_url:
            self.location(life_insurance_routage_url)
            self.av_list.go()

    @need_login
    def update_life_insurance_account(self, life_insurance):
        """Open the investments page of ``life_insurance`` and let it update the account."""
        self.av_investments.go(life_insurance_id=life_insurance.id)
        return self.page.update_life_insurance_account(life_insurance)

    @need_login
    def go_back_from_life_insurance_website(self):
        """Leave the life-insurance portal through its detail page."""
        self.avdetail.stay_or_go()
        self.page.come_back()

    def select_contract(self, id_contract):
        """Switch to ``id_contract`` when it differs from the current contract.

        Nothing happens when no current contract is set or when it is
        already the requested one.
        """
        if self.current_contract and id_contract != self.current_contract:
            self.logger.debug('Changing contract to %s', id_contract)
            # when we go on bourse page, we can't change contract anymore... we have to logout.
            self.location('/outil/UAUT/Login/logout')
            # we already passed all checks on do_login so we consider it's ok.
            self.login.go().login(self.username, self.password)
            self.contracts_choice.go().select_contract(id_contract)

    def go_contract(f):
        """Decorator: select ``account._contract`` before running the wrapped method."""
        @wraps(f)
        def wrapper(self, account, *args, **kwargs):
            self.select_contract(account._contract)
            return f(self, account, *args, **kwargs)
        return wrapper

    def check_accounts(self, account):
        """Return True when no account with the same id is in ``accounts_list`` yet."""
        return all(account.id != acc.id for acc in self.accounts_list)

    def update_accounts(self, account):
        """Tag ``account`` with the current contract and append it if it is new."""
        if self.check_accounts(account):
            account._contract = self.current_contract
            self.accounts_list.append(account)

    def set_deposit_account_id(self, account):
        """Submit the deposit form for ``account`` so the page can set its id."""
        self.deposit.go()
        if self.no_perm.is_here():
            self.logger.warning('Deposits are unavailable.')
        else:
            form = self.page.get_form(id='mainform')
            form['INDEX'] = account._link_index
            form.submit()
            self.page.set_deposit_account_id(account)
        self.deposit.go()

    def _get_owner_name(self):
        """Return the upper-cased owner name derived from the profile name.

        Everything after the first space of the profile name is kept; a
        profile name without a space is used whole.
        """
        profile_name = self.get_profile_name()
        if ' ' in profile_name:
            return re.search(r' (.+)', profile_name).group(1).upper()
        return profile_name.upper()

    @need_login
    def get_accounts(self):
        """Collect the accounts of the current contract into ``accounts_list``.

        Visits, in order: life insurances (popups, Calie, dedicated website),
        main accounts (with IBAN), loans, pro loans, market accounts and
        deposit accounts. Returns None, except when the browser is found on
        the login page, in which case ``get_accounts_list()`` is returned.
        """
        # This is required in case the browser is left in the middle of add_recipient and the session expires.
        if self.login.is_here():
            return self.get_accounts_list()

        owner_name = self._get_owner_name()

        # retrieve life insurance accounts
        self.assurancevie.stay_or_go()
        if self.no_perm.is_here():
            self.logger.warning('Life insurances are unavailable.')
        else:
            # retrieve life insurances from popups
            for a in self.page.get_popup_life_insurance(name=owner_name):
                self.update_accounts(a)

            # retrieve life insurances from calie website
            calie_index = self.page.get_calie_life_insurances_first_index()
            if calie_index:
                form = self.page.get_form(id="formRedirectPart")
                form['INDEX'] = calie_index
                form.submit()
                # if only one calie insurance, request directly leads to details on CaliePage
                if self.calie_detail.is_here():
                    self.page.check_error()
                    a = Account()
                    a.url = self.url
                    self.page.fill_account(obj=a)
                    self.update_accounts(a)
                # if several calie insurances, request leads to CalieContractsPage
                elif self.calie_contracts.is_here():
                    for a in self.page.iter_calie_life_insurance():
                        if a.url:
                            self.location(a.url)
                            self.page.fill_account(obj=a)
                            self.update_accounts(a)
                        else:
                            self.logger.warning('%s has no url to parse detail to', a)
                # get back to life insurances list page
                self.assurancevie.stay_or_go()

            # retrieve life insurances on special lcl life insurance website
            if self.page.is_website_life_insurance():
                self.go_life_insurance_website()
                for life_insurance in self.page.iter_life_insurance():
                    life_insurance = self.update_life_insurance_account(life_insurance)
                    self.update_accounts(life_insurance)
                self.go_back_from_life_insurance_website()

        # retrieve accounts on main page
        self.accounts.go()
        for a in self.page.get_accounts_list(name=owner_name):
            if not self.check_accounts(a):
                continue

            self.location('/outil/UWRI/Accueil/')
            if self.no_perm.is_here():
                self.logger.warning('RIB is unavailable.')
            elif self.page.has_iban_choice():
                self.rib.go(data={'compte': '%s/%s/%s' % (a.id[0:5], a.id[5:11], a.id[11:])})
                if self.rib.is_here():
                    iban = self.page.get_iban()
                    # only trust the IBAN when it contains the tail of the account id
                    a.iban = iban if iban and a.id[11:] in iban else NotAvailable
            else:
                iban = self.page.check_iban_by_account(a.id)
                a.iban = iban if iban is not None else NotAvailable
            self.update_accounts(a)

        # retrieve loans accounts
        self.loans.stay_or_go()
        if self.no_perm.is_here():
            self.logger.warning('Loans are unavailable.')
        else:
            for a in self.page.get_list():
                self.update_accounts(a)

        # retrieve pro loans accounts
        self.loans_pro.stay_or_go()
        if self.no_perm.is_here():
            self.logger.warning('Loans are unavailable.')
        else:
            for a in self.page.get_list():
                self.update_accounts(a)

        if self.connexion_bourse():
            for a in self.page.get_list(name=owner_name):
                self.update_accounts(a)
            self.deconnexion_bourse()
            # Disconnecting from bourse portal before returning account list
            # to be sure that we are on the banque portal

        # retrieve deposit accounts
        self.deposit.stay_or_go()
        if self.no_perm.is_here():
            self.logger.warning('Deposits are unavailable.')
        else:
            for a in self.page.get_list(name=owner_name):
                # There is no id on the page listing the 'Compte à terme'
                # So a form must be submitted to access the id of the contract
                self.set_deposit_account_id(a)
                self.update_accounts(a)

    @need_login
    def get_accounts_list(self):
        """Return an iterator over all accounts, building the list on first call.

        When several contracts are known, accounts are collected contract by
        contract. Deferred cards are then added, and owner type and ownership
        are set on every account.
        """
        if self.accounts_list is None:
            self.accounts_list = []

            if self.contracts and self.current_contract:
                for id_contract in self.contracts:
                    self.select_contract(id_contract)
                    self.get_accounts()
            else:
                self.get_accounts()

        self.accounts.go()

        deferred_cards = self.page.get_deferred_cards()

        # We got deferred card page link and we have to go through it to get details.
        for account_id, link in deferred_cards:
            parent_account = find_object(self.accounts_list, id=account_id)
            self.location(link)
            # Url to go to each account card is made of agence id, parent account id,
            # parent account key id and an index of the card (0,1,2,3,4...).
            # This index is not related to any information, it's just an incremental integer
            for card_position, a in enumerate(self.page.get_child_cards(parent_account)):
                a._card_position = card_position
                self.update_accounts(a)

        owner_name = self._get_owner_name()

        for account in self.accounts_list:
            account.owner_type = self.owner_type
            self.set_ownership(account, owner_name)

        return iter(self.accounts_list)

    def set_ownership(self, account, owner_name):
        """Fill ``account.ownership`` when it is not already set.

        Order of precedence: the parent's ownership, then CO_OWNER when the
        label matches the "<title> ... ou <title> ..." pattern, then OWNER
        when every word of ``owner_name`` appears in the label, else ATTORNEY.
        """
        if not account.ownership:
            if account.parent and account.parent.ownership:
                account.ownership = account.parent.ownership
            elif re.search(r'(m|mr|me|mme|mlle|mle|ml)\.? (.*)\bou (m|mr|me|mme|mlle|mle|ml)\b(.*)', account.label, re.IGNORECASE):
                account.ownership = AccountOwnership.CO_OWNER
            elif all(n in account.label for n in owner_name.split()):
                account.ownership = AccountOwnership.OWNER
            else:
                account.ownership = AccountOwnership.ATTORNEY

    def get_bourse_accounts_ids(self):
        """Return the part before 'bourse' of every account id containing it."""
        bourse_accounts_ids = []
        for account in self.get_accounts_list():
            if 'bourse' in account.id:
                bourse_accounts_ids.append(account.id.split('bourse')[0])
        return bourse_accounts_ids

    @go_contract
    @need_login
    def get_history(self, account):
        """Yield the transactions of ``account``, whatever its kind.

        Market accounts, regular accounts (``_link_id``), cards and life
        insurances each use their own navigation path. Raises
        BrowserUnavailable when the site sends the browser back to the login
        page, and NotImplementedError for Calie life insurances.
        """
        if hasattr(account, '_market_link') and account._market_link:
            self.connexion_bourse()
            self.location(account._link_id, params={
                'nump': account._market_id,
            })
            self.page.get_fullhistory()
            for tr in self.page.iter_history():
                yield tr
            self.deconnexion_bourse()
        elif hasattr(account, '_link_id') and account._link_id:
            try:
                self.location(account._link_id)
            except ServerError:
                return
            if self.login.is_here():
                # Website crashed and we are disconnected.
                raise BrowserUnavailable()
            date_guesser = LinearDateGuesser()
            for tr in self.page.get_operations(date_guesser=date_guesser):
                yield tr
        elif account.type == Account.TYPE_CARD:
            for tr in self.get_cb_operations(account=account, month=1):
                yield tr
        elif account.type == Account.TYPE_LIFE_INSURANCE:
            if not account._external_website:
                self.logger.warning('This account is limited, there is no available history.')
                return

            if account._is_calie_account:
                # TODO build parsing of history page, all-you-can-eat js in it
                # follow 'account._history_url' for that
                raise NotImplementedError()
            else:
                self.assurancevie.stay_or_go()
                self.go_life_insurance_website()
                assert self.av_list.is_here(), 'Something went wrong during iter life insurance history'
                # Need to be on account details page to do history request
                self.av_investments.go(life_insurance_id=account.id)
                self.av_history.go()
                for tr in self.page.iter_history():
                    yield tr
                self.go_back_from_life_insurance_website()

    @need_login
    def get_coming(self, account):
        """Yield the current (month=0) card operations; nothing for non-card accounts."""
        if account.type == Account.TYPE_CARD:
            for tr in self.get_cb_operations(account=account, month=0):
                yield tr

    # %todo check this decorator : @go_contract
    @need_login
    def get_cb_operations(self, account, month=0):
        """
        Get CB operations.

        * month=0 : current operations (non debited)
        * month=1 : previous month operations (debited)
        """
        # Separation of bank account id and bank account key
        # example : 123456A
        regex = r'([0-9]{6})([A-Z]{1})'
        account_id_regex = re.match(regex, account.parent._compte)

        args = {
            'AGENCE': account.parent._agence,
            'COMPTE': account_id_regex.group(1),
            'CLE': account_id_regex.group(2),
            'NUMEROCARTE': account._card_position,
            'MOIS': month,
        }

        # We must go to '_cards_list' url first before transaction_link, otherwise, the website
        # will show same transactions for all account, despite different values in 'args'.
        assert 'MOIS=' in account._cards_list, 'Missing "MOIS=" in url'
        init_url = account._cards_list.replace('MOIS=0', 'MOIS=%s' % month)
        self.location(init_url)
        self.location(account._transactions_link, params=args)

        if month == 1:
            summary = self.page.get_card_summary()
            if summary:
                yield summary

        for tr in self.page.iter_transactions():
            # Strange behavior, but sometimes, rdate > date.
            # We skip it to avoid duplicate transactions.
            if tr.date >= tr.rdate:
                yield tr

    @go_contract
    @need_login
    def get_investment(self, account):
        """Yield the investments of a life-insurance or market account.

        Accounts whose id is among the bourse account ids get a single
        liquidity line built from their balance.
        """
        if account.type == Account.TYPE_LIFE_INSURANCE:
            if not account._external_website:
                self.logger.warning('This account is limited, there is no available investment.')
                return

            self.assurancevie.stay_or_go()
            if account._is_calie_account:
                calie_details = self.open(account.url)
                for inv in calie_details.page.iter_investment():
                    yield inv
            else:
                self.go_life_insurance_website()
                assert self.av_list.is_here(), 'Something went wrong during iter life insurance investments'
                self.av_investments.go(life_insurance_id=account.id)
                for inv in self.page.iter_investment():
                    yield inv
                self.go_back_from_life_insurance_website()

        elif hasattr(account, '_market_link') and account._market_link:
            self.connexion_bourse()
            for inv in self.location(account._market_link).page.iter_investment():
                yield inv
            self.deconnexion_bourse()
        elif account.id in self.get_bourse_accounts_ids():
            yield create_french_liquidity(account.balance)

    def locate_browser(self, state):
        """Restore the location from ``state``.

        When the saved URL is the recipient-creation confirmation page, the
        page is not reloaded; the browser is only flagged as logged.
        """
        if state['url'] == 'https://particuliers.secure.lcl.fr/outil/UWBE/Creation/creationConfirmation':
            self.logged = True
        else:
            super(LCLBrowser, self).locate_browser(state)

    @need_login
    def send_code(self, recipient, **params):
        """Validate the SMS code in ``params['code']`` and finish the recipient creation.

        Raises AddRecipientBankError when the site answers 'false'.
        """
        res = self.open('/outil/UWBE/Otp/validationCodeOtp?codeOtp=%s' % params['code'])
        if res.text == 'false':
            raise AddRecipientBankError(message='Mauvais code sms.')
        self.recip_recap.go().check_values(recipient.iban, recipient.label)
        return self.get_recipient_object(recipient.iban, recipient.label)

    @need_login
    def get_recipient_object(self, iban, label):
        """Build an external EUR Recipient for ``iban``/``label``, enabled 5 days from now."""
        r = Recipient()
        r.iban = iban
        r.id = iban
        r.label = label
        r.category = u'Externe'
        r.enabled_at = datetime.now().replace(microsecond=0) + timedelta(days=5)
        r.currency = u'EUR'
        r.bank_name = NotAvailable
        return r

    @need_login
    def new_recipient(self, recipient, **params):
        """Start (or, when ``params`` holds a 'code', finish) adding a recipient.

        Raises AddRecipientBankError for IBANs not starting with FR or MC or
        when the site reports an error, and AddRecipientStep to ask for the
        SMS code once the request has been submitted.
        """
        if 'code' in params:
            return self.send_code(recipient, **params)

        if recipient.iban[:2] not in ('FR', 'MC'):
            raise AddRecipientBankError(message=u"LCL n'accepte que les iban commençant par MC ou FR.")

        # the creation page is requested at most twice
        for _ in range(2):
            self.add_recip.go()
            if self.add_recip.is_here():
                break

        if self.no_perm.is_here() and self.page.get_error_msg():
            raise AddRecipientBankError(message=self.page.get_error_msg())

        assert self.add_recip.is_here(), 'Navigation failed: not on add_recip'
        self.page.validate(recipient.iban, recipient.label)

        assert self.recip_confirm.is_here(), 'Navigation failed: not on recip_confirm'
        self.page.check_values(recipient.iban, recipient.label)

        # Send sms to user.
        self.open('/outil/UWBE/Otp/envoiCodeOtp?telChoisi=MOBILE')
        raise AddRecipientStep(self.get_recipient_object(recipient.iban, recipient.label),
                               Value('code', label='Saisissez le code.'))

    @go_contract
    @need_login
    def iter_recipients(self, origin_account):
        """Yield the recipients available from ``origin_account``.

        Yields nothing when the account has no transfer id, when transfers
        are not permitted, or when the page says the account cannot transfer.
        """
        if origin_account._transfer_id is None:
            return
        self.transfer_page.go()
        if self.no_perm.is_here() or not self.page.can_transfer(origin_account._transfer_id):
            return
        self.page.choose_origin(origin_account._transfer_id)
        for recipient in self.page.iter_recipients(account_transfer_id=origin_account._transfer_id):
            yield recipient

    @go_contract
    @need_login
    def init_transfer(self, account, recipient, amount, reason=None, exec_date=None):
        """Fill the transfer form; a deferred transfer is used unless ``exec_date`` is today."""
        self.transfer_page.go()
        self.page.choose_origin(account._transfer_id)
        self.page.choose_recip(recipient)

        if exec_date == date.today():
            self.page.transfer(amount, reason)
        else:
            self.page.deferred_transfer(amount, reason, exec_date)
        return self.page.handle_response(account, recipient)

    @need_login
    def execute_transfer(self, transfer):
        """Confirm the transfer on the current page, check for errors, return ``transfer``."""
        self.page.confirm()
        self.page.check_error()
        return transfer

    @need_login
    def get_advisor(self):
        """Return an iterator over the single advisor read from the accounts page."""
        return iter([self.accounts.stay_or_go().get_advisor()])

    @need_login
    def iter_subscriptions(self):
        """Yield the single subscription read from the client page."""
        yield self.client.go().get_item()

    @need_login
    def iter_documents(self, subscription):
        """Return the list of documents found by the document search.

        ``subscription`` is not used by this implementation.
        """
        self.documents.go()
        self.documents_plus.go()
        self.page.do_search_request()
        return list(self.page.get_list())

    def get_profile_name(self):
        """Return the name shown on the accounts page (navigating there if needed)."""
        self.accounts.stay_or_go()
        return self.page.get_name()

    @need_login
    def get_profile(self):
        """Return the profile built by the profile page for the current name."""
        name = self.get_profile_name()
        # The self.get_profile_name() already does a
        # self.accounts.stay_or_go()
        self.profile.go(method="POST")
        profile = self.page.get_profile(name=name)
        return profile
class BinckBrowser(LoginBrowser):
    """Browser for the Binck website rooted at ``BASEURL``.

    Two navigation modes exist: the accounts page with account switching,
    and a fallback on the old accounts page, used when the accounts page
    shows no accounts table. ``old_website_connection`` remembers that the
    fallback was needed.
    """

    BASEURL = 'https://web.binck.fr'

    old_website_connection = False
    unique_account = False

    login = URL(r'/Logon', LoginPage)
    view = URL('/PersonIntroduction/Index', ViewPage)
    logon_flow = URL(r'/AmlQuestionnairesOverview/LogonFlow$', LogonFlowPage)

    account = URL(r'/PortfolioOverview/Index', AccountsPage)
    accounts = URL(r'/PersonAccountOverview/Index', AccountsPage)
    old_accounts = URL(r'/AccountsOverview/Index', OldAccountsPage)

    account_switch = URL('/Header/SwitchAccount', SwitchPage)
    home_page = URL(r'/$',
                    r'/Home/Index',
                    HomePage)

    investment = URL(r'/PortfolioOverview/GetPortfolioOverview', InvestmentPage)
    investment_list = URL(r'PortfolioOverview$', InvestmentListPage)
    invest_detail = URL(r'/SecurityInformation/Get', InvestDetailPage)

    history = URL(r'/TransactionsOverview/GetTransactions',
                  r'/TransactionsOverview/FilteredOverview',
                  HistoryPage)
    questions = URL(r'/FDL_Complex_FR_Compte',
                    r'FsmaMandatoryQuestionnairesOverview',
                    QuestionPage)
    change_pass = URL(r'/ChangePassword/Index',
                      r'/EditSetting/GetSetting\?code=MutationPassword',
                      ChangePassPage)
    handle_passwords = URL(r'/PersonalCredentials/Index', HandlePasswordsPage)
    postpone_passwords = URL(r'/PersonalCredentials/PostPone', PostponePasswords)

    def deinit(self):
        """Log out (when the current page is a logged one) before releasing the browser."""
        if self.page and self.page.logged:
            self.location('https://www.binck.fr/deconnexion-site-client')
        super(BinckBrowser, self).deinit()

    def do_login(self):
        """Submit the credentials and translate a remaining login page into an error.

        Raises BrowserIncorrectPassword when the error mentions the password,
        ActionNeeded when the account is reported blocked or closed, and
        AssertionError for any other case where the login page is still shown.
        """
        self.login.go().login(self.username, self.password)

        if self.login.is_here():
            error = self.page.get_error()
            if error and 'mot de passe' in error:
                raise BrowserIncorrectPassword(error)
            elif error and any((
                'Votre compte a été bloqué / clôturé' in error,
                'Votre compte est bloqué, veuillez contacter le Service Clients' in error,
            )):
                raise ActionNeeded(error)
            raise AssertionError('Unhandled behavior at login: error is "{}"'.format(error))

    def _fetch_history_pages(self, currency, token):
        """Request every history page for ``currency`` and return them as a list.

        Pages are requested until the current page's ``EndOfData`` field is
        no longer ``False``.
        """
        data = [('currencyCode', currency), ('startDate', ''), ('endDate', '')]
        pages = [self.history.go(data=data, headers=token)]
        while self.page.doc['EndOfData'] is False:
            # a copy of the base form data is handed to the page for each next request
            pages.append(self.history.go(data=self.page.get_nextpage_data(data[:]), headers=token))
        return pages

    @need_login
    def switch_account(self, account_id):
        """Make ``account_id`` the active account and end on the accounts page.

        Raises AssertionError when the accounts page cannot be reached,
        either before the switch (no token can be read) or after it.
        """
        self.accounts.stay_or_go()
        # The token is read from the accounts page; fail explicitly instead of
        # hitting an unbound local when the page could not be reached.
        assert self.accounts.is_here(), 'switch_account could not reach AccountsPage to read the token'
        token = self.page.get_token()
        data = {'accountNumber': account_id}
        # Important: the "switch" request without the token will return a 500 error
        self.account_switch.go(data=data, headers=token)
        # We should be automatically redirected to the accounts page:
        assert self.accounts.is_here(), 'switch_account did not redirect to AccountsPage properly'

    @need_login
    def iter_accounts(self):
        """Yield the accounts, falling back on the old accounts page when needed."""
        # If we already know that it is an old website connection,
        # we can call old_website_connection() right away.
        if self.old_website_connection:
            for account in self.iter_old_accounts():
                yield account
            return

        if self.unique_account:
            self.account.stay_or_go()
        else:
            self.accounts.stay_or_go()

        if self.page.has_accounts_table():
            for a in self.page.iter_accounts():
                a._invpage = None
                a._histpages = None
                self.switch_account(a.id)
                # We must get the new token almost everytime we get a new page:
                if self.accounts.is_here():
                    token = self.page.get_token()
                # Get valuation_diff from the investment page
                try:
                    data = {'grouping': 'SecurityCategory'}
                    a.valuation_diff = self.investment.go(data=data, headers=token).get_valuation_diff()
                except HTTPNotFound:
                    # if it is not an invest account, the portfolio link may be present but hidden and return a 404
                    a.valuation_diff = None
                yield a

        # Some Binck connections don't have any accounts on the new AccountsPage,
        # so we need to fetch them on the OldAccountsPage for now:
        else:
            self.old_website_connection = True
            for account in self.iter_old_accounts():
                yield account

    @need_login
    def iter_old_accounts(self):
        """Yield accounts from the old accounts page.

        Each account gets its IBAN, its investment page (``_invpage``, or
        None) and its history pages (``_histpages``) attached. Raises
        ActionNeeded when a server error page carries a "Votre compte est"
        message; other server errors are re-raised.
        """
        self.old_accounts.go()
        for a in self.page.iter_accounts():
            try:
                self.old_accounts.stay_or_go().go_to_account(a.id)
            except ServerError as exception:
                # get html error to parse
                parser = etree.HTMLParser()
                html_error = etree.parse(StringIO(exception.response.text), parser)
                account_error = html_error.xpath('//p[contains(text(), "Votre compte est")]/text()')
                if account_error:
                    raise ActionNeeded(account_error[0])
                else:
                    raise

            a.iban = self.page.get_iban()
            # Get token
            token = self.page.get_token()
            # Get investment page
            data = {'grouping': "SecurityCategory"}
            try:
                a._invpage = self.investment.go(data=data, headers=token) \
                    if self.page.is_investment() else None
            except HTTPNotFound:
                # if it's not an invest account, the portfolio link may be present but hidden and return a 404
                a._invpage = None

            if a._invpage:
                a.valuation_diff = a._invpage.get_valuation_diff()
            # Get history page
            a._histpages = self._fetch_history_pages(a.currency, token)

            yield a

    @need_login
    def iter_investment(self, account):
        """Yield the investments of ``account``, liquidities first.

        Nothing is yielded for an account whose balance is 0.
        """
        if account.balance == 0:
            return
        # Start with liquidities:
        if account._liquidity:
            yield create_french_liquidity(account._liquidity)

        if self.old_website_connection:
            self.old_accounts.stay_or_go().go_to_account(account.id)
            if account._invpage:
                for inv in account._invpage.iter_investment(currency=account.currency):
                    if not inv.code:
                        params = {'securityId': inv._security_id}
                        self.invest_detail.go(params=params)
                        if self.invest_detail.is_here():
                            inv.code, inv.code_type = self.page.get_isin_code_and_type()
                    yield inv
            return

        self.switch_account(account.id)
        token = self.page.get_token()

        try:
            data = {'grouping': 'SecurityCategory'}
            self.investment.go(data=data, headers=token)
        except HTTPNotFound:
            return

        for inv in self.page.iter_investment(currency=account.currency):
            yield inv

    @need_login
    def iter_history(self, account):
        """Yield the transactions of ``account``.

        In old-website mode the pages stored on the account are replayed;
        otherwise every history page is fetched before the first yield.
        """
        if self.old_website_connection:
            if account._histpages:
                for page in account._histpages:
                    for tr in page.iter_history():
                        yield tr
            return

        self.switch_account(account.id)
        token = self.page.get_token()
        for page in self._fetch_history_pages(account.currency, token):
            for tr in page.iter_history():
                yield tr
class MetalArchivesBrowser(LoginBrowser):
    """
    Browsing the Metal Archives website.
    """

    BASEURL = 'https://www.metal-archives.com/'

    # Maximum number of bands yielded by suggestions(); can be changed to get
    # more or fewer band suggestions.
    MAX_SUGGESTIONS = 13

    login = URL('authentication/login', LoginPage)
    bands = URL(r'search/ajax-band-search/\?field=name&query=(?P<pattern>.*)', SearchBandsPage)
    band = URL('bands/Band/(?P<band_id>.*)', BandPage)
    albums = URL('band/discography/id/(?P<band_id>.*)/tab/all', AlbumPage)
    favorites = URL(r'bookmark/ajax-list/type/band\?sEcho=1', FavoritesPage)
    suggested = URL(r'band/ajax-recommendations/id/(?P<band_id>.*)\?showMoreSimilar=1', SuggestionsPage)

    def do_login(self):
        """Post the username and password to the login URL."""
        d = {
            'loginUsername': self.username,
            'loginPassword': self.password
        }
        self.login.go(data=d)

    def iter_band_search(self, pattern):
        """Yield the bands returned by the band search for ``pattern``."""
        for band in self.bands.go(pattern=pattern).iter_bands():
            yield band

    def get_info(self, id):
        """Return the information parsed from the page of band ``id``."""
        return self.band.go(band_id=id).get_info()

    def get_albums(self, id):
        """Yield the albums listed on the discography page of band ``id``."""
        for album in self.albums.go(band_id=id).iter_albums():
            yield album

    @need_login
    def get_favorites(self):
        """Yield the bands bookmarked as favorites by the logged-in user."""
        for favorite in self.favorites.go().iter_favorites():
            yield favorite

    @need_login
    def get_suggestions(self, bands):
        """Yield the similar bands of every band id in ``bands``, unranked.

        The recommendations URL needs a ``band_id``, so one request is made
        per element of ``bands``.
        NOTE(review): ``bands`` is assumed to be an iterable of band ids, as
        in suggestions() -- confirm against callers.
        """
        for band in bands:
            for suggestion in self.suggested.go(band_id=band).iter_suggestions():
                yield suggestion

    @need_login
    def suggestions(self, band_list):
        """Yield up to ``MAX_SUGGESTIONS`` bands most often similar to ``band_list``.

        Offers band suggestions depending on your favorite bands: similar
        bands are counted by URL, bands whose id is already in ``band_list``
        are skipped, and the most frequent ones are yielded first (ties keep
        first-seen order). Yields nothing, after a warning, when
        ``band_list`` is empty or no similar band is found.
        """
        if not band_list:
            self.logger.warning('In order to get band suggestions, you first need to add some favorite artists of the Metal Archives website.')
            return

        similar_bands = []
        for band in band_list:
            # Gets all the similar artists of your favorite bands:
            similar_bands.extend(self.suggested.go(band_id=band).iter_suggestions())

        if not similar_bands:
            self.logger.warning('Your favorite artists did not contain any similar bands.')
            return

        suggestions = {}      # band url -> number of times it was suggested
        suggested_bands = {}  # band url -> first band object seen for that url
        for band in similar_bands:
            if band.id in band_list:
                # Skip the artists that are already in the favorite band list
                continue
            if band.url not in suggestions:
                # Creates a counter for each new similar artist in the suggestions:
                suggestions[band.url] = 1
                suggested_bands[band.url] = band
            else:
                # Increments '+1' if the similar artist is already in the suggestions:
                suggestions[band.url] += 1

        # A stable descending sort gives the same order as repeatedly taking
        # max() and popping it, without failing when fewer than
        # MAX_SUGGESTIONS candidates exist.
        best_urls = sorted(suggestions, key=suggestions.get, reverse=True)[:self.MAX_SUGGESTIONS]
        suggestion_list = [suggested_bands[url] for url in best_urls]

        assert suggestion_list, 'Failed to return any suggestions from your favorite artists.'

        # The top similar artists to your favorite bands
        for band in suggestion_list:
            yield band
class VimeoBrowser(PagesBrowser):
    """Browser for Vimeo.

    Uses the signed REST API at ``APIURL`` for video info, search and
    channels, the player config URL for stream URLs, and the website plus
    ``api.vimeo.com`` for categories.
    """

    BASEURL = 'https://vimeo.com'

    APIURL = 'http://vimeo.com/api/rest/v2'
    # NOTE(review): API credentials are hard-coded in the source.
    CONSUMER_KEY = 'ae4ac83f9facda375a72fed704a3643a'
    CONSUMER_SECRET = 'b6072a4aba1eaaed'

    video_url = URL(r'https://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)

    list_page = URL(r'categories/(?P<category>.*)/videos/.*?',
                    ListPage)
    categories_page = URL('categories', CategoriesPage)

    api_page = URL(r'https://api.vimeo.com/search\?filter_mature=191&filter_type=clip&sort=featured&direction=desc&page=(?P<page>\d*)&per_page=20&sizes=590x332&_video_override=true&c=b&query=&filter_category=(?P<category>\w*)&fields=search_web%2Cmature_hidden_count&container_fields=parameters%2Ceffects%2Csearch_id%2Cstream_id%2Cmature_hidden_count', APIPage)

    _api = URL(APIURL, XMLAPIPage)

    def __init__(self, method, quality, *args, **kwargs):
        """Store the streaming ``method`` and preferred ``quality`` index."""
        self.method = method
        self.quality = quality
        PagesBrowser.__init__(self, *args, **kwargs)

    def _call_api(self, data):
        """Sign and POST ``data`` to the REST API; return the resulting page."""
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        self._prepare_request(self.APIURL, method='POST', headers=headers, data=data)
        return self._api.go(data=data)

    def fill_video_infos(self, _id, video=None):
        """Fill (or create, when ``video`` is None) a video from the API info of ``_id``."""
        data = {'method': 'vimeo.videos.getInfo',
                'video_id': _id}
        return self._call_api(data).fill_video_infos(obj=video)

    def get_video(self, _id, video=None):
        """Return the video ``_id`` with its information, quality, method and URL set."""
        video = self.fill_video_infos(_id, video)
        if video._is_hd == "0":
            video._quality = 2
        else:
            video._quality = self.quality
        video._method = self.method
        return self.fill_video_url(video)

    def fill_video_url(self, video):
        """Set ``video.url`` from the player config and return the video.

        For the 'hls' method, the playlist at the URL set by the config page
        is read and one of its stream entries is selected by ``self.quality``
        (falling back on the first entry of the reversed list). The video is
        returned unchanged when the config URL answers 404.
        """
        self._setup_session(self.PROFILE)
        try:
            video = self.video_url.open(_id=video.id).fill_url(obj=video)
            if self.method == u'hls':
                streams = []
                for item in self.read_url(video.url):
                    item = item.decode('ascii')
                    # keep non-blank lines that are not playlist directives
                    if not item.startswith('#') and item.strip():
                        streams.append(item)

                if streams:
                    streams.reverse()
                    url = streams[self.quality] if self.quality < len(streams) else streams[0]
                    video.url = urljoin(video.url, url)
                else:
                    video.url = NotAvailable
            return video
        except HTTPNotFound:
            return video

    def read_url(self, url):
        """Open ``url`` in streaming mode and return an iterator over its lines."""
        r = self.open(url, stream=True)
        buf = r.iter_lines()
        return buf

    def search_videos(self, pattern, sortby):
        """Return the videos matching ``pattern``.

        ``sortby`` is accepted but not used: the request always asks for the
        'relevant' sort.
        """
        data = {'method': 'vimeo.videos.search',
                'sort': 'relevant',
                'page': '1',
                'full_response': '1',
                'query': quote_plus(pattern.encode('utf-8'))}
        return self._call_api(data).iter_videos()

    def get_channels(self):
        """Return the first page of channels, most subscribed first."""
        data = {'method': 'vimeo.channels.getAll',
                'page': '1',
                'sort': 'most_subscribed'}  # 'newest', 'oldest', 'alphabetical', 'most_videos', 'most_subscribed', 'most_recently_updated'
        return self._call_api(data).iter_channels()

    def get_channel_videos(self, channel):
        """Return the first page of videos of ``channel``, newest first."""
        data = {'method': 'vimeo.channels.getVideos',
                'sort': 'newest',  # 'oldest', 'most_played', 'most_commented', 'most_liked'
                'page': '1',
                'channel_id': channel,
                'full_response': '1'}
        return self._call_api(data).iter_videos()

    def get_categories(self):
        """Return the categories listed on the website's categories page."""
        self._setup_session(self.PROFILE)
        return self.categories_page.go().iter_categories()

    def get_category_videos(self, category):
        """Return the first page of videos of ``category``.

        The token read from the category page is installed as a session
        ``Authorization`` header before querying ``api_page``.
        """
        token = self.list_page.go(category=category).get_token()
        self.session.headers.update({"Authorization": "jwt %s" % token,
                                     "Accept": "application/vnd.vimeo.*+json;version=3.3"})
        return self.api_page.go(page=1, category=category).iter_videos()

    def _create_authorization(self, url, method, params=None):
        """Return an ``Authorization`` header dict signing the request (HMAC-SHA1).

        ``params`` are the request parameters; they take part in the
        signature base string but only ``oauth_*`` entries are emitted in
        the header.
        """
        def _percent_encode(s):
            result = quote_plus(s).replace('+', '%20').replace('*', '%2A').replace('%7E', '~')
            # the implementation of the app has a bug. someone double escaped the '@' so we have to correct this
            # on our end.
            result = result.replace('%40', '%2540')
            return result

        def _compute_signature(s):
            # the key is "<consumer secret>&<empty token secret>"
            key = _percent_encode(self.CONSUMER_SECRET) + '&' + _percent_encode('')
            key = key.encode('ascii')
            s = s.encode('ascii')
            a = hmac.new(key, s, sha1)
            # b64encode() never appends a newline, so nothing needs stripping
            return b64encode(a.digest()).decode('ascii')

        def _normalize_parameters(_params):
            list_of_params = []
            for key in sorted(_params.keys()):
                value = _params[key]
                # The access_token endpoint expects raw values, except for the password
                if url == 'https://secure.vimeo.com/oauth/access_token' and key != 'x_auth_password':
                    list_of_params.append('%s=%s' % (key, value))
                else:
                    list_of_params.append('%s=%s' % (key, _percent_encode(value)))
            return '&'.join(list_of_params)

        if not params:
            params = {}

        all_params = {'oauth_consumer_key': self.CONSUMER_KEY,
                      'oauth_signature_method': 'HMAC-SHA1',
                      'oauth_timestamp': str(time.time()),
                      'oauth_nonce': str(time.time()),
                      'oauth_version': '1.0'}
        all_params.update(params)

        base_string = _percent_encode(method.upper())
        base_string += '&'
        base_string += _percent_encode(url)
        base_string += '&'
        base_string += _percent_encode(_normalize_parameters(all_params))

        all_params['oauth_signature'] = _compute_signature(base_string)

        authorization = ['%s="%s"' % (key, _percent_encode(all_params[key]))
                         for key in all_params if key.startswith('oauth_')]

        return {'Authorization': 'OAuth %s' % (', '.join(authorization))}

    def _prepare_request(self, url, method='GET', headers=None, data=None):
        """Install the app headers, ``headers`` and the signature on the session.

        ``headers`` and ``data`` default to None (treated as empty) rather
        than to shared mutable dicts.
        """
        _headers = {
            # Adjacent literals instead of a backslash continuation inside the
            # string, so that no source indentation leaks into the header.
            'User-Agent': 'VimeoAndroid/1.1.42 (Android ver=4.4.2 sdk=19; Model'
                          ' samsung GT-I9505; Linux 3.4.0-3423977 armv7l)',
            'Host': 'vimeo.com',
            'Accept-Encoding': 'gzip, deflate'}

        self.session.headers.update(_headers)
        self.session.headers.update(headers or {})
        self.session.headers.update(self._create_authorization(url, method, data))
class LogicimmoBrowser(PagesBrowser):
    """Browser for the logic-immo.com housing adverts site.

    Exposes city lookup, housing search, housing details and contact
    phone retrieval through the URL handlers declared below.
    """

    BASEURL = 'https://www.logic-immo.com/'
    PROFILE = Firefox()

    # URL patterns are regular expressions: they are written as raw strings
    # so that regex escapes such as ``\?`` are not parsed as (invalid)
    # Python string escape sequences.
    city = URL(
        r'asset/t9/getLocalityT9.php\?site=fr&lang=fr&json=%22(?P<pattern>.*)%22',
        CitiesPage)
    search = URL(
        r'(?P<type>location-immobilier|vente-immobilier|recherche-colocation)-(?P<cities>.*)/options/(?P<options>.*)',
        SearchPage)
    housing = URL(r'detail-(?P<_id>.*).htm', HousingPage)
    phone = URL(r'(?P<urlcontact>.*)', PhonePage)

    # Advert type -> value of the ``type`` group of the ``search`` URL.
    TYPES = {
        POSTS_TYPES.RENT: 'location-immobilier',
        POSTS_TYPES.SALE: 'vente-immobilier',
        POSTS_TYPES.SHARING: 'recherche-colocation',
        POSTS_TYPES.FURNISHED_RENT: 'location-immobilier',
        POSTS_TYPES.VIAGER: 'vente-immobilier'
    }

    # House type -> id sent in the ``groupprptypesids`` search option.
    RET = {
        HOUSE_TYPES.HOUSE: '2',
        HOUSE_TYPES.APART: '1',
        HOUSE_TYPES.LAND: '3',
        HOUSE_TYPES.PARKING: '10',
        HOUSE_TYPES.OTHER: '14'
    }

    def __init__(self, *args, **kwargs):
        super(LogicimmoBrowser, self).__init__(*args, **kwargs)
        # Every request of this session is flagged as an AJAX call.
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'

    def get_cities(self, pattern):
        """Return the cities matching ``pattern``.

        Returns ``None`` without performing any request when ``pattern``
        is empty.
        """
        if not pattern:
            return None
        return self.city.go(pattern=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types):
        """Run a housing search and return an iterator over the results.

        :param type: advert type, must be a key of ``TYPES``
        :param cities: value for the ``cities`` part of the search URL
        :param nb_rooms: minimum number of rooms (falsy to ignore)
        :param area_min: minimum area (falsy means ``0``)
        :param area_max: maximum area (falsy to ignore)
        :param cost_min: minimum price (falsy means ``0``)
        :param cost_max: maximum price (falsy to ignore)
        :param house_types: iterable of house types; the ones missing
            from ``RET`` are ignored
        :raises TypeNotSupported: if ``type`` is not a key of ``TYPES``
        """
        if type not in self.TYPES:
            raise TypeNotSupported()

        options = []

        if type == POSTS_TYPES.VIAGER:
            # Viager adverts use a dedicated property-type id and ignore
            # the requested house types.
            ret = ['15']
        else:
            ret = [self.RET[house_type] for house_type in house_types
                   if house_type in self.RET]

        if ret:
            options.append('groupprptypesids=%s' % ','.join(ret))

        if type == POSTS_TYPES.FURNISHED_RENT:
            options.append('searchoptions=4')

        options.append('pricemin=%s' % (cost_min or '0'))
        if cost_max:
            options.append('pricemax=%s' % cost_max)

        options.append('areamin=%s' % (area_min or '0'))
        if area_max:
            options.append('areamax=%s' % area_max)

        if nb_rooms:
            # The option lists every accepted count, from nb_rooms up to 6.
            rooms = ','.join(str(i) for i in range(nb_rooms, 7))
            option_name = 'nbbedrooms' if type == POSTS_TYPES.SHARING else 'nbrooms'
            options.append('%s=%s' % (option_name, rooms))

        # ``type`` was validated above, so a direct lookup is safe.
        self.search.go(type=self.TYPES[type],
                       cities=cities,
                       options='/'.join(options))

        if type == POSTS_TYPES.SHARING:
            return self.page.iter_sharing()
        return self.page.iter_housings(query_type=type)

    def get_housing(self, _id, housing=None):
        """Load the detail page of advert ``_id`` and return the housing.

        ``housing`` is forwarded to the page as the object to fill.
        """
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the contact phone of advert ``_id``.

        Only ids starting with ``location`` or ``vente`` are handled;
        ``None`` is returned for any other id.
        """
        if not _id.startswith(('location', 'vente')):
            return None

        urlcontact, params = self.housing.stay_or_go(
            _id=_id).get_phone_url_datas()
        return self.phone.go(urlcontact=urlcontact, params=params).get_phone()
class LogicimmoBrowser(PagesBrowser):
    """Browser for the logic-immo.com housing adverts site.

    Exposes city lookup, housing search, housing details and contact
    phone retrieval through the URL handlers declared below.
    """

    BASEURL = 'http://www.logic-immo.com/'

    # URL patterns are regular expressions: they are written as raw strings
    # so that regex escapes (``\d``, ``\w``, ``\.``, ``\?``) are not parsed
    # as (invalid) Python string escape sequences.
    city = URL(
        r'asset/t9/t9_district/fr/(?P<size>\d*)/(?P<first_letter>\w)/(?P<pattern>.*)\.txt\?json=%22(?P<pattern2>.*)%22',
        CitiesPage)
    search = URL(
        r'(?P<type>location|vente)-immobilier-(?P<cities>.*)/options/(?P<options>.*)',
        SearchPage)
    housing = URL(r'detail-(?P<_id>.*).htm', HousingPage)
    phone = URL(r'(?P<urlcontact>.*)', PhonePage)

    # Query type -> value of the ``type`` group of the ``search`` URL.
    TYPES = {Query.TYPE_RENT: 'location', Query.TYPE_SALE: 'vente'}

    # House type -> id sent in the ``groupprptypesids`` search option.
    RET = {
        Query.HOUSE_TYPES.HOUSE: '2',
        Query.HOUSE_TYPES.APART: '1',
        Query.HOUSE_TYPES.LAND: '3',
        Query.HOUSE_TYPES.PARKING: '10',
        Query.HOUSE_TYPES.OTHER: '14'
    }

    def get_cities(self, pattern):
        """Return the cities matching ``pattern``.

        The URL is built from the pattern length, its upper-cased first
        character and the upper-cased pattern itself. Returns ``None``
        without performing any request when ``pattern`` is empty.
        """
        if not pattern:
            return None

        upper_pattern = pattern.upper()
        return self.city.go(size=len(pattern),
                            first_letter=pattern[0].upper(),
                            pattern=upper_pattern,
                            pattern2=upper_pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max,
                        cost_min, cost_max, house_types):
        """Run a housing search and return an iterator over the results.

        :param type: query type; types missing from ``TYPES`` fall back
            to ``'location'``
        :param cities: value for the ``cities`` part of the search URL
        :param nb_rooms: number of rooms (falsy to ignore)
        :param area_min: minimum area (falsy means ``0``)
        :param area_max: maximum area (falsy to ignore)
        :param cost_min: minimum price (falsy means ``0``)
        :param cost_max: maximum price (falsy to ignore)
        :param house_types: iterable of house types; the ones missing
            from ``RET`` are ignored
        """
        options = []

        ret = [self.RET[house_type] for house_type in house_types
               if house_type in self.RET]
        if ret:
            options.append('groupprptypesids=%s' % ','.join(ret))

        options.append('pricemin=%s' % (cost_min or '0'))
        if cost_max:
            options.append('pricemax=%s' % cost_max)

        options.append('areamin=%s' % (area_min or '0'))
        if area_max:
            options.append('areamax=%s' % area_max)

        if nb_rooms:
            options.append('nbrooms=%s' % nb_rooms)

        page = self.search.go(type=self.TYPES.get(type, 'location'),
                              cities=cities,
                              options='/'.join(options))
        return page.iter_housings()

    def get_housing(self, _id, housing=None):
        """Load the detail page of advert ``_id`` and return the housing.

        ``housing`` is forwarded to the page as the object to fill.
        """
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the contact phone of advert ``_id``.

        The contact URL and its parameters are read from the housing page,
        which is only reloaded if the browser is not already on it.
        """
        urlcontact, params = self.housing.stay_or_go(
            _id=_id).get_phone_url_datas()
        return self.phone.go(urlcontact=urlcontact, params=params).get_phone()
class AmundiTCBrowser(LoginBrowser):
    """Browser for an Amundi employee-savings website.

    The site to use is not fixed: its base URL is given to the constructor.
    """

    TIMEOUT = 120.0

    # URL patterns are regular expressions: they are written as raw strings
    # so that the regex escape ``\?`` is not parsed as an (invalid) Python
    # string escape sequence.
    login = URL(r'/home', LoginPage)
    redirect = URL(r'/home_indispo_redirect', RedirectPage)
    accounts = URL(r'/home_ajax_noee\?api=/api/individu/positionTotale',
                   AccountsPage)
    account_detail = URL(r'/home_ajax_noee', AccountDetailPage)
    account_history = URL(r'/home_ajax_noee\?api=/api/individu/operations',
                          AccountHistoryPage)

    def __init__(self, website, *args, **kwargs):
        """Create the browser.

        :param website: base URL of the site, stored as ``BASEURL``
        """
        super(AmundiTCBrowser, self).__init__(*args, **kwargs)
        self.BASEURL = website

    def do_login(self):
        """Submit the credentials on the login page.

        :raises BrowserIncorrectPassword: if the browser is still on the
            login page after the credentials were submitted
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)

        self.login.go()
        self.page.login(self.username, self.password)

        if self.login.is_here():
            raise BrowserIncorrectPassword()

    @need_login
    def iter_accounts(self):
        """Return an iterator over the accounts of the logged-in user."""
        self.accounts.go()
        return self.page.iter_accounts()

    @need_login
    def iter_investments(self, account):
        """Return an iterator over the investments of ``account``.

        The positions are requested for today's date, using the
        ``_ident`` attribute of the account.
        """
        self.account_detail.go(
            params={
                'api': '/api/individu/positionFonds',
                'idEnt': account._ident,
                'date': date.today().strftime('%d/%m/%Y'),
                'flagUrlFicheFonds': 'true'
            })
        return self.page.iter_investments(data={'acc': account})

    @need_login
    def iter_history(self, account):
        """Return an iterator over the operations of ``account``.

        A first one-item request is only used to read the total number of
        operations; the first real page (100 items from offset 0) is then
        loaded and handed to the page together with the request parameters
        and that total.
        """
        # Minimal request: only 'nbOperationsIndividuelles' is read from it.
        self.account_history.go(params={'limit': 1})
        total = int(self.page.doc['nbOperationsIndividuelles'])

        params = {
            'valeurExterne': 'false',
            'statut': 'CPTA',
            'filtreStatutModeExclusion': 'false',
            'limit': 100,
            'offset': 0
        }
        self.account_history.go(params=params)
        return self.page.iter_history(data={
            'acc': account,
            'params': params,
            'total': total
        })
class LeboncoinBrowser(PagesBrowser):
    """Browser for the housing adverts of leboncoin.fr.

    All searches are performed within the region given to the constructor.
    """

    BASEURL = 'http://www.leboncoin.fr/'

    # URL patterns are regular expressions: they are written as raw strings
    # so that the regex escape ``\?`` is not parsed as an (invalid) Python
    # string escape sequence.
    city = URL(
        r'ajax/location_list.html\?city=(?P<city>.*)&zipcode=(?P<zip>.*)',
        CityListPage)
    search = URL(
        r'(?P<type>.*)/offres/(?P<region>.*)/occasions/\?ps=(?P<ps>.*)&pe=(?P<pe>.*)&ros=(?P<ros>.*)&location=(?P<location>.*)&sqs=(?P<sqs>.*)&sqe=(?P<sqe>.*)&ret=(?P<ret>.*)&f=(?P<advert_type>.*)',
        r'(?P<_type>.*)/offres/(?P<_region>.*)/occasions.*?',
        HousingListPage)
    housing = URL(r'ventes_immobilieres/(?P<_id>.*).htm', HousingPage)

    # House type -> id sent in the ``ret`` search parameter.
    RET = {
        Query.HOUSE_TYPES.HOUSE: '1',
        Query.HOUSE_TYPES.APART: '2',
        Query.HOUSE_TYPES.LAND: '3',
        Query.HOUSE_TYPES.PARKING: '4',
        Query.HOUSE_TYPES.OTHER: '5'
    }

    def __init__(self, region, *args, **kwargs):
        """Create the browser.

        :param region: region used in every search URL
        """
        super(LeboncoinBrowser, self).__init__(*args, **kwargs)
        self.region = region

    def get_cities(self, pattern):
        """Return the cities matching ``pattern``.

        An all-digit pattern is sent as a zip code, anything else as a
        city name; the other parameter is sent empty.
        """
        if pattern.isdigit():
            city, zip_code = '', pattern
        else:
            city, zip_code = pattern, ''

        return self.city.go(city=city, zip=zip_code).get_cities()

    def search_housings(self, query, advert_type):
        """Run the search described by ``query`` and return its results.

        :param query: search criteria, decoded by :meth:`decode_query`
        :param advert_type: value of the ``f`` parameter of the search URL
        :returns: the housing list of the result page, or an empty list
            when the query has no city or no supported house type
        """
        (_type, cities, nb_rooms, area_min, area_max,
         cost_min, cost_max, ret) = self.decode_query(query)

        # Both values are joined strings: empty means nothing to search.
        if not cities or not ret:
            return []

        return self.search.go(region=self.region,
                              location=cities,
                              ros=nb_rooms,
                              sqs=area_min,
                              sqe=area_max,
                              ps=cost_min,
                              pe=cost_max,
                              type=_type,
                              advert_type=advert_type,
                              ret=ret).get_housing_list()

    def get_housing(self, _id, obj=None):
        """Load the page of advert ``_id`` and return the housing.

        ``obj`` is forwarded to the page as the object to fill.
        """
        return self.housing.go(_id=_id).get_housing(obj=obj)

    def decode_query(self, query):
        """Translate ``query`` into the values expected by the search URL.

        The search page of the region is loaded first, because the room,
        area and cost criteria are converted by that page.

        :returns: a tuple ``(type, cities, nb_rooms, area_min, area_max,
            cost_min, cost_max, ret)``; criteria that are not set in the
            query are returned as empty strings
        """
        cities = [c.name for c in query.cities]
        ret = [self.RET[g] for g in query.house_types if g in self.RET]

        if query.type == Query.TYPE_RENT:
            _type = 'locations'
        else:
            _type = 'ventes_immobilieres'

        self.search.go(_type=_type, _region=self.region)
        page = self.page

        nb_rooms = page.get_rooms_min(query.nb_rooms) if query.nb_rooms else ''
        area_min = page.get_area_min(query.area_min) if query.area_min else ''
        area_max = page.get_area_max(query.area_max) if query.area_max else ''
        cost_min = page.get_cost_min(query.cost_min) if query.cost_min else ''
        cost_max = page.get_cost_max(query.cost_max) if query.cost_max else ''

        # Several house types are sent as repeated ``ret`` parameters.
        return (_type, ','.join(cities), nb_rooms, area_min, area_max,
                cost_min, cost_max, '&ret='.join(ret))