def get_cards(self):
    """Return the ``onclick`` link targets that point to card operation lists.

    Every ``tr`` element of ``self.doc`` is inspected. The quoted part of
    its ``onclick`` attribute is kept when it starts with
    ``/outil/UWCB/UWCBEncours`` and contains ``listeOperations``.

    :return: list of the matching links, in iteration order (may be empty)
    """
    cards = []
    # ``iter`` replaces ``getiterator``, which is deprecated in lxml and was
    # removed from xml.etree.ElementTree in Python 3.9.
    for tr in self.doc.iter('tr'):
        # ``default=None`` is what we get back when the row has no quoted
        # onclick value to extract.
        link = Regexp(CleanText('./@onclick'), "'(.*)'", default=None)(tr)
        if link is None:
            continue
        if link.startswith('/outil/UWCB/UWCBEncours') and 'listeOperations' in link:
            cards.append(link)
    return cards
def parse(self, el):
    """Fill ``self.env`` with the advisor and agency contact details.

    Always sets ``phone`` and ``name``; ``mobile`` is only set when the
    advisor number starts with 06/07, and ``agency`` only when an advisor
    name was found.

    :param el: not used by this implementation
    """
    # we have two kinds of page and sometimes we don't have any advisor
    agency_phone = (
        CleanText('//span/a[contains(@href, "rendezVous")]', replace=[(' ', '')], default=NotAvailable)(self)
        or CleanText('//div[has-class("lbp-numero")]/span', replace=[(' ', '')], default=NotAvailable)(self)
    )
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    advisor_phone = Regexp(
        CleanText('//div[h3[contains(text(), "conseil")]]//span[2]', replace=[(' ', '')], default=""),
        r'(\d+)',
        default="",
    )(self)

    if advisor_phone.startswith(("06", "07")):
        # The advisor number has a 06/07 prefix: store it as the mobile
        # number and use the agency number as the main phone.
        self.env['phone'] = agency_phone
        self.env['mobile'] = advisor_phone
    else:
        self.env['phone'] = advisor_phone or agency_phone

    agency = CleanText('//div[h3[contains(text(), "Bureau")]]/div[not(@class)][1]')(self) or NotAvailable
    name = (
        CleanText('//div[h3[contains(text(), "conseil")]]//span[1]', default=None)(self)
        or CleanText('//div[@class="lbp-font-accueil"]/div[2]/div[1]/span[1]', default=None)(self)
    )
    if name:
        self.env['name'] = name
        self.env['agency'] = agency
    else:
        # No advisor name on the page: the agency is used as the name.
        self.env['name'] = agency
def parse(self, el):
    """Extract advisor/agency contact information into ``self.env``.

    Keys written: ``phone`` and ``name`` in every case, ``mobile`` when the
    advisor number begins with 06 or 07, ``agency`` when an advisor name is
    available.

    :param el: not used by this implementation
    """
    # we have two kinds of page and sometimes we don't have any advisor
    agency_phone = CleanText('//span/a[contains(@href, "rendezVous")]',
                             replace=[(' ', '')], default=NotAvailable)(self)
    if not agency_phone:
        # Second page layout: the number lives in the "lbp-numero" block.
        agency_phone = CleanText('//div[has-class("lbp-numero")]/span',
                                 replace=[(' ', '')], default=NotAvailable)(self)

    # The pattern is a raw string so that '\d' is not an invalid escape.
    advisor_phone = Regexp(CleanText('//div[h3[contains(text(), "conseil")]]//span[2]',
                                     replace=[(' ', '')], default=""),
                           r'(\d+)', default="")(self)

    if advisor_phone.startswith(("06", "07")):
        self.env['phone'] = agency_phone
        self.env['mobile'] = advisor_phone
    else:
        self.env['phone'] = advisor_phone or agency_phone

    agency = CleanText('//div[h3[contains(text(), "Bureau")]]/div[not(@class)][1]')(self) or NotAvailable

    name = CleanText('//div[h3[contains(text(), "conseil")]]//span[1]', default=None)(self)
    if not name:
        name = CleanText('//div[@class="lbp-font-accueil"]/div[2]/div[1]/span[1]', default=None)(self)

    if name:
        self.env['name'] = name
        self.env['agency'] = agency
    else:
        # Without an advisor name, the agency is stored as the name.
        self.env['name'] = agency
def parse(self, el):
    """Collect valuation data and the fund code of an investment row.

    Fills ``self.env`` with ``unitvalue``, ``vdate``, ``code`` and
    ``code_type``; anything that cannot be determined is set to
    ``NotAvailable``. Fetching the code may open extra pages through
    ``self.page.browser``.

    :param el: not used by this implementation
    """
    # Trying to find vdate and unitvalue: scan the spans of the 'label' cell
    # and keep the first number-like and the first date-like text.
    unitvalue, vdate = None, None
    for span in TableCell('label')(self)[0].xpath('.//span'):
        if unitvalue is None:
            unitvalue = Regexp(CleanText('.'), r'^([\d,]+)$', default=None)(span)
        if vdate is None:
            # Spans whose parent div mentions a maturity are skipped
            # (presumably a maturity date, not a valuation date).
            parent_text = CleanText('./parent::div')(span)
            if not any(x in parent_text for x in [u"échéance", "Maturity"]):
                vdate = Regexp(CleanText('.'), r'^([\d\/]+)$', default=None)(span)

    self.env['unitvalue'] = MyDecimal().filter(unitvalue) if unitvalue else NotAvailable
    self.env['vdate'] = Date(dayfirst=True).filter(vdate) if vdate else NotAvailable

    page = None
    link_id = Attr(u'.//a[contains(@title, "détail du fonds")]', 'id', default=None)(self)
    inv_id = Attr('.//a[contains(@id, "linkpdf")]', 'id', default=None)(self)

    if link_id and inv_id:
        # Submit the "operation" form for this fund to get its detail response.
        form = self.page.get_form('//div[@id="operation"]//form')
        form['idFonds'] = inv_id.split('-', 1)[-1]
        form['org.richfaces.ajax.component'] = form[link_id] = link_id
        page = self.page.browser.open(form['javax.faces.encodedURL'], data=dict(form)).page

        if "hsbc.fr" in self.page.browser.BASEURL:  # special space for HSBC
            m = re.search(r'fundid=(\w+).+SH=(\w+)', CleanText('//complete', default="")(page.doc))
            if m:  # had to put full url to skip redirections.
                page = page.browser.open('https://www.assetmanagement.hsbc.com/feedRequest?feed_data=gfcFundData&cod=FR&client=FCPE&fId=%s&SH=%s&lId=fr' % m.groups()).page

        elif "consulteroperations" not in self.page.browser.url:  # not on history
            url = Regexp(CleanText('//complete'), r"openUrlFichesFonds\('(.*?)'\)", default=NotAvailable)(page.doc)

            if url is NotAvailable:
                # redirection to a useless graphplot page with url like
                # /portal/salarie-sg/fichefonds?idFonds=XXX&source=/portal/salarie-sg/monepargne/mesavoirs
                # NOTE(review): assert is stripped under -O; kept as-is so the
                # failure mode (AssertionError) is unchanged for callers.
                assert CleanText('//redirect/@url')(page.doc)
                self.env['code'] = NotAvailable
                self.env['code_type'] = NotAvailable
                return

            if url.startswith('http://docfinder.is.bnpparibas-ip.com/'):
                # pdf... http://docfinder.is.bnpparibas-ip.com/api/files/040d05b3-1776-4991-aa49-f0cd8717dab8/1536
                self.env['code'] = NotAvailable
                self.env['code_type'] = NotAvailable
                return

            # For these cpr-am.fr urls the code is taken directly from the url.
            match = re.match(r'http://www.cpr-am.fr/fr/fonds_detail.php\?isin=([A-Z0-9]+)', url)
            match = match or re.match(r'http://www.cpr-am.fr/particuliers/product/view/([A-Z0-9]+)', url)
            if match:
                self.env['code'] = match.group(1)
                self.env['code_type'] = Investment.CODE_TYPE_ISIN
                return

            page = self.page.browser.open(url).page

    try:
        self.env['code'] = page.get_code()
        self.env['code_type'] = page.CODE_TYPE
    # Handle page is None and page has not get_code method
    except AttributeError:
        self.env['code'] = NotAvailable
        self.env['code_type'] = NotAvailable
def parse(self, el):
    """Collect valuation data and the fund code of an investment row.

    Fills ``self.env`` with ``unitvalue``, ``vdate``, ``code`` and
    ``code_type``; anything that cannot be determined is set to
    ``NotAvailable``. Fetching the code may open extra pages through
    ``self.page.browser``.

    :param el: not used by this implementation
    """
    # Trying to find vdate and unitvalue: scan the spans of the 'label' cell
    # and keep the first number-like and the first date-like text.
    unitvalue, vdate = None, None
    for span in TableCell('label')(self)[0].xpath('.//span'):
        if unitvalue is None:
            unitvalue = Regexp(CleanText('.'), r'^([\d,]+)$', default=None)(span)
        if vdate is None:
            # Spans whose parent div mentions a maturity are skipped
            # (presumably a maturity date, not a valuation date).
            parent_text = CleanText('./parent::div')(span)
            if not any(x in parent_text for x in [u"échéance", "Maturity"]):
                vdate = Regexp(CleanText('.'), r'^([\d\/]+)$', default=None)(span)

    self.env['unitvalue'] = MyDecimal().filter(unitvalue) if unitvalue else NotAvailable
    self.env['vdate'] = Date(dayfirst=True).filter(vdate) if vdate else NotAvailable

    page = None
    link_id = Attr(u'.//a[contains(@title, "détail du fonds")]', 'id', default=None)(self)
    inv_id = Attr('.//a[contains(@id, "linkpdf")]', 'id', default=None)(self)

    if link_id and inv_id:
        # Submit the "operation" form for this fund to get its detail response.
        form = self.page.get_form('//div[@id="operation"]//form')
        form['idFonds'] = inv_id.split('-', 1)[-1]
        form['org.richfaces.ajax.component'] = form[link_id] = link_id
        page = self.page.browser.open(form['javax.faces.encodedURL'], data=dict(form)).page

        if "hsbc.fr" in self.page.browser.BASEURL:  # special space for HSBC
            m = re.search(r'fundid=(\w+).+SH=(\w+)', CleanText('//complete', default="")(page.doc))
            if m:  # had to put full url to skip redirections.
                page = page.browser.open('https://www.assetmanagement.hsbc.com/feedRequest?feed_data=gfcFundData&cod=FR&client=FCPE&fId=%s&SH=%s&lId=fr' % m.groups()).page

        elif "consulteroperations" not in self.page.browser.url:  # not on history
            # The alternation must be grouped: a top-level ``|`` splits the
            # whole pattern, which let a bare "false)" match without
            # capturing any url.
            url = Regexp(CleanText('//complete'),
                         r"openUrlFichesFonds\('(.*?)',(?:true|false)\).*",
                         default=NotAvailable)(page.doc)

            if url is NotAvailable:
                # redirection to a useless graphplot page with url like
                # /portal/salarie-sg/fichefonds?idFonds=XXX&source=/portal/salarie-sg/monepargne/mesavoirs
                # or on bnp, look for plot display function in a script
                # NOTE(review): assert is stripped under -O; kept as-is so the
                # failure mode (AssertionError) is unchanged for callers.
                assert CleanText('//redirect/@url')(page.doc) or CleanText('//script[contains(text(), "afficherGraphique")]')(page.doc)
                self.env['code'] = NotAvailable
                self.env['code_type'] = NotAvailable
                return

            useless_urls = (
                # pdf... http://docfinder.is.bnpparibas-ip.com/api/files/040d05b3-1776-4991-aa49-f0cd8717dab8/1536
                'http://docfinder.is.bnpparibas-ip.com/',
                # Redirection to a useless page with url like "https://epargne-salariale.axa-im.fr/fr/"
                'https://epargne-salariale.axa-im.fr/fr/',
            )
            # str.startswith accepts a tuple of prefixes.
            if url.startswith(useless_urls):
                self.env['code'] = NotAvailable
                self.env['code_type'] = NotAvailable
                return

            # For these cpr-am.fr urls the code is taken directly from the url.
            match = re.match(r'http://www.cpr-am.fr/fr/fonds_detail.php\?isin=([A-Z0-9]+)', url)
            match = match or re.match(r'http://www.cpr-am.fr/particuliers/product/view/([A-Z0-9]+)', url)
            if match:
                self.env['code'] = match.group(1)
                self.env['code_type'] = Investment.CODE_TYPE_ISIN
                return

            if url.startswith('http://fr.swisslife-am.com/fr/'):
                # Cookies set before opening swisslife-am pages
                # (presumably required by the site; confirm).
                self.page.browser.session.cookies.set('location', 'fr')
                self.page.browser.session.cookies.set('prof', 'undefined')

            page = self.page.browser.open(url).page

    try:
        self.env['code'] = page.get_code()
        self.env['code_type'] = page.CODE_TYPE
    # Handle page is None and page has not get_code method
    except AttributeError:
        self.env['code'] = NotAvailable
        self.env['code_type'] = NotAvailable