class EventDetails(ItemElement):
    """Fill a ``CentQuatreEvent`` from an event detail page.

    The end date is not read from the page: it is computed as the start
    date plus one hour.
    """

    klass = CentQuatreEvent

    obj_id = Env('fileId')

    # Dates: the end date is derived from the start date (+1 hour).
    obj_start_date = DateTime(CleanText(u'//*[@class="date"]'))
    obj_end_date = Eval(lambda start: start + timedelta(hours=1),
                        obj_start_date)
    obj_timezone = u'Europe/Paris'

    obj_summary = CleanText(
        u'//*[@class="content_product_info"]//*[contains(@class, "title")]')

    # Location is "<first location element>, <address>".
    obj_city = u'Paris'
    obj_location = Format(
        "%s, %s",
        CleanText(u'(//*[@class="location"])[1]'),
        CleanText(u'//*[@class="address"]'),
    )

    obj_category = CATEGORIES.SPECTACLE

    # The price is reused below to build the description.
    obj_price = CleanDecimal(
        CleanText(u'(//*[@class="unit_price with_beneficiary"])[1]'))

    # Description is "<tariff>. <seat>. <price with two decimals>€."
    obj_description = Format(
        u'%s. %s. %.2f€.',
        CleanText(
            u'(//*[contains(@class, "tariff") and contains(@class, "with_beneficiary")])[1]'
        ),
        CleanText(
            u'(//*[contains(@class, "seat") and contains(@class, "with_beneficiary")])[1]'
        ),
        obj_price,
    )

    obj_ticket = TICKET.AVAILABLE

    def obj_url(self):
        """Return the browser BASEURL followed by the ticket button link."""
        base_url = self.page.browser.BASEURL
        ticket_link = Link(u'//*[@class="alternative_button mticket"]/a')(self)
        return u'%s%s' % (base_url, ticket_link)
class item(ItemElement):
    """Fill a PDF ``Document`` from one card element of a document list."""

    klass = Document

    # Id is "<subid>_<what follows '#/details/' in the open-link attribute>".
    obj_id = Format(
        '%s_%s',
        Env('subid'),
        Regexp(CleanText('./@data-module-open-link--link'), '#/details/(.*)'),
    )

    obj_format = 'pdf'

    # Examples of labels once formatted (not a complete list):
    # - Situation de contrat suite à réajustement automatique Assurance Vie N° XXXXXXXXXX
    # - Lettre d'information client Assurance Vie N° XXXXXXXXXX
    # - Attestation de rachat partiel Assurance Vie N° XXXXXXXXXXXXXX
    obj_label = Format(
        '%s %s %s',
        CleanText('.//h3[@class="card-title"]'),
        CleanText('.//div[@class="sticker-content"]//strong'),
        CleanText('.//p[@class="contract-info"]'),
    )

    obj_date = Date(
        CleanText('.//p[@class="card-date"]'),
        parse_func=parse_french_date,
    )

    obj_type = 'document'

    # Captured from the data-url attribute, between "did_" and the next dot.
    obj__download_id = Regexp(CleanText('./@data-url'), r'.did_(.*?)\.')
class get_video(ItemElement):
    """Fill an ``RmllVideo`` from the meta tags and links of a video page."""

    klass = RmllVideo

    # The id is the last path component of the og:url permalink.
    obj_id = (
        CleanHTML('/html/head/meta[@property="og:url"]/@content')
        & CleanText()
        & Regexp(pattern=r'.*/permalink/(.+)/$')
    )

    obj_title = Format(
        u'%s',
        CleanHTML('/html/head/meta[@name="DC.title"]/@content') & CleanText(),
    )

    obj_description = Format(
        u'%s',
        CleanHTML('/html/head/meta[@property="og:description"]/@content')
        & CleanText(),
    )

    def obj_thumbnail(self):
        """Return a ``Thumbnail`` built from og:image, or None without one."""
        image_url = NormalizeThumbnail(
            CleanText('/html/head/meta[@property="og:image"]/@content'))(self)
        if not image_url:
            return None

        thumb = Thumbnail(image_url)
        thumb.url = thumb.id
        return thumb

    def obj_url(self):
        """Return the first mp4/webm download link, prefixed with BASEURL.

        Returns None when no download link has one of these extensions.
        """
        hrefs = XPath(
            '//div[@id="download_links"]/div[@class="paragraph"]/div[has-class("share")]/a[@target="_blank"]/@href'
        )(self)

        for href in hrefs:
            # Text after the last dot of the link.
            extension = str(href).rsplit('.', 1)[-1]
            self.logger.debug("Link:%s Ext:%s", href, extension)
            if extension in ('mp4', 'webm'):
                return self.page.browser.BASEURL + unicode(href)
        return None
class account(ItemElement):
    """Fill an ``Account`` from an element whose onclick opens its history.

    Most fields are derived from ``_link_id``, the quoted part of the
    ``onclick`` attribute with ``&mode=190`` appended.
    """

    klass = Account

    def condition(self):
        """Keep only elements whose onclick targets the movement list."""
        # NOTE(review): raises KeyError if the element has no onclick
        # attribute -- confirm the enclosing selection guarantees it.
        onclick = self.el.attrib['onclick']
        return '/outil/UWLM/ListeMouvement' in onclick

    # Maps the "nature" parameter of the link to an account type.
    NATURE2TYPE = {
        '001': Account.TYPE_SAVINGS,
        '005': Account.TYPE_CHECKING,
        '006': Account.TYPE_CHECKING,
        '007': Account.TYPE_SAVINGS,
        '012': Account.TYPE_SAVINGS,
        '023': Account.TYPE_CHECKING,
        '046': Account.TYPE_SAVINGS,
        '047': Account.TYPE_SAVINGS,
        '049': Account.TYPE_SAVINGS,
        '068': Account.TYPE_PEA,
        '069': Account.TYPE_SAVINGS,
    }

    obj__link_id = Format(
        '%s&mode=190',
        Regexp(CleanText('./@onclick'), "'(.*)'"),
    )

    # Pieces of the link, reused to build the id and the transfer id.
    obj__agence = Regexp(Field('_link_id'), r'.*agence=(\w+)')
    obj__compte = Regexp(Field('_link_id'), r'compte=(\w+)')
    obj_id = Format('%s%s', Field('_agence'), Field('_compte'))
    obj__transfer_id = Format('%s0000%s', Field('_agence'), Field('_compte'))

    obj__coming_links = []

    obj_label = CleanText('.//div[@class="libelleCompte"]')

    # Balance and currency are both read from the same right-aligned cell.
    obj_balance = MyDecimal('.//td[has-class("right")]', replace_dots=True)
    obj_currency = FrenchTransaction.Currency('.//td[has-class("right")]')

    obj_type = Map(
        Regexp(Field('_link_id'), r'.*nature=(\w+)'),
        NATURE2TYPE,
        default=Account.TYPE_UNKNOWN,
    )

    obj__market_link = None
class item(ItemElement):
    """Fill a PDF ``Document`` from one statement entry.

    The date span may or may not hold a full dd/mm/yyyy date; when it does
    not, the two-digit number found in the link title is used as the day.
    """

    klass = Document

    # "<sub_id>_<two digits from the link title><date without slashes>"
    obj_id = Format(
        '%s_%s%s',
        Env('sub_id'),
        Regexp(CleanText('.//a/@title'), r' (\d{2}) '),
        CleanText('.//span[contains(@class, "date")]', symbols='/'),
    )

    obj_label = Format(
        '%s - %s',
        CleanText('.//span[contains(@class, "lib")]'),
        CleanText('.//span[contains(@class, "date")]'),
    )

    obj_url = Format(
        '/voscomptes/canalXHTML/relevePdf/relevePdf_historique/%s',
        Link('./a'),
    )

    obj_format = 'pdf'
    obj_type = DocumentTypes.OTHER

    def obj_date(self):
        """Parse the document date, day first.

        If the date span lacks a dd/mm/yyyy date, the two digits from the
        link title are prepended to it before parsing.
        """
        date_text = CleanText('.//span[contains(@class, "date")]')(self)

        if re.search(r'(\d{2}/\d{2}/\d{4})', date_text) is None:
            # Incomplete date: take the day from the link title.
            source = Format(
                '%s/%s',
                Regexp(CleanText('.//a/@title'), r' (\d{2}) '),
                CleanText('.//span[contains(@class, "date")]'),
            )
        else:
            source = CleanText('.//span[contains(@class, "date")]')

        return Date(source, dayfirst=True)(self)
class item(ItemElement):
    """Fill an mp3 ``BaseAudio`` from one dict entry."""

    klass = BaseAudio

    def condition(self):
        """Keep only entries with a truthy 'path_mp3' value."""
        return Dict('path_mp3')(self)

    obj_id = BaseAudioIdFilter(
        Format(u'%s.%s', Env('radio_id'), Dict('nid')))
    obj_format = u'mp3'
    obj_ext = u'mp3'

    # "<title_emission> : <title_diff>"
    obj_title = Format(
        u'%s : %s',
        Dict('title_emission'),
        Dict('title_diff'),
    )
    obj_description = Dict('desc_emission', default=u'')
    obj_author = Join(u', ', Dict('personnes', default=u''))
    obj_url = Dict('path_mp3')

    def obj_thumbnail(self):
        """Return a ``Thumbnail`` for 'path_img_emission', or None if absent."""
        if 'path_img_emission' not in self.el:
            return None

        thumb = Thumbnail(Dict('path_img_emission')(self))
        thumb.url = thumb.id
        return thumb

    def obj_duration(self):
        """Return ``fin - debut`` as a timedelta in seconds.

        Returns None when either value is missing or falsy.
        """
        end = Dict('fin')(self)
        start = Dict('debut')(self)
        if not (start and end):
            return None
        return timedelta(seconds=int(end) - int(start))
class get_unique_card(ItemElement):
    """Fill a card ``Account`` from the page's summary table.

    All selectors are absolute and target the ``ca-table`` table that has
    a ``summary`` attribute.
    """

    item_xpath = '//table[@class="ca-table"][@summary]'

    klass = Account

    # Turns 'n° 4999 78xx xxxx xx72' into '499978xxxxxxxx72'.
    obj_number = CleanText(
        '//table[@class="ca-table"][@summary]//tr[@class="ligne-impaire"]/td[@class="cel-texte"][1]',
        replace=[(' ', ''), ('n°', '')],
    )

    # The card id looks like '499978xxxxxxxx72MrFirstnameLastname-'.
    obj_id = Format(
        '%s%s',
        Field('number'),
        CleanText(
            '//table[@class="ca-table"][@summary]//caption[@class="caption"]//b',
            replace=[(' ', '')],
        ),
    )

    # The card label looks like 'Carte VISA Premier - Mr M Lastname'.
    obj_label = Format(
        '%s - %s',
        CleanText(
            '//table[@class="ca-table"][@summary]//tr[@class="ligne-impaire ligne-bleu"]/th[@id="compte-1"]'
        ),
        CleanText(
            '//table[@class="ca-table"][@summary]//caption[@class="caption"]//b'
        ),
    )

    # The balance is a constant zero; only the coming is read from the page.
    obj_balance = CleanDecimal(0)
    obj_coming = CleanDecimal.French(
        '//table[@class="ca-table"][@summary]//tr[@class="ligne-paire"]//td[@class="cel-num"]',
        default=0,
    )

    # The currency is whatever follows "Montant en" in the column header.
    obj_currency = Currency(
        Regexp(
            CleanText('//th[contains(text(), "Montant en")]'),
            r'^Montant en (.*)',
        )
    )

    obj_type = Account.TYPE_CARD
    obj__form = None
class item(ItemElement):
    """Fill a ``Bill`` from one table row (date, amount, type, status)."""

    klass = Bill

    # Cell texts reused by several fields below.
    _date_text = CleanText(TableCell('date'))
    _amount_text = CleanText(TableCell('amount'))
    _type_text = CleanText(TableCell('type'))

    # "facture-<date>-<amount>-<type>#<sub_id>", each cell text slugified.
    obj_id = Format(
        'facture-%s-%s-%s#%s',
        Slugify(_date_text),
        Slugify(_amount_text),
        Slugify(_type_text),
        Env('sub_id'),
    )

    obj_url = AbsoluteLink('./td[5]//a', default=NotAvailable)
    obj_date = Date(_date_text, dayfirst=True)
    obj_label = Format('%s %s %s', _type_text, _amount_text, _date_text)
    obj_type = DocumentTypes.BILL
    obj_price = CleanDecimal(TableCell('amount'), replace_dots=True)
    obj_currency = Currency(TableCell('amount'))

    # The due date is the "le dd/mm/yyyy" part of the status cell.
    obj_duedate = Date(
        Regexp(
            CleanText(TableCell('status')),
            r'le (\d+)/(\d+)/(\d+)',
            r'\1/\2/\3',
        ),
        dayfirst=True,
    )

    def obj_format(self):
        """Return 'pdf' when the row has a link, ``NotAvailable`` otherwise."""
        return 'pdf' if self.obj_url(self) else NotAvailable

    def obj_income(self):
        """Return True when the parsed price is negative."""
        return self.obj_price(self) < 0
class item(ItemElement):
    """Fill a ``Forecast`` from one day block of a forecast page."""

    klass = Forecast

    # Date (example: 'Mar 27')
    obj_id = CleanText('./div/h4/text()')

    # Day of the week (example: 'Sat') followed by the date (example: 'Mar 27')
    obj_date = Format(
        '%s %s',
        CleanText('./div/h3/a/text()'),
        CleanText('./div/h4/text()'),
    )

    # Weather prediction (example: 'Partly cloudy')
    obj_text = Format('- %s', CleanText('.//span[@class="cond"]/text()'))

    def obj_low(self):
        """Return the low ``Temperature``.

        The first character of the text is dropped, as is the trailing
        degree sign and, when present, the 'C'/'F' letter after it. The unit
        is the last character of the page-level "local-temp" span.
        """
        raw = CleanText('.//span[@class="small-temp"]/text()')(self)

        # Some low temperatures end with '°', others with '°C' or '°F'.
        trailing = 2 if raw.endswith(('C', 'F')) else 1
        value = raw[1:-trailing]

        unit = CleanText('//span[@class="local-temp"]/text()')(self)[-1]
        return Temperature(float(value), unit)

    def obj_high(self):
        """Return the high ``Temperature``.

        Only the trailing degree sign is removed from the text. The unit is
        the last character of the page-level "local-temp" span.
        """
        raw = CleanText('.//span[@class="large-temp"]/text()')(self)

        # Drop the '°' at the end.
        value = raw[:-1]

        unit = CleanText('//span[@class="local-temp"]/text()')(self)[-1]
        return Temperature(float(value), unit)
class item(ItemElement):
    """Fill a card ``Account`` (in EUR) from one table row.

    The card number is reused to build both the label and the id.
    """

    klass = Account

    obj_type = Account.TYPE_CARD
    obj_currency = 'EUR'
    obj_number = CleanText(TableCell('number'))
    obj_label = Format('%s %s', CleanText(TableCell('label')), obj_number)
    obj_id = Format('%s.%s', Env('parent_id'), obj_number)

    def obj_coming(self):
        """Return the first non-empty amount among the candidate columns.

        The 'balance', '_credit' and '_debit' columns are tried in that
        order; a missing column or unparsable cell yields None and is
        skipped.

        Raises:
            AssertionError: if none of the columns holds an amount.
        """
        for column in ('balance', '_credit', '_debit'):
            amount = CleanDecimal(
                TableCell(column, default=None),
                replace_dots=True,
                default=None,
            )(self)
            if not empty(amount):
                return amount

        # There should be at least 0.00 in the debit column. Raise explicitly
        # rather than `assert False`, which is removed when Python runs with
        # -O and would make this method silently return None.
        raise AssertionError(
            'no coming amount found in the balance, credit or debit columns')

    def obj_url(self):
        """Return the absolute URL of the first link in the label cell."""
        anchor = TableCell('label')(self)[0].xpath('.//a')[0]
        return urljoin(self.page.url, anchor.attrib['href'])
class item(ItemElement):
    """Fill a card ``Account`` from one card table (caption plus rows)."""

    klass = Account

    def condition(self):
        """Keep only cards whose first row has a non-empty amount cell."""
        # Ignore cards that do not have a coming.
        return CleanText('.//tr[1]/td[@class="cel-num"]')(self)

    # Turns 'n° 4999 78xx xxxx xx72' into '499978xxxxxxxx72'.
    obj_number = CleanText(
        './/caption/span[@class="tdb-cartes-num"]',
        replace=[(' ', ''), ('n°', '')],
    )

    # The raw number is used to access the details of multiple cards.
    obj__raw_number = CleanText('.//caption/span[@class="tdb-cartes-num"]')

    # Ids of multiple cards look like '499978xxxxxxxx72MrFirstnameLastname'.
    obj_id = Format(
        '%s%s',
        Field('number'),
        CleanText(
            './/caption/span[@class="tdb-cartes-prop"]',
            replace=[(' ', '')],
        ),
    )

    # The card label looks like 'Carte VISA Premier - Mr M Lastname'.
    obj_label = Format(
        '%s - %s',
        CleanText('.//caption/span[has-class("tdb-cartes-carte")]'),
        CleanText('.//caption/span[has-class("tdb-cartes-prop")]'),
    )

    obj_type = Account.TYPE_CARD

    # The balance is a constant zero; the coming is the last cell of row 1.
    obj_balance = CleanDecimal(0)
    obj_coming = CleanDecimal.French(
        './/tr[1]/td[position() = last()]',
        default=0,
    )

    # The currency is whatever follows "Montants en" in the page.
    obj_currency = Currency(
        Regexp(
            CleanText('//span[contains(text(), "Montants en")]'),
            r'^Montants en (.*)',
        )
    )

    obj__form = None
class item(ItemElement):
    """Fill an HTML ``Bill`` from one order block.

    In the left column, the value span containing the ``currency`` env
    string is read as the price; the one not containing it as the date.
    """

    klass = Bill

    obj__simple_id = CleanText(
        './/div[has-class("actions")]//span[has-class("value")]')
    obj_id = Format('%s_%s', Env('subid'), Field('_simple_id'))
    obj_url = Format(
        '/gp/css/summary/print.html/ref=oh_aui_ajax_pi?ie=UTF8&orderID=%s',
        Field('_simple_id'),
    )
    obj_format = 'html'
    obj_label = Format('Facture %s', Field('_simple_id'))
    obj_type = 'bill'

    def obj_date(self):
        """Parse the left-column value that does not contain the currency."""
        currency = Env('currency')(self)
        date_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and not(contains(., "%s"))]'
            % currency
        )
        return parse_french_date(CleanText(date_xpath)(self))

    def obj_price(self):
        """Return the amount of the left-column value holding the currency.

        Dots are replaced only when the currency is EUR.
        """
        currency = Env('currency')(self)
        amount_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
            % currency
        )
        uses_dots = currency == u'EUR'
        return CleanDecimal(amount_xpath, replace_dots=uses_dots)(self)

    def obj_currency(self):
        """Return the currency parsed from the same value as the price."""
        currency = Env('currency')(self)
        amount_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
            % currency
        )
        return Currency(amount_xpath)(self)
def obj_name(self):
    """Return "<given name> <family name>" of the advisor.

    When either advisor name is falsy, the branch manager's names are used
    instead.
    """
    has_advisor = (
        Dict('advisorGivenName')(self) and Dict('advisorFamilyName')(self)
    )

    if not has_advisor:
        # No advisor is displayed: fall back to the agency advisor.
        return Format(
            '%s %s',
            CleanText(Dict('branchManagerGivenName')),
            CleanText(Dict('branchManagerFamilyName')),
        )(self)

    return Format(
        '%s %s',
        CleanText(Dict('advisorGivenName')),
        CleanText(Dict('advisorFamilyName')),
    )(self)
class item(ItemElement):
    """Fill a ``City`` whose id and name are both "<city> <zipcode>"."""

    klass = City

    # Texts of the two spans, shared by the id and the name.
    _city = CleanText('./span[has-class("city")]')
    _zipcode = CleanText('./span[@class="zipcode"]')

    obj_id = Format('%s %s', _city, _zipcode)
    obj_name = Format('%s %s', _city, _zipcode)
class item(ItemElement):
    """Fill a ``City`` from one dict entry of a location list."""

    klass = City

    def condition(self):
        """Skip entries whose 'lct_parent_id' is the string '0'."""
        parent_id = Dict('lct_parent_id')(self)
        return parent_id != '0'

    # "<lct_id>_<lct_level>"
    obj_id = Format(
        '%s_%s',
        Dict('lct_id'),
        Dict('lct_level'),
    )

    # "<lct_name> <lct_post_code>"
    obj_name = Format(
        '%s %s',
        Dict('lct_name'),
        Dict('lct_post_code'),
    )
class item(ItemElement):
    """Fill a PDF ``Bill`` from one dict entry of an order list."""

    klass = Bill

    # The order id is used in both the bill id and its label.
    _order_id = Dict('orderId')

    obj_id = Format('%s.%s', Env('subid'), _order_id)
    obj_date = Date(Dict('billingDate'))
    obj_format = 'pdf'
    obj_price = CleanDecimal(Dict('priceWithTax/value'))
    obj_url = Dict('pdfUrl')
    obj_label = Format('Facture %s', _order_id)
class item(ItemElement):
    """Fill a ``Bill`` from one order block, using an async invoice page.

    The page at ``_pre_url`` is loaded asynchronously as "details"; the
    bill URL (and hence its format) is read from that page.
    """

    klass = Bill

    load_details = Field('_pre_url') & AsyncLoad

    obj__simple_id = CleanText(
        './/span[contains(text(), "N° de commande")]/following-sibling::span'
    )
    obj_id = Format('%s_%s', Env('subid'), Field('_simple_id'))
    obj__pre_url = Format(
        '/gp/shared-cs/ajax/invoice/invoice.html?orderId=%s&relatedRequestId=%s&isADriveSubscription=&isHFC=',
        Field('_simple_id'),
        Env('request_id'),
    )
    obj_label = Format('Facture %s', Field('_simple_id'))
    obj_type = DocumentTypes.BILL

    def obj_date(self):
        """Parse the order date from the "a-span4" column.

        Falls back to the "a-span3" column when the first is unavailable;
        recipient columns are excluded in both cases.
        """
        primary = Date(
            CleanText(
                './/div[has-class("a-span4") and not(has-class("recipient"))]/div[2]'
            ),
            parse_func=parse_french_date,
            dayfirst=True,
            default=NotAvailable,
        )(self)
        if primary is not NotAvailable:
            return primary

        return Date(
            CleanText(
                './/div[has-class("a-span3") and not(has-class("recipient"))]/div[2]'
            ),
            parse_func=parse_french_date,
            dayfirst=True,
        )(self)

    def obj_price(self):
        """Return the amount of the left-column value holding the currency.

        Dots are replaced only when the currency is EUR.
        """
        currency = Env('currency')(self)
        amount_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
            % currency
        )
        uses_dots = currency == u'EUR'
        return CleanDecimal(amount_xpath, replace_dots=uses_dots)(self)

    def obj_currency(self):
        """Return the currency parsed from the same value as the price."""
        currency = Env('currency')(self)
        amount_xpath = (
            './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
            % currency
        )
        return Currency(amount_xpath)(self)

    def obj_url(self):
        """Return the invoice link found on the async "details" page.

        A download / generated-invoice link is preferred; without one, the
        "print an order summary" link is used.
        """
        details_doc = Async('details').loaded_page(self).doc

        invoice_url = Link(
            '//a[contains(@href, "download")]|//a[contains(@href, "generated_invoices")]',
            default=NotAvailable,
        )(details_doc)
        if invoice_url:
            return invoice_url

        return Link(
            '//a[contains(text(), "Imprimer un récapitulatif de commande")]'
        )(details_doc)

    def obj_format(self):
        """Return 'html' when the URL contains 'summary', 'pdf' otherwise."""
        return 'html' if 'summary' in Field('url')(self) else 'pdf'
class item(ItemElement):
    """Fill a PDF ``Document`` from one dict entry of a document list."""

    klass = Document

    obj_date = Date(Dict('dateDocument'))
    obj_format = "pdf"

    # "<libelle1> : <libelle3>"
    obj_label = Format(
        "%s : %s",
        Dict('libelle1'),
        Dict('libelle3'),
    )

    # The icon label with its 'Icône ' prefix removed.
    obj_type = CleanText(
        Dict('libelleIcone'),
        replace=[('Icône ', '')],
    )

    # First run of non-space characters followed by a dot in 'libelle2'.
    obj_id = Regexp(Dict('libelle2'), r"(\S+)\.", nth=0)

    obj_url = Format(
        "/prive/telechargerdocumentremuneration/v1?documentUuid=%s",
        Dict('documentUuid'),
    )
class item(ItemElement):
    """Fill a ``Thread`` from one tweet element of a stream or profile."""

    klass = Thread

    # Replacements applied to both title parts: they remove the space that
    # follows '@', '#' and 'http://'.
    _UNSPACE = [('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]

    # "<screen name>#<tweet id>"
    obj_id = Format(
        '%s#%s',
        CleanText('./@data-screen-name'),
        CleanText('./@data-tweet-id'),
    )

    # Header link text, then the tweet paragraph on an indented new line.
    obj_title = Format(
        '%s \n\t %s',
        CleanText(
            './div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a',
            replace=_UNSPACE,
        ),
        CleanText('./div/p', replace=_UNSPACE),
    )

    # The date comes from the 'data-time' attribute of the timestamp span.
    obj_date = DatetimeFromTimestamp(
        Attr(
            './div/div[@class="stream-item-header"]/small/a/span | ./div/div[@class="ProfileTweet-authorDetails"]/span/a/span',
            'data-time',
        )
    )
def obj_content(self):
    """Return the entry body followed by its origin URL and a CRLF.

    The body is taken from 'content/content' when the entry has a
    'content' key, otherwise from 'summary/content' when it has a
    'summary' key. An entry with neither key yields an empty string.
    """
    entry = self.el

    if 'content' in entry:
        body_path = 'content/content'
    elif 'summary' in entry:
        body_path = 'summary/content'
    else:
        return ''

    return Format(
        u'%s%s\r\n',
        CleanHTML(Dict(body_path)),
        CleanText(Dict('origin/htmlUrl')),
    )(entry)
class item(ItemElement):
    """Fill a PDF ``Bill`` (in EUR) from one dict entry of an invoice list."""

    klass = Bill

    # "<subid>_<idFacture>"
    obj_id = Format(
        '%s_%s',
        Env('subid'),
        Dict('idFacture'),
    )
    obj_price = CleanDecimal(Dict('mntTotFacture'))
    obj_url = Dict('_links/facturePDF/href')
    obj_date = MyDate(Dict('dateFacturation'))

    # The payment deadline is optional: NotAvailable when it is missing.
    obj_duedate = MyDate(
        Dict('dateLimitePaieFacture', default=NotAvailable),
        default=NotAvailable,
    )

    obj_label = Format('Facture %s', Dict('idFacture'))
    obj_format = 'pdf'
    obj_currency = 'EUR'
class item(ItemElement):
    """Fill a card ``Account`` from one dict entry of a card list."""

    klass = Account

    obj_type = Account.TYPE_CARD
    obj_number = Dict('numCarte')

    # "<account_id>.<numCarte>.<idCarte>"
    obj_id = Format(
        '%s.%s.%s',
        Env('account_id'),
        Dict('numCarte'),
        Dict('idCarte'),
    )

    # "<typeCarte> <nomPorteur>"
    obj_label = Format(
        '%s %s',
        Dict('typeCarte'),
        Dict('nomPorteur'),
    )

    obj__index = Dict('idCarte')
    obj__coming_amount = Dict('montantLigneEncours')
    obj__parent_iban = Env('parent_iban')

    # The raw amount is divided by 100 to obtain the coming.
    obj_coming = Eval(
        lambda raw: Decimal(raw) / 100,
        Dict('montant/montant'),
    )
    obj_currency = CleanText(Dict('montant/devise'))
class item(ItemElement):
    """Fill a PDF ``Bill`` from one dict entry of a payment list."""

    klass = Bill

    # Document id, used to build the bill id.
    _num = Dict('document/id')

    obj_id = Format(
        '%s_%s',
        Env('subid'),
        _num,
    )

    # 'created_at' is passed to datetime.fromtimestamp.
    obj_date = Eval(datetime.fromtimestamp, Dict('created_at'))

    # The label embeds the full bill id, not only the document id.
    obj_label = Format('Facture %s', Field('id'))

    obj_url = Dict('document/href')
    obj_price = CleanDecimal(Dict('amount/amount'))
    obj_currency = Currency(Dict('amount/currency'))
    obj_format = 'pdf'
class item(ItemElement):
    """Fill a ``Housing`` from one listing element of a search result."""

    klass = Housing

    # "<Idannonce>#<Rubrique>#<Source>"
    obj_id = Format(
        '%s#%s#%s',
        CleanText('./Idannonce'),
        CleanText('./Rubrique'),
        CleanText('./Source'),
    )

    obj_title = CleanText('./Titre')

    # The cost defaults to Decimal(0) when no price can be parsed.
    obj_cost = CleanDecimal('./Prix', default=Decimal(0))
    obj_currency = u'€'

    # "<Localisation> / <MiniINfos>"
    obj_text = Format(
        '%s / %s',
        CleanText('Localisation'),
        CleanText('./MiniINfos'),
    )

    # The date is not read from the element: datetime.now is used instead.
    obj_date = datetime.now

    obj_url = CleanText('./LienDetail')
class item(ItemElement):
    """Fill a PDF ``Bill`` from one statement row with two links.

    The text of the first link is the label; the ``onclick`` attribute of
    the second link is kept as the local id.
    """

    klass = Bill

    def condition(self):
        """Skip rows whose first link mentions all statements or an annual one."""
        link_text = CleanText('a[1]')(self.el)
        return not (u"tous les relev" in link_text or u'annuel' in link_text)

    # Spaces in the link text are replaced by dashes.
    obj_label = CleanText('a[1]', replace=[(' ', '-')])
    obj_id = Format(u"%s-%s", Env('subid'), Field('label'))

    # Force the first day of the month, as the label looks like "janvier 2016".
    obj_date = (
        Format("1 %s", Field('label'))
        & Date(parse_func=parse_french_date)
    )

    obj_format = u"pdf"
    obj_type = u"bill"
    obj__localid = Attr('a[2]', 'onclick')
class get_thread(ItemElement):
    """Fill a ``Thread`` from the permalink page of a single tweet."""

    klass = Thread

    # "<user>#<_id>", both taken from the environment.
    obj_id = Format('%s#%s', Env('user'), Env('_id'))

    # Header link text, then the tweet paragraph on an indented new line.
    # The replacements remove the space after '@', '#' and 'http://'.
    obj_title = Format(
        '%s \n\t %s',
        CleanText(
            '//div[@class="permalink-inner permalink-tweet-container"]/div/div/div/a',
            replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')],
        ),
        CleanText(
            '//div[@class="permalink-inner permalink-tweet-container"]/div/div/p',
            replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')],
        ),
    )

    # The text holds "<hh:mm> ... - <date ending with a 4-digit year>"; it is
    # rearranged as "<date> <hh:mm>" before parsing. The pattern and template
    # are raw strings: in a plain literal, '\d' is an invalid escape sequence
    # that newer Python versions warn about.
    obj_date = DateTime(
        Regexp(
            CleanText(
                '//div[@class="permalink-inner permalink-tweet-container"]/div/div/div[@class="client-and-actions"]/span/span'
            ),
            r'(\d+:\d+).+- (.+\d{4})',
            r'\2 \1',
        ),
        translations=DATE_TRANSLATE_FR,
    )
def get_advisor(self):
    """Yield a single ``Advisor`` filled from the document.

    The role is "wealth" when the advisor nature label contains
    "patrimoine" (case-insensitive), and "bank" otherwise.
    """
    doc = self.doc
    advisor = Advisor()

    advisor.name = Format(
        '%s %s',
        CleanText('//NomConseiller'),
        CleanText('//PrenomConseiller'),
    )(doc)
    advisor.phone = CleanText('//NumeroTelephone')(doc)
    advisor.agency = CleanText('//liloes')(doc)
    advisor.address = Format(
        '%s %s %s',
        CleanText('//ruadre'),
        CleanText('//cdpost'),
        CleanText('//loadre'),
    )(doc)
    advisor.email = CleanText('//Email')(doc)

    nature = CleanText('//LibelleNatureConseiller')(doc).lower()
    if "patrimoine" in nature:
        advisor.role = "wealth"
    else:
        advisor.role = "bank"

    yield advisor
class item(ItemElement):
    """Fill a PDF tax-certificate ``Document`` from one dict entry.

    The 'periode' value is used in the id, the label and the download URL,
    and the document is dated December 31st of that period.
    """

    klass = Document

    # "<subscription>_<periode>"
    obj_id = Format(
        "%s_%s",
        Env("subscription"),
        Dict("periode"),
    )
    obj_format = "pdf"

    # The date is built as "<periode>-12-31".
    obj_date = Date(Format("%s-12-31", Dict("periode")))

    obj_label = Format("Attestation fiscale %s", Dict("periode"))
    obj_type = DocumentTypes.OTHER
    obj_url = BrowserURL(
        "tax_certificate_download",
        employer=Env("employer"),
        year=Dict("periode"),
    )
class item(ItemElement):
    """Fill a PDF ``Document`` from one table row (label, date and link)."""

    klass = Document

    obj_date = Date(CleanText(TableCell('date')), dayfirst=True)

    # "<label> <date>"
    obj_label = Format(
        '%s %s',
        CleanText(TableCell('label')),
        CleanText(TableCell('date')),
    )

    # "<sub_id>_<date without slashes>"
    obj_id = Format(
        '%s_%s',
        Env('sub_id'),
        CleanText(TableCell('date'), symbols='/'),
    )

    # The URL uses an id that depends on the page where the document is.
    # For example, an id of 0 means this is the first document listed on
    # the page of year XXX for subscription YYYY.
    obj_url = Link('.//a')

    obj_format = 'pdf'
    obj_type = DocumentTypes.OTHER
class item(ItemElement):
    """Fill an ``Advisor`` from the 'data' dict of an advisor response."""

    klass = Advisor

    # "<civilite> <prenom> <nom>"
    obj_name = Format(
        '%s %s %s',
        Dict('data/civilite'),
        Dict('data/prenom'),
        Dict('data/nom'),
    )

    # Keeps the mail value from its first word character onwards. The pattern
    # is a raw string: in a plain literal, '\w' is an invalid escape sequence
    # that newer Python versions warn about.
    obj_email = Regexp(Dict('data/mail'), r'(?=\w)(.*)')

    # Phone-like numbers are stored without spaces.
    obj_phone = CleanText(Dict('data/telephone'), replace=[(' ', '')])
    obj_mobile = CleanText(Dict('data/mobile'), replace=[(' ', '')])
    obj_fax = CleanText(Dict('data/fax'), replace=[(' ', '')])

    obj_agency = Dict('data/agence')

    # "<adresseAgence> <codePostalAgence> <villeAgence>"
    obj_address = Format(
        '%s %s %s',
        Dict('data/adresseAgence'),
        Dict('data/codePostalAgence'),
        Dict('data/villeAgence'),
    )