def obj_coming(self):
    """Return the total of upcoming operations, card operations excluded.

    Opens the ``account_coming`` URL for this account's IBAN and reads the
    cumulative upcoming amount from the returned document. If a nature
    labelled "Factures / Retraits cartes" is listed, its cumulative amount
    is subtracted from the total.
    """

    def scaled(amount_path, decimals_path):
        # Amounts are read as a raw number plus a count of decimals; the
        # real value is amount / 10**decimals. Missing keys default to '0'.
        return Eval(
            lambda x, y: x / 10**y,
            CleanDecimal(Dict(amount_path, default='0')),
            CleanDecimal(Dict(decimals_path, default='0')))

    response = self.page.browser.open(
        BrowserURL('account_coming', identifiant=Field('iban'))(self))
    page = response.page

    total = scaled('infoOperationsAvenir/cumulTotal/montant',
                   'infoOperationsAvenir/cumulTotal/nb_dec')(page.doc)

    # Card operations are removed so that they are not counted twice in
    # the total upcoming amount.
    for nature in Dict('infoOperationsAvenir/natures')(page.doc):
        if Dict('nature/libelle')(nature) != "Factures / Retraits cartes":
            continue
        total -= scaled('cumulNatureMere/montant',
                        'cumulNatureMere/nb_dec')(nature)
        break

    return total
class get_price(ItemElement):
    """Build a ``Price`` object (cost, currency, message, shop) from an advert page."""

    klass = Price

    # Paragraph holding the displayed price; used for both cost and currency.
    _PRICE_XPATH = '//div[@class="mainInfos"]/div/p[@class="gpfzj"]'

    obj_id = Env('_id')
    obj_cost = CleanDecimal(_PRICE_XPATH)
    # Last currency symbol found in the price text; falls back to euro.
    obj_currency = Regexp(CleanText(_PRICE_XPATH),
                          '.*([%s%s%s])' % (u'€', u'$', u'£'),
                          default=u'€')
    obj_message = Format('%s %s',
                         CleanText('//div[@class="mainInfos"]/div/div/h1'),
                         CleanText('//div[@class="mainInfos"]/div/div/p'))
    obj_url = BrowserURL('advert_page', _id=Env('_id'))

    def obj_shop(self):
        """Return a ``Shop`` filled from the "contacter_le_vendeur" block."""
        seller = Shop(Env('_id')(self))

        name_filter = Regexp(
            CleanText('(//div[@xtcz="contacter_le_vendeur"]/div/ul/li)[1]'),
            'Nom : (.*)')
        seller.name = name_filter(self)

        # The location is read from the ``tooltip`` JavaScript variable.
        location_filter = JSVar(CleanText('//script'), var='tooltip')
        seller.location = location_filter(self)

        info_filter = CleanText(
            '//div[@xtcz="contacter_le_vendeur"]/div/ul/li[has-class("printPhone")]'
        )
        seller.info = info_filter(self)
        return seller

    obj_product = LaCentraleProduct()
def next_page(self):
    """Return the URL of the next search page, or None on the last page.

    The page number is taken from the ``pg=`` parameter of the
    ``<link rel="next">`` href. When that link is absent (or carries no
    number) there is no further page and None is returned instead of
    failing on ``int(None)`` / ``int('')``.
    """
    page = Regexp(CleanText('//link[@rel="next"]/@href', default=''),
                  r'.*pg=(\d*)',
                  default=None)(self)
    if not page:
        return None
    return BrowserURL('adv_search',
                      search=Env('search'),
                      page=int(page))(self)
class item(ItemElement):
    """Build a ``Subscription`` from a contract element.

    ``parse`` fills the environment (subscriber, subid, multi) that the
    ``obj_*`` attributes read.
    """

    klass = Subscription

    # The profile page is loaded asynchronously and read through
    # Async('details', ...) in parse().
    load_details = BrowserURL('profilpage') & AsyncLoad

    obj_subscriber = Env('subscriber')
    obj_label = Env('subid')
    obj_id = obj_label
    obj__multi = Env('multi')

    def parse(self, el):
        """Compute the subscriber name and contract id for this element.

        Raises:
            SkipItem: when a contract id is found but the JSON returned by
                its ``ecareurl`` has an empty ``html`` entry.
        """
        subscriber = Async(
            'details',
            CleanText(
                u'//span[contains(text(), "prénom / nom")]/following-sibling::span[1]'
            ))(self)
        if not subscriber:
            # Fall back to the separate title / first name / last name fields.
            subscriber = Async(
                'details',
                Format('%s %s %s',
                       CleanText(u'//*[contains(text(), "civilité")]/following-sibling::*[1]'),
                       CleanText(u'//*[contains(text(), "prénom")]/following-sibling::*[1]'),
                       CleanText(u'//*[text() = "nom :"]/following-sibling::*[1]')))(self)
        self.env['subscriber'] = subscriber

        # A missing attribute yields the string "None", which the regexp
        # does not match, so subid ends up as None.
        subid = Regexp(Attr('.', 'ecareurl', default="None"),
                       r'idContrat=(\d+)',
                       default=None)(self)

        # Without a contract id, the browser username is used as identifier.
        self.env['subid'] = subid if subid else self.page.browser.username
        self.env['multi'] = bool(subid)

        # Skip accounts that are available but not added to the customer
        # area (their ecareurl response has an empty 'html' entry).
        if subid and not json.loads(
                self.page.browser.open(Attr('.', 'ecareurl')(self)).content)['html']:
            raise SkipItem()
class item(ItemElement):
    """Build a ``Bill`` from one entry of the bills document."""

    klass = Bill

    obj_date = Date(Dict('dueDate'),
                    parse_func=parse_french_date,
                    default=NotAvailable)
    obj_price = CleanDecimal(Dict('amountIncludingTax'))
    obj_format = 'pdf'

    def obj_label(self):
        """Return the label, built from the bill date."""
        bill_date = Field('date')(self)
        return 'Facture du %s' % bill_date

    def obj_id(self):
        """Return ``<subid>_<ddmmyyyy>`` using the bill date."""
        subid = Env('subid')(self)
        day_stamp = Field('date')(self).strftime('%d%m%Y')
        return '%s_%s' % (subid, day_stamp)

    def get_params(self):
        """Return the URL-encoded query string identifying this bill."""
        return urlencode({
            'billid': Dict('id')(self),
            'billDate': Dict('dueDate')(self),
        })

    # get_params is passed as a callable and evaluated when the URL is built.
    obj_url = BrowserURL('doc_api_pro',
                         subid=Env('subid'),
                         dir=Dict('documents/0/mainDir'),
                         fact_type=Dict('documents/0/subDir'),
                         billparams=get_params)
    obj__is_v2 = False
class get_job_advert(ItemElement):
    """Build a ``BaseJobAdvert`` from an advert detail page."""

    klass = BaseJobAdvert

    # Paragraphs of the advert body, cleaned from HTML and joined by newlines.
    obj_description = CleanText(
        Join('\n',
             '//div[@id="annonce-detail"]/p[@class="text"]',
             textCleaner=CleanHTML))
    obj_id = Env('_id')
    obj_url = BrowserURL('advert_page', _id=Env('_id'))
    # First dd/mm/yyyy date found in the "infos" paragraph; NotAvailable
    # when there is none or when it cannot be parsed.
    obj_publication_date = Date(
        Regexp(CleanText('//div[@id="annonce-detail"]/p[@class="infos"]'),
               r'(\d{2}/\d{2}/\d{4})',
               default=NotAvailable),
        default=NotAvailable)
    obj_title = CleanText('//div[@id="annonce"]/div/div/h1')
    obj_society_name = CleanText('//section[@class="entp-resume"]/h1/a')
    obj_contract_type = CleanText(
        '//dl[@class="infos-annonce"]/dt[span[@class="picto picto-contrat-grey"]]/following-sibling::dd[1]'
    )
    obj_place = CleanText(
        '//dl[@class="infos-annonce"]/dt[span[@class="picto picto-geolocalisation-grey"]]/following-sibling::dd[1]'
    )
    # The "Salaire : " prefix is stripped from the paragraph preceding "infos".
    obj_pay = CleanText(
        '//div[@id="annonce-detail"]/p[@class="infos"]/preceding-sibling::p[1]',
        replace=[('Salaire : ', '')])
class get_housing(ItemElement):
    """Build a ``Housing`` object from an advert detail page.

    ``parse`` pre-computes the area and the details dictionary and stores
    them in the environment for ``obj_area`` and ``obj_details``.
    """

    klass = Housing

    def parse(self, el):
        """Read the property/value lines into ``env['area']`` and ``env['details']``."""
        details = {}
        self.env['area'] = NotAvailable
        for row in el.xpath('//div[@class="line"]/h2'):
            label = CleanText('./span[@class="property"]')(row)
            if 'Surface' in label:
                # Keep what precedes the "m" unit; commas are decimal marks.
                self.env['area'] = CleanDecimal(
                    Regexp(CleanText('./span[@class="value"]'), '(.*)m.*'),
                    replace_dots=(',', '.'))(row)
                continue

            key = u'%s' % label
            if 'GES' in key or 'Classe' in key:
                # For these rows the value is read from the <noscript> link.
                details[key] = CleanText(
                    './span[@class="value"]/noscript/a')(row)
            else:
                details[key] = CleanText('./span[@class="value"]')(row)

        self.env['details'] = details

    obj_id = Env('_id')
    obj_title = CleanText('//title')
    obj_cost = CleanDecimal('//h2[@itemprop="price"]/@content',
                            default=Decimal(0))
    # Last currency symbol found in the price text; falls back to euro.
    obj_currency = Regexp(
        CleanText('//h2[@itemprop="price"]/span[@class="value"]'),
        '.*([%s%s%s])' % (u'€', u'$', u'£'),
        default=u'€')
    obj_text = CleanText('//meta[@name="description"]/@content')
    obj_location = CleanText('//span[@itemprop="address"]')
    obj_details = Env('details')
    obj_area = Env('area')
    obj_url = BrowserURL('housing', _id=Env('_id'))

    def obj_date(self):
        """Return the publication date parsed from the "Mise en ligne le" line."""
        _date = Regexp(
            CleanText('//p[has-class("line")]', replace=[(u'à', '')]),
            '.*Mise en ligne le (.*)')(self)

        # Apply the French-to-English substitutions before parsing.
        for fr, en in DATE_TRANSLATE_FR:
            _date = fr.sub(en, _date)

        self.env['tmp'] = _date
        return DateTime(Env('tmp'), LinearDateGuesser())(self)

    def obj_photos(self):
        """Return the photos listed in the page scripts.

        Falls back to the ``itemprop="image"`` meta tag when the scripts
        list no image.
        """
        # ``\d+`` so that entries with an index of 10 or more are kept too.
        paths = re.findall(r'images\[\d+\]\s*=\s*"([\w/\.]*\.jpg)";',
                           CleanText('//script')(self))
        # The matched paths are prefixed with "http:" to build the URL.
        photos = [HousingPhoto(u'http:%s' % path) for path in paths]
        if not photos:
            img = CleanText('//meta[@itemprop="image"]/@content',
                            default=None)(self)
            if img:
                photos.append(HousingPhoto(img))
        return photos
def obj_type(self):
    """Return the advert type, decided from keywords in the housing URL.

    "colocation" is tested before "location" because the former contains
    the latter. Returns NotAvailable when no keyword matches.
    """
    url = BrowserURL('housing', _id=Env('_id'))(self)

    if 'colocation' in url:
        return POSTS_TYPES.SHARING

    if 'location' in url:
        furnished = False
        for li in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./span[has-class("criteria-label")]')(li)
            if label.lower() != "meublé":
                continue
            answer = CleanText('./span[has-class("criteria-value")]')(li)
            furnished = (answer.lower() == 'oui')
        return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

    if 'vente' in url:
        offertype = Attr(
            '//button[has-class("offer-contact-vertical-phone")][1]',
            'data-offertransactiontype')(self)
        # Transaction type '4' is mapped to VIAGER, anything else to SALE.
        return POSTS_TYPES.VIAGER if offertype == '4' else POSTS_TYPES.SALE

    return NotAvailable
class get_job_advert(ItemElement):
    """Build a ``BaseJobAdvert`` from the table layout of an advert page."""

    klass = BaseJobAdvert

    # Placeholder text removed from several fields.
    _ANY_VALUE = (u'-- Indifférent --', u'')

    obj_id = Env('id')
    obj_url = BrowserURL('advert_page', id=Env('id'))
    obj_society_name = CleanText(
        '//td[@class="Contenu"]/table[4]/tr[1]/td[1]/a')
    obj_title = CleanText('//td[@class="Titre15"]')
    # Rows following the "Détails :" row, then the content of table 2.
    obj_description = Format(
        '%s\n%s',
        Join('\n',
             u'//td[@class="Contenu"]/table[3]/tr[td/text()="Détails :"]/following-sibling::tr',
             textCleaner=CleanHTML),
        CleanHTML('//td[@class="Contenu"]/table[2]'))
    obj_job_name = CleanText(
        u'//td[@class="Contenu"]/table[3]/tr/td[text()="Poste :"]/following-sibling::td',
        replace=[_ANY_VALUE])
    obj_contract_type = CleanText(
        CleanHTML(
            u'//td[@class="Contenu"]/table[3]/tr/td[text()="Contrat :"]/following-sibling::td',
            default=u''),
        replace=[_ANY_VALUE])
    obj_pay = CleanText(
        u'//td[@class="Contenu"]/table[3]/tr/td[contains(text(), "Rémunération")]/following-sibling::td',
        default=u'')
    obj_place = CleanText(
        u'//td[@class="Contenu"]/table[3]/tr/td[contains(text(), "Région")]/following-sibling::td',
        default=u'',
        replace=[_ANY_VALUE, (u'Lieu de travail : ', u'')])
class get_housing(ItemElement):
    """Build a ``Housing`` object from the advert document (read with Dict paths)."""

    klass = Housing

    obj_id = Env('_id')
    obj_title = Dict('characteristics/titleWithTransaction')
    obj_location = Format('%s %s %s',
                          Dict('location/address'),
                          Dict('location/postalCode'),
                          Dict('location/cityLabel'))
    obj_cost = TypeDecimal(Dict('characteristics/price'))
    obj_currency = u'€'
    obj_text = CleanHTML(Dict('characteristics/description'))
    obj_url = BrowserURL('housing_html', _id=Env('_id'))
    obj_area = TypeDecimal(Dict('characteristics/area'))
    obj_date = FromTimestamp(Dict('characteristics/date'))

    def obj_photos(self):
        """Return the photos built from each image's ``xl`` URL.

        When the URL goes through ``thbr.figarocms.net`` and embeds another
        ``http://`` URL, the embedded URL is used; otherwise the ``xl``
        value is used as is. Images without an ``xl`` value are skipped
        rather than failing in ``re.search``.
        """
        photos = []
        for img in Dict('characteristics/images')(self):
            url = img.get('xl')
            if not url:
                continue
            m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)', url)
            photos.append(HousingPhoto(m.group(1) if m else url))
        return photos

    def obj_details(self):
        """Return a dict of extra characteristics (fees, rooms, energy, ...)."""
        details = {}
        details['fees'] = Dict('characteristics/fees')(self)
        details['bedrooms'] = Dict('characteristics/bedroomCount')(self)
        details['energy'] = Dict('characteristics/energyConsumptionCategory')(self)

        rooms = Dict('characteristics/roomCount')(self)
        # Truthiness check: tolerates None as well as an empty sequence.
        if rooms:
            details['rooms'] = rooms[0]

        details['available'] = Dict('characteristics/available',
                                    default=NotAvailable)(self)
        return details
class get_housing(ItemElement):
    """Build a ``Housing`` object from an HTML advert page."""

    klass = Housing

    obj_id = Env('_id')
    obj_title = CleanText('//h1[@itemprop="name"]')
    obj_location = CleanText('//span[@class="informations-localisation"]')
    obj_cost = CleanDecimal('//span[@itemprop="price"]')
    # Last currency symbol found in the price text; falls back to euro.
    obj_currency = Regexp(CleanText('//span[@itemprop="price"]'),
                          '.*([%s%s%s])' % (u'€', u'$', u'£'),
                          default=u'€')
    obj_text = CleanHTML('//div[@itemprop="description"]')
    obj_url = BrowserURL('housing', _id=Env('_id'))
    # The area is the number preceding " m2" in the title (second group).
    obj_area = CleanDecimal(
        Regexp(CleanText('//h1[@itemprop="name"]'),
               r'(.*?)(\d*) m2(.*?)',
               '\\2'),
        default=NotAvailable)

    def obj_photos(self):
        """Return the photos whose thumbnail ``src`` embeds an ``http://`` URL.

        The embedded URL (after the ``thbr.figarocms.net`` prefix) is used
        as photo URL.
        """
        photos = []
        for img in XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self):
            url = Regexp(CleanText('./@src'),
                         r'http://thbr\.figarocms\.net.*(http://.*)')(img)
            photos.append(HousingPhoto(url))
        return photos

    def obj_details(self):
        """Return the feature list plus the energy line as a dict.

        Entries with an empty name or value are left out.
        """
        details = {}
        for feature in XPath('//div[@class="features clearfix"]/ul/li')(self):
            key = CleanText('./span[@class="name"]')(feature)
            value = CleanText('./span[@class="value"]')(feature)
            if value and key:
                details[key] = value

        key = CleanText('//div[@class="title-dpe clearfix"]')(self)
        value = CleanText('//div[@class="energy-consumption"]')(self)
        if value and key:
            details[key] = value
        return details
def next_page(self):
    """Return the ``webservice`` URL for the page after the current one.

    The current page number and the other parameters are read from the
    environment.
    """
    following = 1 + int(Env('page')(self))
    url_filter = BrowserURL('webservice',
                            page=following,
                            lang=Env('lang'),
                            method_name=Env('method_name'),
                            pattern=Env('pattern'))
    return url_filter(self)
class get_job_advert(ItemElement):
    """Build a ``BaseJobAdvert`` from the advert modal content."""

    klass = BaseJobAdvert

    obj_id = Env('id')
    obj_url = BrowserURL('advert', id=Env('id'))
    # Title and job name are both read from the modal heading.
    obj_title = CleanText('//div[@class="modal-body"]/h2')
    obj_job_name = CleanText('//div[@class="modal-body"]/h2')
    obj_description = CleanText('//div[has-class("description")]/p')
    obj_society_name = CleanText('//div[@class="media-body"]/h4')
    obj_experience = Join(
        u'- ',
        '//h4[contains(text(), "Exp")]/following-sibling::ul[has-class("skill-list")][1]/li',
        newline=True,
        addBefore='\n- ')
    obj_formation = Join(
        u'- ',
        '//h4[contains(text(), "For")]/following-sibling::ul[has-class("skill-list")][1]/li',
        newline=True,
        addBefore='\n- ')
    obj_place = CleanText(
        '//div[@class="modal-body"]/h2/following-sibling::p[1]')
    obj_publication_date = PoleEmploiDate(
        CleanText('//div[@class="modal-body"]/h2/following-sibling::p[2]'))

    def parse(self, el):
        """Set contract type and pay from the ``icon-group`` definition list."""
        for term in XPath('//dl[@class="icon-group"]/dt')(el):
            heading = CleanText('.')(term)
            if heading == u'Type de contrat':
                self.obj.contract_type = CleanText(
                    './following-sibling::dd[1]')(term)
            elif heading == u'Salaire':
                # Only the part after the "Salaire : " prefix is kept.
                self.obj.pay = Regexp(
                    CleanText('./following-sibling::dd[1]'),
                    u'Salaire : (.*)')(term)
class get_job_advert(ItemElement):
    """Build a ``BaseJobAdvert`` from an advert page."""

    klass = BaseJobAdvert

    obj_id = Env('_id')
    obj_url = BrowserURL('advert_page', _id=Env('_id'))
    obj_title = CleanText('//div[@class="infos-lieu"]/h1')
    obj_place = CleanText('//div[@class="infos-lieu"]/h2')
    # Text following "Date de l'annonce :" in the agency block ('' if absent).
    obj_publication_date = Date(
        Regexp(CleanText('//div[@class="info-agency"]'),
               ".*Date de l'annonce :(.*)",
               default=''))
    obj_job_name = CleanText('//div[@class="infos-lieu"]/h1')
    obj_description = Format(
        '\n%s%s',
        CleanHTML('//article[@id="post-description"]/div'),
        CleanHTML('//article[@id="poste"]'))
    # The three fields below are extracted from the same list of items.
    obj_contract_type = Regexp(
        CleanText('//article[@id="poste"]/div/ul/li'),
        r'Contrat : (\w*)',
        default=NotAvailable)
    obj_pay = Regexp(CleanText('//article[@id="poste"]/div/ul/li'),
                     'Salaire : (.*) par mois',
                     default=NotAvailable)
    obj_experience = Regexp(CleanText('//article[@id="poste"]/div/ul/li'),
                            u'Expérience : (.* ans)',
                            default=NotAvailable)
class get_job_advert(ItemElement):
    """Build a ``BaseJobAdvert`` whose title and description are the alert text."""

    klass = BaseJobAdvert

    # Alert box whose text is used for both title and description.
    _ALERT_XPATH = '//div[@role="alert"]'

    obj_id = Env('_id')
    obj_url = BrowserURL('expired_advert', _id=Env('_id'))
    obj_title = CleanText(_ALERT_XPATH)
    obj_description = CleanText(_ALERT_XPATH)
class get_housing(ItemElement):
    """Build a ``Housing`` object from an advert detail page."""

    klass = Housing

    obj_id = Env('_id')
    # ``@class`` (attribute test): without the ``@`` the predicate tested a
    # child element named "class" and never matched the section.
    obj_title = CleanText('//section[@class="wctrl"]/h1')
    obj_cost = CleanDecimal('//span[@class="i prix"]',
                            replace_dots=(',', '.'),
                            default=Decimal(0))
    # Last currency symbol found in the price text; empty when none.
    obj_currency = Regexp(CleanText('//span[@class="i prix"]'),
                          '.*([%s%s%s])' % (u'€', u'$', u'£'),
                          default='')
    obj_text = CleanHTML('//article[@class="bloc description"]/p')
    obj_location = CleanText('//span[@class="i ville"]')
    obj_area = CleanDecimal(
        Regexp(CleanText('//span[@class="i"]'),
               '.*/(.*) m.*',
               default=NotAvailable),
        default=NotAvailable)
    obj_url = BrowserURL('housing', _id=Env('_id'))
    obj_phone = CleanText('//input[@id="hftel"]/@value')
    # Callable: evaluated when the object is built, not at class definition.
    obj_date = datetime.now

    def obj_details(self):
        """Return the "key:value" spans of the page as a dict.

        Spans without text or without a colon are ignored instead of
        raising.
        """
        details = {}
        for detail in self.el.xpath('//span[@class="i small"]'):
            parts = (detail.text or '').split(':')
            if len(parts) < 2:
                continue
            details[parts[0]] = parts[1]
        return details

    def obj_photos(self):
        """Return the photos of the ``ulPhotos`` list as absolute URLs."""
        return [
            HousingPhoto(u'http://www.entreparticuliers.com/%s' % src)
            for src in self.el.xpath('//ul[@id="ulPhotos"]/li/img/@src')
        ]
def obj_coming(self):
    """Return the upcoming amount of a checking account.

    Adds the total shown on the upcoming-operations page and the
    "Débit différé à débiter" amount of the current element, whichever are
    present. Returns NotAvailable for other account types or when neither
    amount is found.
    """
    if Field('type')(self) != Account.TYPE_CHECKING:
        return NotAvailable

    total = 0
    found = False

    response = self.page.browser.open(
        BrowserURL('par_account_checking_coming',
                   accountId=Field('id'))(self))

    if CleanText('//span[@id="amount_total"]')(response.page.doc):
        found = True
        total += CleanDecimal('//span[@id="amount_total"]',
                              replace_dots=True)(response.page.doc)

    if CleanText(u'.//dt[contains(., "Débit différé à débiter")]')(self):
        found = True
        total += CleanDecimal(
            u'.//dt[contains(., "Débit différé à débiter")]/following-sibling::dd[1]',
            replace_dots=True)(self)

    if not found:
        return NotAvailable
    return total
class get_housing(ItemElement):
    """Build a ``Housing`` object from an advert detail page."""

    klass = Housing

    obj_id = Env('_id')
    obj_title = CleanText(CleanHTML('//meta[@itemprop="name"]/@content'))
    # The area is the number preceding " m²" in the name (second group).
    obj_area = CleanDecimal(
        Regexp(CleanText(CleanHTML('//meta[@itemprop="name"]/@content')),
               '(.*?)(\\d*) m\xb2(.*?)',
               '\\2'),
        default=NotAvailable)
    obj_cost = CleanDecimal('//*[@itemprop="price"]')
    # Last currency symbol found in the price text; falls back to euro.
    obj_currency = Regexp(CleanText('//*[@itemprop="price"]'),
                          '.*([%s%s%s])' % (u'€', u'$', u'£'),
                          default=u'€')
    # dd/mm/yyyy date following "Mis à jour :" in the notes paragraph.
    obj_date = Date(
        Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ),
            u'.* Mis à jour : (\\d{2}/\\d{2}/\\d{4}).*'))
    obj_text = CleanHTML(
        '//div[@class="offer-description-text"]|//div[has-class("offer-description")]'
    )
    obj_location = CleanText('//*[@itemprop="address"]')
    obj_url = BrowserURL('housing', _id=Env('_id'))

    def obj_photos(self):
        """Return the photos of the carousel (either of the two layouts)."""
        sources = XPath(
            '//div[@class="carousel-content"]/ul/li/a/img/@src|//div[@class="carousel"]/ul/li/a/img/@src'
        )(self)
        return [HousingPhoto(u'%s' % src) for src in sources]

    def obj_details(self):
        """Return energy, greenhouse and criteria information as a dict.

        The energy and greenhouse summaries are added only when both their
        label and their message are present; every criteria list item is
        added as ``label -> value``.
        """
        details = {}

        # Both summaries share the same markup; only the CSS class differs.
        for css_class in ('energy-summary', 'greenhouse-summary'):
            label = CleanText(
                '//div[has-class("%(cls)s")]/span[@class="section-label"]'
                '|//div[has-class("%(cls)s")]/div/span[@class="section-label"]'
                % {'cls': css_class},
                default='')(self)
            message = CleanText(
                '//div[has-class("%s")]/span[@class="energy-msg"]' % css_class,
                default='')(self)
            if label and message:
                details[label] = message

        for li in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./div[has-class("criteria-label")]')(li)
            value = CleanText('./div[has-class("criteria-value")]')(li)
            details[label] = value

        return details
def next_page(self):
    """Return a POST request for the next page, or None when there is none.

    The "Page suivante" link carries a ``PostBackOptions`` call; its target
    is copied into the form's ``__EVENTTARGET`` field and the form is
    posted.
    """
    link = Link('//a[@title="Page suivante" and @href]', default=None)(self)
    if not link:
        return None

    form = self.page.get_form('//form')
    form['__EVENTTARGET'] = re.search(
        r'PostBackOptions[^\w]+([^"]+)', link).group(1)

    # The 'investment' URL is built with page=None and the resulting
    # literal 'None' is then replaced by the form URL.
    url = BrowserURL('investment', page=None)(self).replace('None', form.url)
    return requests.Request("POST", url, data=dict(form))
def next_page(self):
    """Return the URL of the next search page, or None.

    Reads the "<a> / <b>" pair from the results title and, when the stop
    condition below holds, asks for the search starting at ``<a>``.
    """
    str_results = Regexp(
        CleanText('//div[@class="m_resultats_recherche_titre"]'),
        r'.* - (\d* / \d*) .*')(self)
    results = str_results.split('/')

    # NOTE(review): the test is "first number minus second number < 10";
    # confirm against the site that this is the intended stop criterion.
    if int(results[0]) - int(results[1]) < 10:
        return BrowserURL('search',
                          pattern=Env('pattern'),
                          start=int(results[0]))(self)
    return None
def next_page(self):
    """Return the URL of the next search page, or None when there is none.

    The page number is taken from the ``page=`` parameter of the
    ``data-action="fetch"`` link.
    """
    page = Regexp(CleanText('//a[@data-action="fetch"]/@href', default=''),
                  r'.*page=(\d*)',
                  default=None)(self)
    if not page:
        return None
    return BrowserURL('adv_search',
                      search=Env('search'),
                      page=int(page))(self)
class get_event(ItemElement):
    """Build a ``HybrideCalendarEvent`` from an event page."""

    klass = HybrideCalendarEvent

    obj_id = Decode(Env('_id'))
    obj_start_date = Date(CleanText('//span[@class="itemDateCreated"]'))
    obj_summary = CleanText('//h2[@class="itemTitle"]')
    # Intro text followed by the full text, separated by a newline.
    obj_description = Format(
        '%s\n%s',
        CleanHTML('//div[@class="itemIntroText"]'),
        CleanHTML('//div[@class="itemFullText"]'),
    )
    obj_url = BrowserURL('event_page', _id=Env('_id'))
def next_page(self):
    """Return the URL of the next search page, or None.

    A next page is requested while ``maxResults / resultsPerPage`` is
    greater than the current page number, all read from the pagination
    section of the document.
    """
    pagination = 'navigation/pagination/%s'
    page_nb = Dict(pagination % 'page')(self)
    max_results = Dict(pagination % 'maxResults')(self)
    results_per_page = Dict(pagination % 'resultsPerPage')(self)

    if not (int(max_results) / int(results_per_page) > int(page_nb)):
        return None

    return BrowserURL('search',
                      query=Env('query'),
                      page_number=int(page_nb) + 1)(self)
def next_page(self):
    """Return the URL of the next search page, or None.

    The offset of the next page is the ``b=`` value of the "suiv" link's
    ``data-searchparams`` attribute; without it there is no next page.
    """
    offset_filter = Regexp(
        CleanText('//li[@class="suiv"]/a/@data-searchparams', default=None),
        'b=(.*)&q=.*',
        default=None)

    if not offset_filter(self):
        return None

    # The filter itself is handed to BrowserURL, which evaluates it again.
    return BrowserURL('search_page',
                      pattern=Env('pattern'),
                      type=Env('type'),
                      first_item=offset_filter)(self)
def obj_coming(self):
    """Return the total of upcoming operations for this account.

    Opens the ``account_coming`` URL for the account IBAN and computes
    ``montant / 10**nb_dec`` from the cumulative total of the returned
    document; missing keys default to '0'.
    """
    url = BrowserURL('account_coming', identifiant=Field('iban'))(self)
    page = self.page.browser.open(url).page

    amount = CleanDecimal(
        Dict('infoOperationsAvenir/cumulTotal/montant', default='0'))
    decimals = CleanDecimal(
        Dict('infoOperationsAvenir/cumulTotal/nb_dec', default='0'))

    total = Eval(lambda x, y: x / 10**y, amount, decimals)
    return total(page.doc)
def load_details(self):
    """Start loading the detail of an operation, when it has one.

    The magnifier icon's ``onclick`` holds an ``OpenDetailOperation(...)``
    call; its arguments are posted to the ``history_detail`` URL through
    ``async_open``. Returns that call's result, or None when the element
    has no magnifier icon.
    """
    icons = self.el.xpath(".//img[@src='../../images/commun/loupe.png']")
    if not icons:
        return None

    # Strip the call wrapper, spaces and quotes to keep the bare arguments.
    arguments = icons[0].get('onclick')
    for fragment in ('OpenDetailOperation(', ')', ' ', "'"):
        arguments = arguments.replace(fragment, '')
    values = arguments.split(',')

    # Arguments are mapped positionally onto these form keys.
    keys = ["nummvt", "&numads", "dtmvt", "typmvt"]
    data = dict(zip(keys, values))

    url = BrowserURL('history_detail')(self)
    return self.page.browser.async_open(url=url, data=data)
class get_job_advert(ItemElement):
    """Build a ``BaseJobAdvert`` from an advert page."""

    klass = BaseJobAdvert

    obj_id = Env('_id')
    obj_url = BrowserURL('advert', _id=Env('_id'))
    obj_title = CleanText('//h2')
    # Job description followed by the text of the definition lists.
    obj_description = Format(
        '%s\n%s',
        CleanHTML('//div[@id="JobDescription"]'),
        CleanText('//dl'),
    )
    obj_contract_type = CleanText(
        '(//dl/dt[text()="Type de contrat"]/following-sibling::dd)[1]')
    obj_society_name = CleanText('//div[@data-jsux="aboutCompany"]/div/dl/dd')
    obj_place = CleanText('//h3')
    # First <dd> following the <dt> whose text starts with "Publi".
    obj_publication_date = MonsterDate(
        CleanText(
            '(//dl/dt[starts-with(text(),"Publi")]/following-sibling::dd)[1]'))
class get_lyrics(ItemElement):
    """Build a ``SongLyrics`` object from a lyrics page."""

    klass = SongLyrics

    def condition(self):
        """Return True unless the page contains a "lyric-no-data" block."""
        no_data = XPath('//div[has-class("lyric-no-data")]')(self)
        return not no_data

    obj_id = Env('id')
    obj_url = BrowserURL('songLyrics', id=Env('id'))
    obj_content = CleanHTML('//pre[@id="lyric-body-text"]',
                            default=NotAvailable)
    obj_title = CleanText('//h2[@id="lyric-title-text"]')
    obj_artist = CleanText('//h3[@class="lyric-artist"]/a[1]',
                           default=NotAvailable)
class item(ItemElement):
    """Build a tax-certificate ``Document`` for one ``periode`` entry."""

    klass = Document

    obj_id = Format("%s_%s", Env("subscription"), Dict("periode"))
    obj_format = "pdf"
    # The document is dated on December 31st of its period.
    obj_date = Date(Format("%s-12-31", Dict("periode")))
    obj_label = Format("Attestation fiscale %s", Dict("periode"))
    obj_type = DocumentTypes.OTHER
    obj_url = BrowserURL("tax_certificate_download",
                         employer=Env("employer"),
                         year=Dict("periode"))
class get_job_advert(ItemElement):
    """Build a ``BaseJobAdvert`` from the ``itemprop``-annotated advert page."""

    klass = BaseJobAdvert

    obj_id = Env('_id')
    obj_url = BrowserURL('advert', _id=Env('_id'))
    obj_title = CleanText('//div[@id="jobcopy"]/h1[@itemprop="title"]')
    obj_description = CleanHTML('//div[@id="jobBodyContent"]')
    # All <dd> elements whose class starts with "multipledd", joined together.
    obj_contract_type = Join('%s ',
                             '//dd[starts-with(@class, "multipledd")]')
    obj_society_name = CleanText('//dd[@itemprop="hiringOrganization"]')
    obj_place = CleanText('//span[@itemprop="jobLocation"]')
    obj_pay = CleanText('//span[@itemprop="baseSalary"]')
    obj_formation = CleanText('//span[@itemprop="educationRequirements"]')
    obj_experience = CleanText('//span[@itemprop="qualifications"]')