def obj_photos(self):
    """Return the carousel pictures as a list of ``HousingPhoto``.

    Two sources are read, in this order: the ``src`` attribute of the
    ``img`` children of each ``carrousel_slide`` div, then the ``data-lazy``
    attribute of the same divs, which is decoded as JSON and whose ``url``
    key is used. Every value is prefixed with ``https:``.
    """
    from_src = [
        HousingPhoto("https:{}".format(src))
        for src in XPath('//div[@class="carrousel_slide"]/img/@src')(self)
    ]
    from_lazy = [
        HousingPhoto("https:{}".format(json.loads(payload)['url']))
        for payload in XPath('//div[@class="carrousel_slide"]/@data-lazy')(self)
    ]
    return from_src + from_lazy
class item(ItemElement):
    """Build a ``Story`` from an element and its two neighbouring siblings.

    ``parse`` fills ``self.env`` with ``header`` (the first ``span`` found
    under the previous sibling) and ``body`` (the ``a`` elements found under
    the next sibling); the ``obj_*`` filters below read those entries.
    """
    klass = Story

    def parse(self, el):
        """Store the header span and body links of ``el`` in ``self.env``."""
        # Indexing with [0] raises IndexError when the previous sibling
        # contains no <span>.
        self.env['header'] = el.getprevious().xpath('.//span')[0]
        self.env['body'] = el.getnext().xpath('.//a')

    # Digits following "histoire=" in the body link.
    obj_id = XPath(Env('body')) & Link & Regexp(pattern=r'.*histoire=(\d+)')
    obj_title = CleanText('.')
    # The three captured groups are re-ordered by the template (\3-\2-\1)
    # before being handed to Date.
    obj_date = (XPath(Env('header')) & CleanText
                & Regexp(pattern=r'le (\d+)-(\d+)-(\d+)', template=r'\3-\2-\1')
                & Date)
    # Backslashes are doubled so that "\s" is no longer an invalid escape
    # sequence in a non-raw literal; the resulting pattern is unchanged.
    obj_category = (XPath(Env('header')) & CleanText
                    & Regexp(pattern=u'Catégorie :\\s*(.*)\\s*Auteur'))

    def obj_author(self):
        """Return an ``Author`` built from the first link text of the header."""
        return Author(self.env['header'].xpath('.//a/text()')[0])
def obj_bedrooms(self):
    """Return the decimal found in the second ``item-tags`` entry.

    The value is only read when the ``item-tags`` list holds more than two
    ``li`` entries; otherwise ``NotAvailable`` is returned.
    """
    tags = XPath('//ul[@class="item-tags"]/li')(self)
    if len(tags) <= 2:
        return NotAvailable

    bedrooms = CleanDecimal('//ul[@class="item-tags"]/li[2]/strong',
                            default=NotAvailable)
    return bedrooms(self)
def obj_photos(self):
    """Return a ``HousingPhoto`` for every ``./a/img/@src`` value.

    Sources ending with ``visuel-nophoto.png`` are left out.
    """
    return [
        HousingPhoto(u'%s' % src)
        for src in XPath('./a/img/@src')(self)
        if not src.endswith("visuel-nophoto.png")
    ]
def obj_ingredients(self):
    """Return the cleaned text of each ``bu_cuisine_ingredients`` entry."""
    ingredients = []
    for entry in XPath('//ul[has-class("bu_cuisine_ingredients")]/li')(self):
        ingredients.append(CleanText('.')(entry))
    return ingredients
def obj_photos(self):
    """Return a ``HousingPhoto`` for every ``./photos/photo/stdurl`` node.

    The XPath selects nodes, not strings, so the text of each node is
    extracted with ``CleanText`` before being handed to ``HousingPhoto``
    (the same approach the other photo extractors of this file use for
    element nodes).
    """
    photos = []
    for node in XPath('./photos/photo/stdurl')(self):
        # Pass the URL text, not the node itself.
        photos.append(HousingPhoto(CleanText('.')(node)))
    return photos
def obj_photos(self):
    """Return the ``carousel-content`` images as ``HousingPhoto`` objects.

    In each ``src`` value ``75x75`` is replaced by ``800x600``, and the
    result is joined with the page URL so that it is absolute.
    """
    return [
        # urljoin ensures the URL is absolute.
        HousingPhoto(urljoin(self.page.url,
                             u'%s' % src.replace('75x75', '800x600')))
        for src in XPath('//div[has-class("carousel-content")]//img/@src')(self)
    ]
def obj_details(self):
    """Return a dict of details scraped from the page.

    Keys:
      * ``creationDate``: the date following "Mis en ligne : " (parsed
        day-first) in the description notes.
      * ``Honoraires``: only when the agency-fees label is present; the
        text after the first colon, formatted as ``"<fee> (TTC, en sus)"``.
      * one key per ``li`` of the description list, mapping the
        ``criteria-label`` text to the ``criteria-value`` text.
    """
    details = {}

    # Backslashes are doubled so that "\d" is not an invalid escape
    # sequence in a non-raw literal; the pattern itself is unchanged.
    details["creationDate"] = Date(
        Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ),
            u'.*Mis en ligne : (\\d{2}/\\d{2}/\\d{4}).*'
        ),
        dayfirst=True
    )(self)

    honoraires = CleanText(
        '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]',
        default=None
    )(self)
    if honoraires:
        fee_parts = honoraires.split(":")
        # Fall back to the whole text when it contains no colon instead of
        # raising IndexError.
        fee = fee_parts[1] if len(fee_parts) > 1 else fee_parts[0]
        details["Honoraires"] = "{} (TTC, en sus)".format(fee.strip())

    for li in XPath('//ul[@itemprop="description"]/li')(self):
        label = CleanText('./span[has-class("criteria-label")]')(li)
        value = CleanText('./span[has-class("criteria-value")]')(li)
        details[label] = value

    return details
def obj_advert_type(self):
    """Return PROFESSIONAL when an ``ispro`` span is found, else PERSONAL."""
    pro_marker = XPath('.//span[has-class("ispro")]', default=None)(self)
    return ADVERT_TYPES.PROFESSIONAL if pro_marker else ADVERT_TYPES.PERSONAL
def obj_type(self):
    """Derive the post type from the housing URL and page content.

    * URL containing ``colocation``: ``SHARING``.
    * URL containing ``location``: ``FURNISHED_RENT`` when the description
      entry labelled "meublé" has the value "oui", otherwise ``RENT``.
    * URL containing ``vente``: ``VIAGER`` when the contact button's
      ``data-offertransactiontype`` is ``'4'``, otherwise ``SALE``.
    * anything else: ``NotAvailable``.
    """
    url = BrowserURL('housing', _id=Env('_id'))(self)

    # "colocation" contains "location", so it has to be tested first.
    if 'colocation' in url:
        return POSTS_TYPES.SHARING

    if 'location' in url:
        furnished = False
        for entry in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./span[has-class("criteria-label")]')(entry)
            if label.lower() != "meublé":
                continue
            # No break: if several entries match, the last one wins.
            answer = CleanText('./span[has-class("criteria-value")]')(entry)
            furnished = answer.lower() == 'oui'
        return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

    if 'vente' in url:
        transaction = Attr(
            '//button[has-class("offer-contact-vertical-phone")][1]',
            'data-offertransactiontype')(self)
        return POSTS_TYPES.VIAGER if transaction == '4' else POSTS_TYPES.SALE

    return NotAvailable
def obj_photos(self):
    """Return a ``HousingPhoto`` for each ``vignette-annonce`` image source."""
    sources = XPath('//div[@class="vignette-annonce"]/a/span/img/@src')(self)
    return [HousingPhoto(u'%s' % src) for src in sources]
def parse(self, el):
    """Replace ``self.el`` with the decoded JSON of the second ld+json script.

    The CDATA opening and closing markers are removed from the script text
    before it is decoded. An IndexError is raised when fewer than two
    ``application/ld+json`` scripts are found.
    """
    cdata_markers = [('//<![CDATA[ ', ''), (' //]]>', '')]
    scripts = XPath(u'//script[@type="application/ld+json"]')(self)
    payload = CleanText(u'.', replace=cdata_markers)(scripts[1])
    self.el = json.loads(payload)
def obj_photos(self):
    """Return a ``HousingPhoto`` for the text of each ``stdUrl`` node."""
    url_nodes = XPath('./photos/photo/stdUrl')(self)
    return [HousingPhoto(CleanText('.')(node)) for node in url_nodes]
def obj_details(self):
    """Return the ad details as a ``{libelle: valeur}`` dict.

    A missing ``valeur`` is replaced by ``'N/A'``. The ``Reference`` key is
    set last, from ``//detailAnnonce/reference``.
    """
    details = {}
    for entry in XPath('//detailAnnonce/details/detail')(self):
        value = CleanText('valeur', default='N/A')(entry)
        label = CleanText('libelle')(entry)
        details[label] = value

    details['Reference'] = CleanText('//detailAnnonce/reference')(self)
    return details
def obj_ingredients(self):
    """Return the cleaned text of each ``ingredientsList`` entry.

    The XPath filter is given an empty list as its default.
    """
    entries = XPath('//ul[@class="ingredientsList"]/li', default=[])(self)
    return [CleanText('.')(entry) for entry in entries]
def obj_details(self):
    """Return a dict of details scraped from the page.

    The energy and greenhouse summaries are each added as
    ``{section label: message}`` when both texts are non-empty. Then every
    ``li`` of the description list adds its ``criteria-label`` text mapped
    to its ``criteria-value`` text.
    """
    # (label xpath, value xpath) for each summary, in processing order.
    summaries = (
        (
            '//div[has-class("energy-summary")]/span[@class="section-label"]|//div[has-class("energy-summary")]/div/span[@class="section-label"]',
            '//div[has-class("energy-summary")]/span[@class="energy-msg"]',
        ),
        (
            '//div[has-class("greenhouse-summary")]/span[@class="section-label"]|//div[has-class("greenhouse-summary")]/div/span[@class="section-label"]',
            '//div[has-class("greenhouse-summary")]/span[@class="energy-msg"]',
        ),
    )

    details = {}
    for label_xpath, value_xpath in summaries:
        section = CleanText(label_xpath, default='')(self)
        message = CleanText(value_xpath, default='')(self)
        if section and message:
            details[section] = message

    for entry in XPath('//ul[@itemprop="description"]/li')(self):
        key = CleanText('./div[has-class("criteria-label")]')(entry)
        details[key] = CleanText('./div[has-class("criteria-value")]')(entry)

    return details
def _get_high_tide_value(self, AM=True, jour=0):
    """Return a ``GaugeMeasure`` for one high tide of day ``jour``.

    :param AM: when true, read the first ``b`` entry of the row's time and
        value cells; otherwise read the second entry, and only if the time
        cell holds more than one ``b``.
    :param jour: index used in the ``MareeJours_<jour>`` row id, also added
        (in days) to the parsed time.
    :return: the measure, or ``None`` when the time or the value is missing
        or falsy.
    """
    if AM:
        time = DateTime(CleanText(
            '//tr[@id="MareeJours_%s"]/td[1]/b[1]' % jour),
            strict=False)(self)
        # Read the level from the same row as the time: the row id was
        # hard-coded to day 0 regardless of ``jour``.
        value = CleanDecimal(
            '//tr[@id="MareeJours_%s"]/td[2]/b[1]' % jour,
            replace_dots=True)(self)
    else:
        time, value = None, None
        if len(XPath('//tr[@id="MareeJours_%s"]/td[1]/b' % jour)(self)) > 1:
            time = DateTime(CleanText(
                '//tr[@id="MareeJours_%s"]/td[1]/b[2]' % jour),
                strict=False, default=None)(self)
            value = CleanDecimal(
                '//tr[@id="MareeJours_%s"]/td[2]/b[2]' % jour,
                replace_dots=True, default=None)(self)

    if time and value:
        measure = GaugeMeasure()
        measure.level = float(value)
        measure.date = time + timedelta(days=jour)
        return measure
    return None
def obj_station(self):
    """Return the ``item-metro`` label texts joined with ``", "``."""
    labels = XPath(
        '//ul[has-class("item-metro")]//span[has-class("label")]')(self)
    return ", ".join(label.text for label in labels)
def obj_photos(self):
    """Return a ``HousingPhoto`` for each carousel image source.

    Images are looked up under both the ``carousel-content`` and the
    ``carousel`` divs.
    """
    sources = XPath(
        '//div[@class="carousel-content"]/ul/li/a/img/@src|//div[@class="carousel"]/ul/li/a/img/@src'
    )(self)
    return [HousingPhoto(u'%s' % src) for src in sources]
def get_france_culture_podcasts_url(self):
    """Return the first emission id found in a ``podcast-link-rss`` link.

    The id is the part of the link's ``href`` captured between ``rss_`` and
    the ``xml`` suffix. ``None`` is returned when no link yields a truthy id.
    """
    # Lazy generator: links are examined one at a time, stopping at the
    # first truthy id.
    candidates = (
        Regexp(
            CleanText('./@href'),
            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml',
            default=None)(anchor)
        for anchor in XPath('//a[has-class("podcast-link-rss")]')(self.doc)
    )
    return next((found for found in candidates if found), None)
def obj_instructions(self):
    """Return the preparation steps separated by blank lines.

    Each ``bu_cuisine_recette_prepa`` entry is cleaned, the steps are
    joined with two newlines, and the result is stripped.
    """
    steps = XPath(
        '//div[has-class("grid_line")]'
        '/ol/li[has-class("bu_cuisine_recette_prepa")]')(self)
    return '\n\n'.join(CleanText('.')(step) for step in steps).strip()
def obj_url(self):
    """Return the first ``mp4``/``webm`` download link, prefixed by BASEURL.

    Each candidate link and its extension are logged at debug level.
    ``None`` is returned when no link has one of the two extensions.
    """
    hrefs = XPath(
        '//div[@id="download_links"]/div[@class="paragraph"]/div[has-class("share")]/a[@target="_blank"]/@href'
    )(self)
    for href in hrefs:
        # Text after the last dot.
        extension = str(href).rsplit('.', 1)[-1]
        self.logger.debug("Link:%s Ext:%s", href, extension)
        if extension not in ('mp4', 'webm'):
            continue
        return self.page.browser.BASEURL + unicode(href)
    return None
def parse(self, el):
    """Store the decoded first ld+json script of ``el`` in ``env['_json']``.

    An empty dict is stored when decoding raises ``ValueError``.
    """
    from weboob.tools.json import json

    scripts = XPath('//script[@type="application/ld+json"][1]')(el)
    raw = CleanText('.')(scripts[0])
    try:
        decoded = json.loads(raw)
    except ValueError:
        decoded = {}
    self.env['_json'] = decoded
def obj_photos(self):
    """Return a ``HousingPhoto`` for each thumbnail image.

    The URL kept is the inner ``http://`` address captured from the
    thumbnail's ``thbr.figarocms.net`` source.
    """
    thumbnails = XPath(
        '//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self)
    return [
        HousingPhoto(
            Regexp(CleanText('./@src'),
                   r'http://thbr\.figarocms\.net.*(http://.*)')(thumbnail))
        for thumbnail in thumbnails
    ]
def obj_url(self):
    """Return the first ``mp4``/``webm`` link of the sharing tab.

    Each candidate link and its extension are logged at debug level.
    ``None`` is returned when no link has one of the two extensions.
    """
    hrefs = XPath(
        '//div[@id="tab_sharing_content"]/div/div/div[@class="paragraph"]/div[@class="share"]/a[@target="_blank"]/@href'
    )(self)
    for href in hrefs:
        # Text after the last dot.
        extension = str(href).rsplit('.', 1)[-1]
        self.logger.debug("Link:%s Ext:%s", href, extension)
        if extension not in ('mp4', 'webm'):
            continue
        return unicode(href)
    return None
def obj_photos(self):
    """Return a ``HousingPhoto`` for each ``./photos/photo`` entry.

    ``bigUrl`` is preferred and ``stdUrl`` is the fallback. Entries with
    neither are skipped, so no ``HousingPhoto`` is built from ``None``.
    """
    photos = []
    for photo in XPath('./photos/photo')(self):
        url = CleanText('bigUrl', default=None)(photo)
        if not url:
            url = CleanText('stdUrl', default=None)(photo)
        if not url:
            # Neither URL is available: nothing usable for this entry.
            continue
        photos.append(HousingPhoto(url))
    return photos
def parse(self, el):
    """Fill ``contract_type`` and ``pay`` on ``self.obj`` from the icon group.

    For each ``dt`` of the ``icon-group`` list, the first following ``dd``
    gives the contract type when the ``dt`` reads "Type de contrat", and
    the pay (the text after "Salaire : ") when it reads "Salaire".
    """
    for term in XPath('//dl[@class="icon-group"]/dt')(el):
        heading = CleanText('.')(term)
        if heading == u'Salaire':
            self.obj.pay = Regexp(
                CleanText('./following-sibling::dd[1]'),
                u'Salaire : (.*)')(term)
        elif heading == u'Type de contrat':
            self.obj.contract_type = CleanText(
                './following-sibling::dd[1]')(term)
def obj_details(self):
    """Return a dict of details scraped from the page.

    Keys:
      * ``DPE`` / ``GES``: the grade letter (A to G) read from the energy
        and greenhouse summaries, or ``NotAvailable``.
      * ``creationDate``: the date following "Mis en ligne : " (parsed
        day-first) in the description notes.
      * ``Honoraires``: only when the agency-fees label is present; the
        text after the first colon, formatted as ``"<fee> (TTC, en sus)"``.
      * one key per ``li`` of the description list, mapping the
        ``criteria-label`` text to the ``criteria-value`` text.
    """
    grades = ("A", "B", "C", "D", "E", "F", "G")

    def read_grade(xpath, prefix):
        """Return the grade found at ``xpath`` or ``NotAvailable``."""
        raw = CleanText(xpath, default=None)(self)
        if raw and len(raw) > 1:
            # Slice instead of indexing: when only the prefix is present
            # the remainder is empty and [0] would raise IndexError.
            raw = raw.replace(prefix, "").strip()[:1]
            if raw not in grades:
                raw = None
        # Texts of length 0 or 1 are kept as they are.
        return NotAvailable if raw is None else raw

    details = {}
    details["DPE"] = read_grade(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
        "DPE"
    )
    details["GES"] = read_grade(
        '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
        "GES"
    )

    # Backslashes are doubled so that "\d" is not an invalid escape
    # sequence in a non-raw literal; the pattern itself is unchanged.
    details["creationDate"] = Date(
        Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ),
            u'.*Mis en ligne : (\\d{2}/\\d{2}/\\d{4}).*'
        ),
        dayfirst=True
    )(self)

    honoraires = CleanText(
        '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]',
        default=None
    )(self)
    if honoraires:
        fee_parts = honoraires.split(":")
        # Fall back to the whole text when it contains no colon instead of
        # raising IndexError.
        fee = fee_parts[1] if len(fee_parts) > 1 else fee_parts[0]
        details["Honoraires"] = "{} (TTC, en sus)".format(fee.strip())

    for li in XPath('//ul[@itemprop="description"]/li')(self):
        label = CleanText('./div[has-class("criteria-label")]')(li)
        value = CleanText('./div[has-class("criteria-value")]')(li)
        details[label] = value

    return details
def get_phone_url_datas(self):
    """Return ``(urlcontact, params)`` read from the first phone link.

    ``urlcontact`` is the link's ``data-urlcontact`` attribute; ``params``
    maps each remaining name to the matching ``data-*`` attribute. An
    IndexError is raised when the document has no ``phone-link`` anchor.
    """
    anchor = XPath('//a[has-class("phone-link")]')(self.doc)[0]
    urlcontact = CleanText('./@data-urlcontact')(anchor)

    # (parameter name, attribute xpath), in the order they are read.
    attribute_xpaths = (
        ('univers', './@data-univers'),
        ('pushcontact', './@data-pushcontact'),
        ('mapper', './@data-mapper'),
        ('offerid', './@data-offerid'),
        ('offerflag', './@data-offerflag'),
        ('campaign', './@data-campaign'),
        ('xtpage', './@data-xtpage'),
    )
    params = {}
    for name, xpath in attribute_xpaths:
        params[name] = CleanText(xpath)(anchor)

    return urlcontact, params
def obj_bedrooms(self):
    """Return the decimal found in the second ``item-summary`` entry.

    The value is only read when the ``item-summary`` list holds more than
    two ``li`` entries; otherwise ``NotAvailable`` is returned.
    """
    summary_items = XPath(
        './div[@class="box-body"]/div/div/div[@class="clearfix"]/ul[has-class("item-summary")]/li'
    )(self)
    if len(summary_items) <= 2:
        return NotAvailable

    bedrooms = CleanDecimal(
        './div[@class="box-body"]/div/div/div[@class="clearfix"]/ul[has-class("item-summary")]/li[2]/strong',
        default=NotAvailable
    )
    return bedrooms(self)