class get_housing(ItemElement):
    """Build one ``Housing`` object from a housing detail page.

    Plain attributes are declarative filters evaluated against the page;
    ``obj_photos`` and ``obj_details`` need custom iteration and are
    therefore written as methods.
    """

    klass = Housing

    obj_id = Env('_id')
    obj_title = CleanText('//h1[@itemprop="name"]')
    obj_location = CleanText('//span[@class="informations-localisation"]')
    obj_cost = CleanDecimal('//span[@itemprop="price"]')
    obj_currency = Currency('//span[@itemprop="price"]')
    obj_text = CleanHTML('//div[@itemprop="description"]')
    obj_url = BrowserURL('housing', _id=Env('_id'))
    # The area is the run of digits found right before " m2" in the title.
    obj_area = CleanDecimal(
        Regexp(
            CleanText('//h1[@itemprop="name"]'),
            r'(.*?)(\d*) m2(.*?)',
            '\\2'
        ),
        default=NotAvailable
    )
    obj_price_per_meter = PricePerMeterFilter()

    def obj_photos(self):
        """Return one ``HousingPhoto`` per thumbnail image of the page.

        The thumbnail ``src`` embeds the original picture URL after the
        ``thbr.figarocms.net`` prefix; only that embedded URL is kept.
        """
        thumbnails = XPath(
            '//a[@class="thumbnail-link"]/img[@itemprop="image"]'
        )(self)
        return [
            HousingPhoto(
                Regexp(
                    CleanText('./@src'),
                    r'http://thbr\.figarocms\.net.*(http://.*)'
                )(thumbnail)
            )
            for thumbnail in thumbnails
        ]

    def obj_details(self):
        """Return a dict of feature name -> value, plus the energy entry.

        Entries whose name or value is empty are skipped.
        """
        details = {}

        for feature in XPath('//div[@class="features clearfix"]/ul/li')(self):
            name = CleanText('./span[@class="name"]')(feature)
            content = CleanText('./span[@class="value"]')(feature)
            if name and content:
                details[name] = content

        # Energy consumption lives in its own block, outside the feature list.
        energy_title = CleanText('//div[@class="title-dpe clearfix"]')(self)
        energy_value = CleanText('//div[@class="energy-consumption"]')(self)
        if energy_title and energy_value:
            details[energy_title] = energy_value

        return details
def obj_type(self):
    """Map the numeric ``query_type`` env value back to a post type.

    The first key of ``TYPES`` whose value equals the query type id is
    used; ``next`` raises ``StopIteration`` if no key matches.
    """
    query_type_id = int(Env('query_type')(self))
    post_type = next(
        name for name, type_id in TYPES.items() if type_id == query_type_id
    )
    # SeLoger does not let us discriminate between furnished and not
    # furnished, so furnished rents are reported as plain rents.
    if post_type == POSTS_TYPES.FURNISHED_RENT:
        return POSTS_TYPES.RENT
    return post_type
def next_page(self):
    """Return the POST request for the next result page, or ``None``.

    The ``data`` env payload is advanced in place by one ``limit`` and
    re-posted as JSON to the current page URL. Pagination stops once the
    current offset is greater than the document's ``total_all``.
    """
    payload = Env('data')(self)

    if payload['offset'] > self.page.doc['total_all']:
        return None

    payload['offset'] += payload['limit']
    return requests.Request("POST", self.page.url, data=json.dumps(payload))
def next_page(self):
    """Return the URL of the next search page, or ``None`` on the last one.

    A next page exists while the number of pages needed to show every
    result (total count / results per page) is greater than the current
    page number.
    """
    page_nb = Dict('navigation/pagination/page')(self)
    max_results = Dict('navigation/counts/count')(self)
    results_per_page = Dict('navigation/pagination/resultsPerPage')(self)

    pages_needed = int(max_results) / int(results_per_page)
    if pages_needed <= int(page_nb):
        return None

    return BrowserURL(
        'search',
        query=Env('query'),
        page_number=int(page_nb) + 1
    )(self)
def obj_type(self):
    """Derive the post type from the ``type`` env value and the page URL.

    ``location`` is a rent (furnished when the URL contains
    ``appartement-meuble``), ``achat`` is a sale, anything else is
    ``NotAvailable``.
    """
    transaction = Env('type')(self)

    if transaction == 'achat':
        return POSTS_TYPES.SALE

    if transaction != 'location':
        return NotAvailable

    if 'appartement-meuble' in self.page.url:
        return POSTS_TYPES.FURNISHED_RENT
    return POSTS_TYPES.RENT
def obj_type(self):
    """Derive the post type from the housing URL and the criteria list.

    The URL is tested for ``colocation`` first (it also contains
    ``location``), then ``location``, then ``vente``. For rents, the
    criteria list decides between furnished and unfurnished.
    """
    housing_url = BrowserURL('housing', _id=Env('_id'))(self)

    if 'colocation' in housing_url:
        return POSTS_TYPES.SHARING

    if 'location' in housing_url:
        furnished = False
        for criterion in XPath('//ul[@itemprop="description"]/li')(self):
            criterion_label = CleanText(
                './span[has-class("criteria-label")]'
            )(criterion)
            if criterion_label.lower() != "meublé":
                continue
            criterion_value = CleanText(
                './span[has-class("criteria-value")]'
            )(criterion)
            # The last matching criterion wins.
            furnished = criterion_value.lower() == 'oui'

        return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

    if 'vente' in housing_url:
        return POSTS_TYPES.SALE

    return NotAvailable
class get_housing(ItemElement):
    """Build one ``Housing`` object from a housing detail page.

    Plain attributes are declarative filters; anything needing branching
    or iteration is an ``obj_*`` method.
    """

    klass = Housing

    obj_id = Env('_id')

    def obj_type(self):
        """Derive the post type from the housing URL and the criteria list.

        ``colocation`` is tested before ``location`` because the former
        contains the latter.
        """
        url = BrowserURL('housing', _id=Env('_id'))(self)
        if 'colocation' in url:
            return POSTS_TYPES.SHARING
        elif 'location' in url:
            is_furnished = False
            for li in XPath('//ul[@itemprop="description"]/li')(self):
                label = CleanText('./span[has-class("criteria-label")]')(li)
                if label.lower() == "meublé":
                    is_furnished = (
                        CleanText(
                            './span[has-class("criteria-value")]'
                        )(li).lower() == 'oui'
                    )
            if is_furnished:
                return POSTS_TYPES.FURNISHED_RENT
            return POSTS_TYPES.RENT
        elif 'vente' in url:
            return POSTS_TYPES.SALE
        return NotAvailable

    obj_advert_type = ADVERT_TYPES.PROFESSIONAL

    def obj_house_type(self):
        """Map the main-feature label of the page to a ``HOUSE_TYPES`` value."""
        house_type = CleanText(
            './/h2[@class="offerMainFeatures"]/div'
        )(self).lower()
        if house_type == "appartement":
            return HOUSE_TYPES.APART
        elif house_type == "maison":
            return HOUSE_TYPES.HOUSE
        elif house_type == "terrain":
            return HOUSE_TYPES.LAND
        elif house_type == "parking":
            return HOUSE_TYPES.PARKING
        return HOUSE_TYPES.OTHER

    obj_title = Attr('//meta[@property="og:title"]', 'content')
    obj_area = CleanDecimal(
        CleanText('//p[@class="offerArea"]/span'),
        default=NotAvailable
    )
    # Raw strings: "\d" in a plain literal is an invalid escape sequence.
    obj_rooms = CleanDecimal(
        Regexp(
            CleanText('//p[@class="offerRooms"]/span'),
            r'(\d) p.',
            default=NotAvailable
        ),
        default=NotAvailable
    )
    obj_bedrooms = CleanDecimal(
        Regexp(
            CleanText('//p[@class="offerRooms"]/span'),
            r'(\d) ch.',
            default=NotAvailable
        ),
        default=NotAvailable
    )
    obj_cost = CleanDecimal('//*[@itemprop="price"]', default=0)
    obj_currency = Currency('//*[@itemprop="price"]')

    def obj_utilities(self):
        """Return ``INCLUDED`` when the notes state charges are included."""
        notes = CleanText('//p[@class="offer-description-notes"]')(self)
        if "Loyer mensuel charges comprises" in notes:
            return UTILITIES.INCLUDED
        return UTILITIES.UNKNOWN

    obj_price_per_meter = PricePerMeterFilter()
    # Backslashes are doubled (rather than using a raw string) so the
    # unicode literal keeps the exact same runtime value.
    obj_date = Date(
        Regexp(
            CleanText('//div[@class="offer-description-notes"]'),
            u'.* Mis à jour: (\\d{2}/\\d{2}/\\d{4}).*'
        ),
        dayfirst=True
    )
    obj_text = CleanHTML('//p[@class="descrProperty"]')
    obj_location = CleanText('//em[@class="infoAdresse"]')
    obj_station = CleanText(
        '//div[has-class("offer-description-metro")]',
        default=NotAvailable
    )

    obj_url = BrowserURL('housing', _id=Env('_id'))

    def obj_photos(self):
        """Return the thumbnails as absolute 800x600 ``HousingPhoto`` URLs.

        SVG images are skipped.
        """
        photos = []
        for img in XPath('//ul[@class="thumbsContainer"]//img/@src')(self):
            if img.endswith('.svg'):
                continue
            url = u'%s' % img.replace('182x136', '800x600')
            url = urljoin(self.page.url, url)  # Ensure URL is absolute
            photos.append(HousingPhoto(url))
        return photos

    def obj_DPE(self):
        """Return the energy class read from the DPE ``data-class`` attribute.

        Falls back to ``NotAvailable`` when the attribute is missing, holds
        nothing but the ``DPE`` prefix, or names an unknown class.
        """
        energy_value = CleanText(
            '//ul[@class="energyInfosDPE"]//li[@class="energyInfos"]/span/@data-class',
            default=""
        )(self)
        # Slice instead of indexing: "[0]" raised IndexError when nothing
        # was left after removing the prefix.
        energy_value = energy_value.replace("DPE", "").strip()[:1]
        return getattr(ENERGY_CLASS, energy_value, NotAvailable)

    def obj_GES(self):
        """Return the greenhouse class read from the GES ``data-class`` attribute.

        Falls back to ``NotAvailable`` when the attribute is missing, holds
        nothing but the ``GES`` prefix, or names an unknown class.
        """
        greenhouse_value = CleanText(
            '//ul[@class="energyInfosGES"]//li[@class="energyInfos"]/span/@data-class',
            default=""
        )(self)
        # Same IndexError guard as in obj_DPE.
        greenhouse_value = greenhouse_value.replace("GES", "").strip()[:1]
        return getattr(ENERGY_CLASS, greenhouse_value, NotAvailable)

    def obj_details(self):
        """Return creation date, agency fees and every listed criterion."""
        details = {}

        details["creationDate"] = Date(
            Regexp(
                CleanText('//div[@class="offer-description-notes"]'),
                u'.*Mis en ligne: (\\d{2}/\\d{2}/\\d{4}).*'
            ),
            dayfirst=True
        )(self)

        honoraires = CleanText(
            '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]',
            default=None
        )(self)
        if honoraires:
            # Keep the text after the first colon; fall back to the whole
            # text instead of raising IndexError when there is no colon.
            fee_parts = honoraires.split(":")
            fee = fee_parts[1] if len(fee_parts) > 1 else fee_parts[0]
            details["Honoraires"] = "{} (TTC, en sus)".format(fee.strip())

        for li in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./span[has-class("criteria-label")]')(li)
            value = CleanText('./span[has-class("criteria-value")]')(li)
            details[label] = value

        return details
class item(ItemElement):
    """Build one ``Housing`` object from an offer block of a result list.

    All XPaths are relative to the offer element.
    """

    # Common prefix of most XPaths below.
    offer_details_wrapper = (
        './/div[has-class("offer-details-wrapper")]'
    )

    klass = Housing

    obj_id = Format(
        '%s-%s',
        Regexp(Env('type'), '(.*)-.*'),
        CleanText('./@id', replace=[('header-offer-', '')])
    )
    obj_type = Env('query_type')
    obj_advert_type = ADVERT_TYPES.PROFESSIONAL

    def obj_house_type(self):
        """Map the offer's ``itemprop="name"`` content to ``HOUSE_TYPES``."""
        house_type = CleanText(
            './/div[has-class("offer-details-caracteristik")]'
            '/meta[@itemprop="name"]/@content'
        )(self).lower()
        if house_type == "appartement":
            return HOUSE_TYPES.APART
        elif house_type == "maison":
            return HOUSE_TYPES.HOUSE
        elif house_type == "terrain":
            return HOUSE_TYPES.LAND
        elif house_type == "parking":
            return HOUSE_TYPES.PARKING
        return HOUSE_TYPES.OTHER

    obj_title = CleanText('.//div[has-class("offer-details-type")]/a/@title')
    # The XPath literal used to be split with a backslash continuation,
    # which is fragile (any whitespace after it corrupts the expression);
    # it is now a single literal.
    obj_url = Format(
        u'%s%s',
        CleanText('.//div/a[@class="offer-link"]/@href'),
        CleanText('.//div/a[@class="offer-link"]/@data-orpi', default="")
    )
    obj_area = CleanDecimal(
        (
            offer_details_wrapper +
            '/div/div/div[has-class("offer-details-second")]' +
            '/div/h3[has-class("offer-attributes")]/span' +
            '/span[has-class("offer-area-number")]'
        ),
        default=NotLoaded
    )
    obj_rooms = CleanDecimal(
        (
            offer_details_wrapper +
            '/div/div/div[has-class("offer-details-second")]' +
            '/div/h3[has-class("offer-attributes")]' +
            '/span[has-class("offer-rooms")]' +
            '/span[has-class("offer-rooms-number")]'
        ),
        default=NotAvailable
    )
    # Keep everything before the trailing currency symbol.
    obj_cost = CleanDecimal(
        Regexp(
            CleanText(
                (
                    offer_details_wrapper +
                    '/div/p[@class="offer-price"]/span'
                ),
                default=NotLoaded
            ),
            '(.*) [%s%s%s]' % (u'€', u'$', u'£'),
            default=NotLoaded
        ),
        default=NotLoaded
    )
    obj_currency = Currency(
        offer_details_wrapper + '/div/p[has-class("offer-price")]/span'
    )
    obj_price_per_meter = PricePerMeterFilter()
    obj_utilities = UTILITIES.UNKNOWN
    obj_text = CleanText(
        offer_details_wrapper +
        '/div/div/div/p[has-class("offer-description")]/span'
    )
    obj_location = CleanText(
        offer_details_wrapper + '/div[@class="offer-details-location"]',
        replace=[('Voir sur la carte','')]
    )

    def obj_photos(self):
        """Return the offer picture as a one-element list, or ``[]``.

        The picture URL is rewritten to the 800x600 variant and made
        absolute. A missing picture yields an empty list.
        """
        # A filter default replaces the former bare "except: pass", which
        # silently swallowed every error, not only a missing picture.
        url = Attr(
            './/div[has-class("offer-picture")]//img',
            'src',
            default=None
        )(self)
        if not url:
            return []

        url = url.replace('335x253', '800x600')
        url = urljoin(self.page.url, url)  # Ensure URL is absolute
        return [HousingPhoto(url)]

    def obj_details(self):
        """Return a dict holding the agency fees when they are displayed."""
        details = {}
        honoraires = CleanText(
            (
                self.offer_details_wrapper +
                '/div/div/p[@class="offer-agency-fees"]'
            ),
            default=None
        )(self)
        if honoraires:
            # Keep the text after the first colon; fall back to the whole
            # text instead of raising IndexError when there is no colon.
            fee_parts = honoraires.split(":")
            fee = fee_parts[1] if len(fee_parts) > 1 else fee_parts[0]
            details["Honoraires"] = "{} (TTC, en sus)".format(fee.strip())
        return details
class item(ItemElement):
    """Build one ``Housing`` object from an entry of a result list.

    ``condition`` filters entries out, ``parse`` pre-computes the room,
    bedroom and area figures into ``env`` for the matching ``obj_*``
    attributes.
    """

    klass = Housing

    def condition(self):
        """Tell whether this entry must be turned into a ``Housing``.

        An entry is kept when it has an id and, for plain rent queries,
        when its title does not mention "meublé".
        """
        title = self.obj_title(self)
        unfurnished_ok = (
            self.env['query_type'] != POSTS_TYPES.RENT
            or 'meublé' not in title.lower()
        )

        if self.obj_id(self) is None:
            return False

        annonce_id = Regexp(
            Link('./div/a[has-class("item-title")]'),
            '/annonces/(.*)',
            default=None
        )(self)
        return annonce_id and unfurnished_ok

    def parse(self, el):
        """Fill ``env`` with ``rooms``, ``bedrooms`` and ``area``.

        Each tag of the entry is classified by its text; a tag that is
        neither a bedroom nor a room count is treated as the area.
        """
        tags = el.xpath(
            './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
        )

        for key in ('rooms', 'bedrooms', 'area'):
            self.env[key] = NotLoaded

        for tag in tags:
            label = CleanText('.')(tag).lower()
            if 'chambre' in label:
                self.env['bedrooms'] = CleanDecimal('.')(tag)
            elif 'pièce' in label:
                self.env['rooms'] = CleanDecimal('.')(tag)
            else:
                self.env['area'] = CleanDecimal(
                    Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
                )(tag)

    obj_id = Regexp(Link('./div/a[has-class("item-title")]'),
                    '/annonces/(.*)', default=None)
    obj_type = Env('query_type')
    obj_advert_type = ADVERT_TYPES.PERSONAL

    def obj_house_type(self):
        """Guess the house type from the first word of the link's last path segment."""
        link = Link('./div/a[@class="item-title"]')(self)
        slug = link.split('/')[-1].split('-')[0]

        if 'parking' in slug:
            return HOUSE_TYPES.PARKING
        if 'appartement' in slug:
            return HOUSE_TYPES.APART
        if 'terrain' in slug:
            return HOUSE_TYPES.LAND
        if 'maison' in slug:
            return HOUSE_TYPES.HOUSE
        return HOUSE_TYPES.OTHER

    obj_title = CleanText('./div/a[has-class("item-title")]')
    obj_area = Env('area')
    obj_cost = CleanDecimal(
        CleanText(
            './div/a[has-class("item-title")]/span[@class="item-price"]'
        ),
        replace_dots=True,
        default=Decimal(0)
    )
    obj_currency = Currency(
        './div/a[@class="item-title"]/span[@class="item-price"]'
    )
    obj_utilities = UTILITIES.UNKNOWN

    obj_station = CleanText('./div/p[@class="item-transports"]',
                            default=NotLoaded)

    def obj_location(self):
        """Return the description text up to its first full stop."""
        description = CleanText('./div/p[@class="item-description"]')(self)
        return description.split(".")[0]

    obj_text = CleanText(
        './div/p[@class="item-description"]',
        replace=[(' Lire la suite', '')]
    )
    obj_rooms = Env('rooms')
    obj_bedrooms = Env('bedrooms')
    obj_price_per_meter = PricePerMeterFilter()

    obj_url = Format(
        u'http://www.pap.fr%s',
        Link('./div/a[@class="item-title"]')
    )

    def obj_photos(self):
        """Return the entry pictures, skipping placeholder images."""
        placeholders = ("visuel-nophoto.png", 'miniature-video.png')
        return [
            HousingPhoto(u'%s' % src)
            for src in XPath('./a/img/@src')(self)
            if not (src.endswith(placeholders[0])
                    or src.endswith(placeholders[1]))
        ]
class get_housing(ItemElement):
    """Build one ``Housing`` object from a housing detail page.

    ``parse`` pre-computes the room, bedroom and area figures into ``env``
    for the matching ``obj_*`` attributes.
    """

    klass = Housing

    def parse(self, el):
        """Fill ``env`` with ``rooms``, ``bedrooms`` and ``area``.

        Tags that are none of those are ignored.
        """
        rooms_bedrooms_area = el.xpath('.//ul[has-class("item-tags")]/li')
        self.env['rooms'] = NotAvailable
        self.env['bedrooms'] = NotAvailable
        self.env['area'] = NotAvailable

        for item in rooms_bedrooms_area:
            name = CleanText('.')(item)
            if 'chambre' in name.lower():
                name = 'bedrooms'
                value = CleanDecimal('./strong')(item)
            elif 'pièce' in name.lower():
                name = 'rooms'
                value = CleanDecimal('./strong')(item)
            elif ' m²' in name and 'le m²' not in name:
                name = 'area'
                value = CleanDecimal(
                    Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
                )(item)
            else:
                # Unknown tag: without this, "value" was either unbound
                # (NameError) or left over from the previous tag and got
                # stored under the raw tag text.
                continue
            self.env[name] = value

    obj_id = Env('_id')

    def obj_type(self):
        """Derive the post type from the first breadcrumb link and the title."""
        prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
        if 'location' in prev_link:
            title = self.obj_title(self)
            if 'meublé' in title.lower():
                return POSTS_TYPES.FURNISHED_RENT
            return POSTS_TYPES.RENT
        elif 'vente' in prev_link:
            return POSTS_TYPES.SALE
        elif 'viager' in prev_link:
            return POSTS_TYPES.VIAGER
        return NotAvailable

    obj_advert_type = ADVERT_TYPES.PERSONAL

    def obj_house_type(self):
        """Guess the house type from the last ``-`` separated part of the breadcrumb link."""
        prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
        house_type = prev_link.split('-')[-1]
        if 'parking' in house_type:
            return HOUSE_TYPES.PARKING
        elif 'appartement' in house_type:
            return HOUSE_TYPES.APART
        elif 'terrain' in house_type:
            return HOUSE_TYPES.LAND
        elif 'maison' in house_type:
            return HOUSE_TYPES.HOUSE
        return HOUSE_TYPES.OTHER

    obj_title = CleanText('//h1[@class="item-title"]')
    obj_cost = CleanDecimal(
        '//h1[@class="item-title"]/span[@class="item-price"]',
        replace_dots=True
    )
    obj_currency = Currency(
        '//h1[@class="item-title"]/span[@class="item-price"]'
    )
    obj_utilities = UTILITIES.UNKNOWN
    obj_area = Env('area')

    def obj_date(self):
        """Parse the text after the last ``/`` of the date paragraph."""
        date = CleanText(
            '//p[@class="item-date"]'
        )(self).split("/")[-1].strip()
        return parse_french_date(date)

    obj_rooms = Env('rooms')
    obj_bedrooms = Env('bedrooms')
    obj_price_per_meter = PricePerMeterFilter()
    obj_location = CleanText('//div[has-class("item-description")]/h2')
    obj_text = CleanText(
        CleanHTML('//div[has-class("item-description")]/div/p')
    )

    def obj_station(self):
        """Return the transport labels joined by ``", "``."""
        stations = XPath(
            '//ul[has-class("item-transports")]//span[has-class("label")]'
        )(self)
        # An element without direct text has text None, which would make
        # str.join raise TypeError; such labels are skipped.
        return ", ".join([
            station.text for station in stations if station.text
        ])

    def obj_phone(self):
        """Return the first contact phone block, spaces turned into ``", "``."""
        phone = CleanText(
            '(//div[has-class("contact-proprietaire-box")]'
            '//strong[@class="tel-wrapper"])[1]'
        )(self)
        phone = phone.replace(' ', ', ')
        return phone

    obj_url = BrowserURL('housing', _id=Env('_id'))

    def obj_DPE(self):
        """Return the energy class encoded in an ``energy-rank-X`` CSS class.

        Falls back to ``NotAvailable`` when the element is missing, carries
        no ``energy-rank-*`` class, or names an unknown class.
        """
        css_classes = Attr(
            '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
            'class',
            default=""
        )(self)
        # next() with a default replaces "[...][0]", which raised
        # IndexError when no class carried the "energy-rank-" prefix.
        rank = next(
            (
                x.replace("energy-rank-", "").upper()
                for x in css_classes.split()
                if x.startswith("energy-rank-")
            ),
            ""
        )
        return getattr(ENERGY_CLASS, rank, NotAvailable)

    def obj_photos(self):
        """Return the thumbnails, skipping the video placeholder image."""
        photos = []
        for img in XPath('//div[@class="owl-thumbs"]/a/img/@src')(self):
            if not img.endswith('miniature-video.png'):
                photos.append(HousingPhoto(u'%s' % img))
        return photos
class get_housing(ItemElement):
    """Build one ``Housing`` object from a housing detail page."""

    klass = Housing

    obj_id = Format(
        '%s:%s',
        Env('type'),
        Attr('//div[boolean(@data-property-reference)]',
             'data-property-reference')
    )
    obj_advert_type = ADVERT_TYPES.PROFESSIONAL

    def obj_type(self):
        """Derive the post type from the ``type`` env value and the page URL."""
        post_type = Env('type')(self)
        if post_type == 'location':
            if 'appartement-meuble' in self.page.url:
                return POSTS_TYPES.FURNISHED_RENT
            return POSTS_TYPES.RENT
        elif post_type == 'achat':
            return POSTS_TYPES.SALE
        return NotAvailable

    def obj_url(self):
        """Return the URL of the current page."""
        return self.page.url

    def obj_house_type(self):
        """Return the first house type whose query slug appears in the URL."""
        url = self.obj_url()
        for house_type, slugs in QUERY_HOUSE_TYPES.items():
            for slug in slugs:
                if ('/%s/' % slug) in url:
                    return house_type
        return NotAvailable

    obj_title = CleanText('//h1[has-class("OfferTop-title")]')
    obj_area = CleanDecimal(
        Regexp(
            CleanText(
                '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'
            ),
            r'(\d*\.*\d*) .*',
            default=NotAvailable
        ),
        default=NotAvailable
    )
    obj_cost = CleanDecimal('//span[has-class("OfferTop-price")]',
                            default=NotAvailable)
    obj_price_per_meter = PricePerMeterFilter()
    obj_currency = Currency('//span[has-class("OfferTop-price")]')
    obj_location = Format('%s - %s',
                          CleanText('//p[@data-behat="adresseBien"]'),
                          CleanText('//p[has-class("OfferTop-loc")]'))
    obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
    obj_phone = Regexp(Link('//a[has-class("OfferContact-btn--tel")]'),
                       r'tel:(.*)')

    def obj_photos(self):
        """Return the slider pictures, rewritten to the 800/600 variant."""
        photos = []
        for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
            photo_url = Attr('.', 'src')(photo)
            photo_url = photo_url.replace('640/480', '800/600')
            photos.append(HousingPhoto(photo_url))
        return photos

    def obj_date(self):
        """Return today's date.

        Written as a method so the date is computed on every scrape; as a
        class attribute it was evaluated once, when the class was defined,
        and went stale in a long-running process.
        """
        return datetime.date.today()

    def obj_utilities(self):
        """Return ``INCLUDED`` when the price line mentions included charges."""
        price = CleanText('//p[has-class("OfferTop-price")]')(self)
        if "charges comprises" in price.lower():
            return UTILITIES.INCLUDED
        return UTILITIES.EXCLUDED

    obj_rooms = CleanDecimal(
        '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
        default=NotAvailable
    )
    obj_bedrooms = CleanDecimal(
        '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
        default=NotAvailable
    )

    def obj_DPE(self):
        """Return the energy class deduced from the consumption figure.

        The figure is read from the URL of the DPE image; when it cannot
        be found the result is ``NotAvailable``.
        """
        try:
            electric_consumption = CleanDecimal(
                Regexp(
                    Attr('//div[has-class("OfferDetails-content")]//img',
                         'src'),
                    r'https://dpe.foncia.net\/(\d+)\/.*'
                )
            )(self)
        except (RegexpError, XPathNotFound):
            electric_consumption = None

        if electric_consumption is None:
            return NotAvailable

        # Upper bound (inclusive) of each class; anything above is "G".
        DPE = "G"
        for upper_bound, letter in (
            (50, "A"), (90, "B"), (150, "C"),
            (230, "D"), (330, "E"), (450, "F"),
        ):
            if electric_consumption <= upper_bound:
                DPE = letter
                break
        return getattr(ENERGY_CLASS, DPE, NotAvailable)

    def obj_details(self):
        """Return availability, price mentions, agency and feature lists.

        Features are grouped in one sub-dict per category title; bullet
        items are stored with the value ``True``.
        """
        details = {}

        # Falls back to today's date when no date is displayed.
        dispo = Date(
            Regexp(
                CleanText('//p[has-class("OfferTop-dispo")]'),
                r'.* (\d\d\/\d\d\/\d\d\d\d)',
                default=datetime.date.today().isoformat()
            )
        )(self)
        if dispo is not None:
            details["dispo"] = dispo

        priceMentions = CleanText(
            '//p[has-class("OfferTop-mentions")]',
            default=None
        )(self)
        if priceMentions is not None:
            details["priceMentions"] = priceMentions

        agency = CleanText(
            '//p[has-class("OfferContact-address")]',
            default=None
        )(self)
        if agency is not None:
            details["agency"] = agency

        for item in self.xpath(
                '//div[has-class("OfferDetails-columnize")]/div'):
            category = CleanText(
                './h3[has-class("OfferDetails-title--2")]',
                default=None
            )(item)
            if not category:
                continue

            details[category] = {}

            for detail_item in item.xpath(
                    './/ul[has-class("List--data")]/li'):
                detail_title = CleanText(
                    './/span[has-class("List-data")]')(detail_item)
                detail_value = CleanText(
                    './/*[has-class("List-value")]')(detail_item)
                details[category][detail_title] = detail_value

            for detail_item in item.xpath(
                    './/ul[has-class("List--bullet")]/li'):
                detail_title = CleanText('.')(detail_item)
                details[category][detail_title] = True

        try:
            electric_consumption = CleanDecimal(
                Regexp(
                    Attr('//div[has-class("OfferDetails-content")]//img',
                         'src'),
                    r'https://dpe.foncia.net\/(\d+)\/.*'
                )
            )(self)
            details["electric_consumption"] = (
                '{} kWhEP/m².an'.format(electric_consumption))
        except (RegexpError, XPathNotFound):
            # No DPE image: the entry is simply left out.
            pass

        return details
class item(ItemElement):
    """Build one ``Housing`` object from a teaser of a result list.

    All XPaths are relative to the teaser element.
    """

    klass = Housing

    obj_id = Format(
        '%s:%s',
        Env('type'),
        Attr('.//span[boolean(@data-reference)]', 'data-reference')
    )
    # Declared once; a second, identical assignment further down has been
    # removed.
    obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
    obj_type = Env('query_type')
    obj_advert_type = ADVERT_TYPES.PROFESSIONAL

    def obj_house_type(self):
        """Return the first house type whose query slug appears in the URL."""
        url = self.obj_url(self)
        for house_type, slugs in QUERY_HOUSE_TYPES.items():
            for slug in slugs:
                if ('/%s/' % slug) in url:
                    return house_type
        return NotLoaded

    obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
    obj_area = CleanDecimal(
        Regexp(
            CleanText(
                './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
            ),
            r'(\d*\.*\d*) .*',
            default=NotAvailable
        ),
        default=NotAvailable
    )
    obj_cost = CleanDecimal(
        './/strong[has-class("TeaserOffer-price-num")]',
        default=NotAvailable
    )
    obj_price_per_meter = PricePerMeterFilter()
    obj_currency = Currency(
        './/strong[has-class("TeaserOffer-price-num")]'
    )
    obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
    obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')

    def obj_photos(self):
        """Return the teaser picture as a one-element list, or ``[]``."""
        url = CleanText(
            Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src')
        )(self)
        # If the used photo is a default no photo, the src is on the same
        # domain. An empty src is treated the same way instead of raising
        # IndexError on url[0].
        if not url or url.startswith('/'):
            return []
        return [HousingPhoto(url)]

    def obj_date(self):
        """Return today's date.

        Written as a method so the date is computed on every scrape; as a
        class attribute it was evaluated once, when the class was defined,
        and went stale in a long-running process.
        """
        return datetime.date.today()

    def obj_utilities(self):
        """Return ``INCLUDED`` when the price mentions included charges."""
        price = CleanText(
            './/strong[has-class("TeaserOffer-price-num")]'
        )(self)
        if "charges comprises" in price.lower():
            return UTILITIES.INCLUDED
        return UTILITIES.EXCLUDED

    obj_rooms = CleanDecimal(
        './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
        default=NotLoaded
    )
    obj_bedrooms = CleanDecimal(
        './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
        default=NotLoaded
    )

    def obj_details(self):
        """Return the availability date and the price mentions.

        The availability date falls back to today when the teaser does not
        carry a ``data-dispo`` attribute.
        """
        return {
            "dispo": Date(
                Attr('.//span[boolean(@data-dispo)]', 'data-dispo',
                     default=datetime.date.today().isoformat())
            )(self),
            "priceMentions": CleanText(
                './/span[has-class("TeaserOffer-price-mentions")]'
            )(self)
        }
class item(ItemElement):
    """Build one ``Housing`` object from an entry of a result list.

    All XPaths are relative to the entry element.
    """

    klass = Housing

    # The price is found in one of two places depending on the entry.
    price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'

    def is_agency(self):
        """Tell whether the entry is published by an agency.

        An entry is an agency one unless its agency name mentions
        "annonce de particulier".
        """
        agency_name = CleanText(
            './/span[has-class("item-agency-name")]'
        )(self.el)
        return 'annonce de particulier' not in agency_name.lower()

    def condition(self):
        """Tell whether this entry must be turned into a ``Housing``.

        When exactly one advert type is requested, the entry must match
        it; otherwise the entry is kept when it has a classified id.
        """
        wanted = self.env['advert_types']
        if len(wanted) == 1:
            from_agency = self.is_agency()
            if wanted[0] == ADVERT_TYPES.PERSONAL:
                return not from_agency
            if wanted[0] == ADVERT_TYPES.PROFESSIONAL:
                return from_agency
        return Attr('.', 'data-classified-id', default=False)(self)

    obj_id = Attr('.', 'data-classified-id')
    obj_type = Env('query_type')
    obj_title = CleanText('./div/h2[@class="item-type"]')

    def obj_advert_type(self):
        """Return ``PROFESSIONAL`` for agency entries, ``PERSONAL`` otherwise."""
        return (
            ADVERT_TYPES.PROFESSIONAL if self.is_agency()
            else ADVERT_TYPES.PERSONAL
        )

    def obj_house_type(self):
        """Map the first word of the title to a ``HOUSE_TYPES`` value."""
        first_word = self.obj_title(self).split()[0].lower()

        if first_word in ("appartement", "studio", "chambre"):
            return HOUSE_TYPES.APART
        if first_word in ("maison", "villa"):
            return HOUSE_TYPES.HOUSE
        if first_word == "parking":
            return HOUSE_TYPES.PARKING
        if first_word == "terrain":
            return HOUSE_TYPES.LAND
        return HOUSE_TYPES.OTHER

    def obj_location(self):
        """Return ``"locality (postal code)"`` read from the entry's JSON script.

        ``NotLoaded`` is returned when the script cannot be decoded, is
        empty, or lacks one of the address keys.
        """
        raw_script = CleanText('./script')(self)

        try:
            # Should be standard JSON+LD data
            payload = json.loads(raw_script)
        except ValueError:
            try:
                # But explorimmo can't write JSON correctly and there
                # is a trailing "}"
                payload = json.loads(raw_script.strip().rstrip('}'))
            except ValueError:
                payload = None

        if not payload:
            return NotLoaded

        try:
            return '%s (%s)' % (
                payload['address']['addressLocality'],
                payload['address']['postalCode']
            )
        except KeyError:
            return NotLoaded

    def obj_cost(self):
        """Return the price, using the lower bound of a "de X à Y" range."""
        range_low = CleanDecimal(
            Regexp(
                CleanText(self.price_selector, default=''),
                r'de (.*) à .*',
                default=0
            )
        )(self)
        if range_low != 0:
            return range_low
        # Not a range: read the price directly.
        return CleanDecimal(self.price_selector, default=NotAvailable)(self)

    obj_currency = Currency(price_selector)

    def obj_utilities(self):
        """Return ``INCLUDED`` when the price text contains "CC"."""
        price_text = CleanText(
            './div/div/span[@class="price-label"]|'
            './div/div[@class="item-price-pdf"]|'
            './div/div/span[@class="item-price"]'
        )(self)
        return (
            UTILITIES.INCLUDED if "CC" in price_text
            else UTILITIES.UNKNOWN
        )

    obj_text = CleanText('./div/p[@itemprop="description"]')
    # The area is the number found right before " m2" in the title.
    obj_area = CleanDecimal(
        Regexp(
            obj_title,
            r'(.*?)([\d,\.]*) m2(.*?)',
            '\\2',
            default=None
        ),
        replace_dots=True,
        default=NotLoaded
    )
    obj_url = Format(
        "https://immobilier.lefigaro.fr/annonces/annonce-%s.html",
        CleanText('./@data-classified-id')
    )
    obj_price_per_meter = PricePerMeterFilter()

    def obj_phone(self):
        """Return the phone number, or ``NotLoaded`` when it is truncated."""
        number = CleanText(
            './div/div/ul/li[has-class("js-clickphone")]',
            replace=[('Téléphoner : ', '')],
            default=NotLoaded
        )(self)
        # A number containing "..." is only partially displayed.
        return NotLoaded if '...' in number else number

    def obj_details(self):
        """Return ``{"fees": ...}`` when fees are displayed, else ``NotLoaded``."""
        fees_text = CleanText(
            './/span[@class="price-fees"]',
            default=None
        )(self)
        if not fees_text:
            return NotLoaded
        return {"fees": fees_text.split(":")[1].strip()}

    def obj_photos(self):
        """Return the entry picture as a one-element list, or ``NotLoaded``.

        When the unquoted URL embeds a second ``http://`` URL, only that
        embedded URL (without its query string) is kept.
        """
        photo_url = CleanText(
            './div[has-class("default-img")]/img/@data-src'
        )(self)
        if not photo_url:
            return NotLoaded

        photo_url = unquote(photo_url)
        if "http://" in photo_url[3:]:
            query_start = photo_url.rfind("?")
            end = None if query_start == -1 else query_start
            photo_url = photo_url[photo_url.find("http://", 3):end]
        return [HousingPhoto(photo_url)]
class get_housing(ItemElement):
    """Build one ``Housing`` object from a housing JSON document.

    Values are read with ``Dict`` paths, mostly below ``characteristics``.
    """

    klass = Housing

    def is_agency(self):
        """Tell whether the advert is published by an agency.

        ``agency/isParticulier`` is accepted both as the string ``'false'``
        and as a JSON boolean ``false``.
        """
        # NOTE(review): the original only matched the string 'false';
        # the boolean form is accepted too — confirm against the API.
        return Dict('agency/isParticulier')(self) in (False, 'false')

    obj_id = Env('_id')

    def obj_type(self):
        """Derive the post type from the transaction and estate type."""
        transaction = Dict('characteristics/transaction')(self)
        if transaction == 'location':
            if Dict('characteristics/isFurnished')(self):
                return POSTS_TYPES.FURNISHED_RENT
            return POSTS_TYPES.RENT
        elif transaction == 'vente':
            estate_type = Dict('characteristics/estateType')(self).lower()
            if 'viager' in estate_type:
                return POSTS_TYPES.VIAGER
            return POSTS_TYPES.SALE
        return NotAvailable

    def obj_advert_type(self):
        """Return ``PROFESSIONAL`` for agency adverts, ``PERSONAL`` otherwise."""
        # The method must be called: testing the bound method itself is
        # always true and reported every advert as PROFESSIONAL.
        if self.is_agency():
            return ADVERT_TYPES.PROFESSIONAL
        return ADVERT_TYPES.PERSONAL

    def obj_house_type(self):
        """Map the estate type to a ``HOUSE_TYPES`` value."""
        estate_type = Dict('characteristics/estateType')(self).lower()
        if 'appartement' in estate_type:
            return HOUSE_TYPES.APART
        elif 'maison' in estate_type:
            return HOUSE_TYPES.HOUSE
        elif 'parking' in estate_type:
            return HOUSE_TYPES.PARKING
        elif 'terrain' in estate_type:
            return HOUSE_TYPES.LAND
        return HOUSE_TYPES.OTHER

    obj_title = Dict('characteristics/titleWithTransaction')
    obj_location = Format('%s %s %s', Dict('location/address'),
                          Dict('location/cityLabel'),
                          Dict('location/postalCode'))

    def obj_cost(self):
        """Return the price, falling back to ``priceMin`` when it is 0."""
        cost = TypeDecimal(Dict('characteristics/price'))(self)
        if cost == 0:
            cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
        return cost

    obj_currency = BaseCurrency.get_currency('€')

    def obj_utilities(self):
        """Return ``INCLUDED`` when ``areFeesIncluded`` is truthy."""
        are_fees_included = Dict('characteristics/areFeesIncluded',
                                 default=None)(self)
        if are_fees_included:
            return UTILITIES.INCLUDED
        return UTILITIES.EXCLUDED

    obj_text = CleanHTML(Dict('characteristics/description'))
    obj_url = BrowserURL('housing_html', _id=Env('_id'))

    def obj_area(self):
        """Return the area, falling back to ``areaMin`` when it is 0."""
        area = TypeDecimal(Dict('characteristics/area'))(self)
        if area == 0:
            area = TypeDecimal(Dict('characteristics/areaMin'))(self)
        return area

    obj_date = FromTimestamp(Dict('characteristics/date'))
    obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))

    def obj_rooms(self):
        """Return the first entry of ``roomCount``, or ``NotAvailable``."""
        # TODO: Why is roomCount a list?
        rooms = Dict('characteristics/roomCount', default=[])(self)
        if rooms:
            return TypeDecimal(rooms[0])(self)
        return NotAvailable

    obj_price_per_meter = PricePerMeterFilter()

    def obj_photos(self):
        """Return one ``HousingPhoto`` per image having an ``xl`` URL.

        When the URL is a ``thbr.figarocms.net`` thumbnail wrapping the
        original picture URL, the wrapped URL is used.
        """
        photos = []
        for img in Dict('characteristics/images')(self):
            xl_url = img.get('xl')
            if not xl_url:
                # re.search() would raise TypeError on a missing URL.
                continue
            m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)',
                          xl_url)
            photos.append(HousingPhoto(m.group(1) if m else xl_url))
        return photos

    def obj_DPE(self):
        """Return the energy consumption class, or ``NotAvailable``."""
        DPE = Dict(
            'characteristics/energyConsumptionCategory',
            default=""
        )(self)
        return getattr(ENERGY_CLASS, DPE, NotAvailable)

    def obj_GES(self):
        """Return the greenhouse gas emission class, or ``NotAvailable``."""
        GES = Dict(
            'characteristics/greenhouseGasEmissionCategory',
            default=""
        )(self)
        return getattr(ENERGY_CLASS, GES, NotAvailable)

    def obj_details(self):
        """Return miscellaneous characteristics and the agency address.

        Missing characteristics are stored as ``NotAvailable``; ``rooms``
        is only present when ``roomCount`` is not empty.
        """
        details = {}
        details['fees'] = Dict(
            'characteristics/fees', default=NotAvailable
        )(self)
        details['agencyFees'] = Dict(
            'characteristics/agencyFees', default=NotAvailable
        )(self)
        details['guarantee'] = Dict(
            'characteristics/guarantee', default=NotAvailable
        )(self)
        details['bathrooms'] = Dict(
            'characteristics/bathroomCount', default=NotAvailable
        )(self)
        details['creationDate'] = FromTimestamp(
            Dict(
                'characteristics/creationDate', default=NotAvailable
            ),
            default=NotAvailable
        )(self)
        details['availabilityDate'] = Dict(
            'characteristics/estateAvailabilityDate', default=NotAvailable
        )(self)
        details['exposure'] = Dict(
            'characteristics/exposure', default=NotAvailable
        )(self)
        details['heatingType'] = Dict(
            'characteristics/heatingType', default=NotAvailable
        )(self)
        details['floor'] = Dict(
            'characteristics/floor', default=NotAvailable
        )(self)
        details['bedrooms'] = Dict(
            'characteristics/bedroomCount', default=NotAvailable
        )(self)
        details['isFurnished'] = Dict(
            'characteristics/isFurnished', default=NotAvailable
        )(self)
        rooms = Dict('characteristics/roomCount', default=[])(self)
        if rooms:
            details['rooms'] = rooms[0]
        details['available'] = Dict(
            'characteristics/isAvailable', default=NotAvailable
        )(self)
        # Default to an empty dict: the code below calls .get() on the
        # result, which the former NotAvailable default does not provide.
        agency = Dict('agency', default={})(self)
        details['agency'] = ', '.join([
            x for x in [
                agency.get('corporateName', ''),
                agency.get('corporateAddress', ''),
                agency.get('corporatePostalCode', ''),
                agency.get('corporateCity', '')
            ] if x
        ])
        return details
class get_housing(ItemElement):
    """Build one ``Housing`` object from a housing JSON document.

    ``parse`` stores the advert attributes in ``env['details']``; the
    ``PopDetail`` based attributes read from it, and what is left ends up
    in ``obj_details``.
    """

    klass = Housing

    def parse(self, el):
        """Store the advert attributes as a ``key -> value_label`` dict."""
        self.env['details'] = {
            obj['key']: obj['value_label'] for obj in el['attributes']
        }

    obj_id = Env('_id')

    obj_area = CleanDecimal(PopDetail('square', default=0),
                            default=NotAvailable)
    obj_rooms = CleanDecimal(PopDetail('rooms', default=0),
                             default=NotAvailable)

    def obj_GES(self):
        """Return the class named by the first character of the ``ges`` detail.

        ``NotAvailable`` is returned when the detail is missing, empty or
        unknown.
        """
        ges = CleanText(PopDetail('ges', default='|'))(self)
        # Slice instead of indexing: "[0]" raised IndexError on an empty
        # label.
        return getattr(ENERGY_CLASS, ges[:1], NotAvailable)

    def obj_DPE(self):
        """Return the class named by the first character of ``energy_rate``.

        ``NotAvailable`` is returned when the detail is missing, empty or
        unknown.
        """
        dpe = CleanText(PopDetail('energy_rate', default='|'))(self)
        # Same IndexError guard as in obj_GES.
        return getattr(ENERGY_CLASS, dpe[:1], NotAvailable)

    def obj_house_type(self):
        """Map the ``real_estate_type`` detail to a ``HOUSE_TYPES`` value."""
        value = CleanText(PopDetail('real_estate_type'),
                          default=' ')(self).lower()
        if value == 'parking':
            return HOUSE_TYPES.PARKING
        elif value == 'appartement':
            return HOUSE_TYPES.APART
        elif value == 'maison':
            return HOUSE_TYPES.HOUSE
        elif value == 'terrain':
            return HOUSE_TYPES.LAND
        return HOUSE_TYPES.OTHER

    def obj_utilities(self):
        """Return ``INCLUDED`` when the ``charges_included`` detail is "Oui"."""
        value = CleanText(PopDetail('charges_included', default='Non'),
                          default=NotAvailable)(self)
        if value == "Oui":
            return UTILITIES.INCLUDED
        return UTILITIES.EXCLUDED

    obj_title = Dict('subject')
    obj_cost = CleanDecimal(Dict('price/0', default=NotAvailable),
                            default=Decimal(0))
    obj_currency = BaseCurrency.get_currency(u'€')
    obj_text = Dict('body')
    obj_location = Dict('location/city_label')

    def obj_advert_type(self):
        """Return ``PROFESSIONAL`` when the owner type is ``pro``."""
        line_pro = Dict('owner/type')(self)
        if line_pro == u'pro':
            return ADVERT_TYPES.PROFESSIONAL
        return ADVERT_TYPES.PERSONAL

    obj_date = DateTime(Dict('first_publication_date'))

    def obj_photos(self):
        """Return one ``HousingPhoto`` per large image URL."""
        return [
            HousingPhoto(img)
            for img in Dict('images/urls_large', default=[])(self)
        ]

    def obj_type(self):
        """Derive the post type from ``category_id`` and the furnished detail.

        Category 11 is a sharing, category 10 a rent (furnished when the
        ``furnished`` detail says so); anything else is reported as a sale.
        """
        try:
            breadcrumb = int(Dict('category_id')(self))
        except (TypeError, ValueError):
            # TypeError covers a null category_id, which int() rejects
            # with a different exception than a malformed string.
            breadcrumb = None

        if breadcrumb == 11:
            return POSTS_TYPES.SHARING
        elif breadcrumb == 10:
            is_furnished = CleanText(
                PopDetail('furnished', default=' ')
            )(self)
            if is_furnished.lower() == u'meublé':
                return POSTS_TYPES.FURNISHED_RENT
            return POSTS_TYPES.RENT
        return POSTS_TYPES.SALE

    obj_price_per_meter = PricePerMeterFilter()

    obj_url = Dict('url')

    obj_details = Env('details')