Пример #1
0
    class get_housing(ItemElement):
        """Scrape one housing advert page into a ``Housing`` object.

        The advert identifier is taken from the ``_id`` environment value; the
        other fields are read from the page through the filters below.
        """
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@itemprop="name"]')
        obj_location = CleanText('//span[@class="informations-localisation"]')
        obj_cost = CleanDecimal('//span[@itemprop="price"]')
        obj_currency = Currency('//span[@itemprop="price"]')
        obj_text = CleanHTML('//div[@itemprop="description"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))
        # The area is the run of digits that precedes " m2" in the title.
        obj_area = CleanDecimal(
            Regexp(
                CleanText('//h1[@itemprop="name"]'),
                r'(.*?)(\d*) m2(.*?)',
                '\\2',
            ),
            default=NotAvailable,
        )
        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return one ``HousingPhoto`` per thumbnail image of the page."""
            thumbnails = XPath(
                '//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self)
            # The thumbnail src embeds another http:// URL after the
            # thbr.figarocms.net prefix; that embedded URL is the one kept.
            embedded_url = Regexp(CleanText('./@src'),
                                  r'http://thbr\.figarocms\.net.*(http://.*)')
            return [HousingPhoto(embedded_url(thumb)) for thumb in thumbnails]

        def obj_details(self):
            """Return a dict built from the features list and the DPE block.

            Entries with an empty label or an empty value are left out.
            """
            features = {}
            for row in XPath('//div[@class="features clearfix"]/ul/li')(self):
                label = CleanText('./span[@class="name"]')(row)
                content = CleanText('./span[@class="value"]')(row)
                if label and content:
                    features[label] = content

            # The "title-dpe" text is the key, "energy-consumption" the value.
            dpe_label = CleanText('//div[@class="title-dpe clearfix"]')(self)
            dpe_content = CleanText('//div[@class="energy-consumption"]')(self)
            if dpe_label and dpe_content:
                features[dpe_label] = dpe_content
            return features
Пример #2
0
 def obj_type(self):
     """Map the numeric ``query_type`` environment value to a post type.

     The first key of ``TYPES`` whose value equals the id is used, and
     ``FURNISHED_RENT`` is reported as plain ``RENT``.
     """
     type_id = int(Env('query_type')(self))
     post_type = next(
         name for name, value in TYPES.items() if value == type_id
     )
     # SeLoger does not let us discriminate between furnished and not furnished.
     if post_type == POSTS_TYPES.FURNISHED_RENT:
         return POSTS_TYPES.RENT
     return post_type
Пример #3
0
        def next_page(self):
            """Build the POST request that fetches the next page of results.

            Returns ``None`` once the current ``offset`` is greater than the
            ``total_all`` value of the page document. Otherwise ``offset`` is
            advanced by ``limit`` in place on the ``data`` environment object,
            which is then sent JSON-encoded to the current page URL.
            """
            payload = Env('data')(self)
            if payload['offset'] > self.page.doc['total_all']:
                return None

            payload['offset'] += payload['limit']
            body = json.dumps(payload)
            return requests.Request("POST", self.page.url, data=body)
Пример #4
0
        def next_page(self):
            """Return the URL of the next search page, or ``None`` if none.

            A next page exists while the result count divided by the page
            size is strictly greater than the current page number.
            """
            raw_page = Dict('navigation/pagination/page')(self)
            raw_count = Dict('navigation/counts/count')(self)
            raw_page_size = Dict('navigation/pagination/resultsPerPage')(self)

            page_count = int(raw_count) / int(raw_page_size)
            current_page = int(raw_page)
            if page_count <= current_page:
                return None

            return BrowserURL('search',
                              query=Env('query'),
                              page_number=current_page + 1)(self)
Пример #5
0
 def obj_type(self):
     """Translate the ``type`` environment value into a post type.

     'achat' is a sale; 'location' is a rental, furnished when the page URL
     contains 'appartement-meuble'; anything else is ``NotAvailable``.
     """
     kind = Env('type')(self)
     if kind == 'achat':
         return POSTS_TYPES.SALE
     if kind != 'location':
         return NotAvailable
     if 'appartement-meuble' in self.page.url:
         return POSTS_TYPES.FURNISHED_RENT
     return POSTS_TYPES.RENT
Пример #6
0
 def obj_type(self):
     """Deduce the post type from keywords found in the advert URL.

     'colocation' is checked before 'location' because the former contains
     the latter. A rental counts as furnished when the "meublé" criterion of
     the description list is valued "oui".
     """
     advert_url = BrowserURL('housing', _id=Env('_id'))(self)
     if 'colocation' in advert_url:
         return POSTS_TYPES.SHARING
     if 'location' in advert_url:
         furnished = False
         for entry in XPath('//ul[@itemprop="description"]/li')(self):
             label = CleanText('./span[has-class("criteria-label")]')(entry)
             if label.lower() != "meublé":
                 continue
             # No early exit: the last matching criterion decides.
             answer = CleanText('./span[has-class("criteria-value")]')(entry)
             furnished = answer.lower() == 'oui'
         return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT
     if 'vente' in advert_url:
         return POSTS_TYPES.SALE
     return NotAvailable
Пример #7
0
    class get_housing(ItemElement):
        """Scrape one advert page into a ``Housing`` object.

        The advert id comes from the ``_id`` environment value; every other
        field is read from the page by the ``obj_*`` filters and methods below.
        """
        klass = Housing

        obj_id = Env('_id')

        def obj_type(self):
            """Deduce the post type from keywords found in the advert URL."""
            url = BrowserURL('housing', _id=Env('_id'))(self)
            # 'colocation' must be tested first: it contains 'location'.
            if 'colocation' in url:
                return POSTS_TYPES.SHARING
            elif 'location' in url:
                # Furnished when the "meublé" criterion is valued "oui"; if the
                # criterion appears several times, the last occurrence wins.
                isFurnished = False
                for li in XPath('//ul[@itemprop="description"]/li')(self):
                    label = CleanText('./span[has-class("criteria-label")]')(li)
                    if label.lower() == "meublé":
                        isFurnished = (
                            CleanText('./span[has-class("criteria-value")]')(li).lower() == 'oui'
                        )
                if isFurnished:
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif 'vente' in url:
                return POSTS_TYPES.SALE
            return NotAvailable

        obj_advert_type = ADVERT_TYPES.PROFESSIONAL

        def obj_house_type(self):
            """Map the lower-cased main-feature label to a house type.

            Labels other than the four known ones yield ``HOUSE_TYPES.OTHER``.
            """
            house_type = CleanText('.//h2[@class="offerMainFeatures"]/div')(self).lower()
            known_types = {
                "appartement": HOUSE_TYPES.APART,
                "maison": HOUSE_TYPES.HOUSE,
                "terrain": HOUSE_TYPES.LAND,
                "parking": HOUSE_TYPES.PARKING,
            }
            return known_types.get(house_type, HOUSE_TYPES.OTHER)

        obj_title = Attr('//meta[@property="og:title"]', 'content')
        obj_area = CleanDecimal(
            CleanText(
                '//p[@class="offerArea"]/span',
            ),
            default=NotAvailable
        )
        # Raw strings: '\d' in a plain literal is an invalid escape sequence.
        obj_rooms = CleanDecimal(
            Regexp(
                CleanText('//p[@class="offerRooms"]/span'),
                r'(\d) p.',
                default=NotAvailable
            ),
            default=NotAvailable
        )
        obj_bedrooms = CleanDecimal(
            Regexp(
                CleanText('//p[@class="offerRooms"]/span'),
                r'(\d) ch.',
                default=NotAvailable
            ),
            default=NotAvailable
        )
        obj_cost = CleanDecimal('//*[@itemprop="price"]', default=0)
        obj_currency = Currency(
            '//*[@itemprop="price"]'
        )

        def obj_utilities(self):
            """Report utilities as included when the notes say so.

            NOTE(review): this reads a ``p`` element while ``obj_date`` reads a
            ``div`` with the same class -- confirm which one the page uses.
            """
            notes = CleanText('//p[@class="offer-description-notes"]')(self)
            if "Loyer mensuel charges comprises" in notes:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.UNKNOWN

        obj_price_per_meter = PricePerMeterFilter()
        # Backslashes are doubled (rather than using a raw literal) so the
        # u'' prefix can stay; the pattern value is unchanged.
        obj_date = Date(Regexp(CleanText('//div[@class="offer-description-notes"]'),
                               u'.* Mis à jour: (\\d{2}/\\d{2}/\\d{4}).*'),
                        dayfirst=True)
        obj_text = CleanHTML('//p[@class="descrProperty"]')
        obj_location = CleanText('//em[@class="infoAdresse"]')
        obj_station = CleanText(
            '//div[has-class("offer-description-metro")]',
            default=NotAvailable
        )

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_photos(self):
            """Return the thumbnails as absolute 800x600 photo URLs.

            ``.svg`` sources are skipped.
            """
            photos = []
            for img in XPath('//ul[@class="thumbsContainer"]//img/@src')(self):
                if img.endswith('.svg'):
                    continue
                url = u'%s' % img.replace('182x136', '800x600')
                url = urljoin(self.page.url, url)  # Ensure URL is absolute
                photos.append(HousingPhoto(url))
            return photos

        def obj_DPE(self):
            """Return the energy class named by the DPE ``data-class`` value.

            ``NotAvailable`` is returned when the value is missing or does not
            name an ``ENERGY_CLASS`` member.
            """
            energy_value = CleanText(
                '//ul[@class="energyInfosDPE"]//li[@class="energyInfos"]/span/@data-class',
                default=""
            )(self)
            # Slice instead of indexing: a value holding nothing but "DPE"
            # would otherwise raise IndexError rather than give NotAvailable.
            energy_value = energy_value.replace("DPE", "").strip()[:1]
            return getattr(ENERGY_CLASS, energy_value, NotAvailable)

        def obj_GES(self):
            """Return the energy class named by the GES ``data-class`` value.

            ``NotAvailable`` is returned when the value is missing or does not
            name an ``ENERGY_CLASS`` member.
            """
            greenhouse_value = CleanText(
                '//ul[@class="energyInfosGES"]//li[@class="energyInfos"]/span/@data-class',
                default=""
            )(self)
            # Same IndexError guard as in obj_DPE.
            greenhouse_value = greenhouse_value.replace("GES", "").strip()[:1]
            return getattr(ENERGY_CLASS, greenhouse_value, NotAvailable)

        def obj_details(self):
            """Return extra details: creation date, agency fees and criteria."""
            details = {}

            details["creationDate"] = Date(
                Regexp(
                    CleanText(
                        '//div[@class="offer-description-notes"]'
                    ),
                    u'.*Mis en ligne: (\\d{2}/\\d{2}/\\d{4}).*'
                ),
                dayfirst=True
            )(self)

            honoraires = CleanText(
                (
                    '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]'
                ),
                default=None
            )(self)
            if honoraires:
                # The fee is the text after the first colon; skip the entry
                # instead of raising IndexError when there is no colon.
                fee_parts = honoraires.split(":")
                if len(fee_parts) > 1:
                    details["Honoraires"] = (
                        "{} (TTC, en sus)".format(fee_parts[1].strip())
                    )

            for li in XPath('//ul[@itemprop="description"]/li')(self):
                label = CleanText('./span[has-class("criteria-label")]')(li)
                value = CleanText('./span[has-class("criteria-value")]')(li)
                details[label] = value

            return details
Пример #8
0
        class item(ItemElement):
            """Scrape one search-result entry into a ``Housing`` object."""

            # XPath prefix shared by most selectors of this element.
            offer_details_wrapper = (
                './/div[has-class("offer-details-wrapper")]'
            )
            klass = Housing

            # Id is "<type prefix>-<element id without 'header-offer-'>".
            obj_id = Format(
                '%s-%s',
                Regexp(Env('type'), '(.*)-.*'),
                CleanText('./@id', replace=[('header-offer-', '')])
            )
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                """Map the lower-cased itemprop name to a house type.

                Names other than the four known ones yield
                ``HOUSE_TYPES.OTHER``.
                """
                house_type = CleanText('.//div[has-class("offer-details-caracteristik")]/meta[@itemprop="name"]/@content')(self).lower()
                known_types = {
                    "appartement": HOUSE_TYPES.APART,
                    "maison": HOUSE_TYPES.HOUSE,
                    "terrain": HOUSE_TYPES.LAND,
                    "parking": HOUSE_TYPES.PARKING,
                }
                return known_types.get(house_type, HOUSE_TYPES.OTHER)

            obj_title = CleanText('.//div[has-class("offer-details-type")]/a/@title')

            # The link href followed by its optional data-orpi attribute.
            obj_url = Format(
                u'%s%s',
                CleanText('.//div/a[@class="offer-link"]/@href'),
                CleanText('.//div/a[@class="offer-link"]/@data-orpi',
                          default="")
            )

            obj_area = CleanDecimal(
                (
                    offer_details_wrapper +
                    '/div/div/div[has-class("offer-details-second")]' +
                    '/div/h3[has-class("offer-attributes")]/span' +
                    '/span[has-class("offer-area-number")]'
                ),
                default=NotLoaded
            )
            obj_rooms = CleanDecimal(
                (
                    offer_details_wrapper +
                    '/div/div/div[has-class("offer-details-second")]' +
                    '/div/h3[has-class("offer-attributes")]' +
                    '/span[has-class("offer-rooms")]' +
                    '/span[has-class("offer-rooms-number")]'
                ),
                default=NotAvailable
            )
            # The amount is whatever precedes " €", " $" or " £".
            obj_cost = CleanDecimal(
                Regexp(
                    CleanText(
                        (
                            offer_details_wrapper +
                            '/div/p[@class="offer-price"]/span'
                        ),
                        default=NotLoaded
                    ),
                    '(.*) [%s%s%s]' % (u'€', u'$', u'£'),
                    default=NotLoaded
                ),
                default=NotLoaded
            )
            obj_currency = Currency(
                offer_details_wrapper + '/div/p[has-class("offer-price")]/span'
            )
            obj_price_per_meter = PricePerMeterFilter()
            obj_utilities = UTILITIES.UNKNOWN
            obj_text = CleanText(
                offer_details_wrapper + '/div/div/div/p[has-class("offer-description")]/span'
            )
            obj_location = CleanText(
                offer_details_wrapper + '/div[@class="offer-details-location"]',
                replace=[('Voir sur la carte','')]
            )

            def obj_photos(self):
                """Return the entry picture as a one-element photo list.

                The list is empty when the entry has no picture.
                """
                # A default replaces the former bare ``except: pass``, which
                # also swallowed unrelated errors such as KeyboardInterrupt.
                url = Attr(
                    './/div[has-class("offer-picture")]//img',
                    'src',
                    default=None
                )(self)
                if not url:
                    return []

                url = url.replace('335x253', '800x600')
                url = urljoin(self.page.url, url)  # Ensure URL is absolute
                return [HousingPhoto(url)]

            def obj_details(self):
                """Return the agency fees, when present, as a details dict."""
                details = {}
                honoraires = CleanText(
                    (
                        self.offer_details_wrapper +
                        '/div/div/p[@class="offer-agency-fees"]'
                    ),
                    default=None
                )(self)
                if honoraires:
                    # The fee is the text after the first colon; skip the
                    # entry instead of raising IndexError without a colon.
                    fee_parts = honoraires.split(":")
                    if len(fee_parts) > 1:
                        details["Honoraires"] = (
                            "{} (TTC, en sus)".format(fee_parts[1].strip())
                        )
                return details
Пример #9
0
        class item(ItemElement):
            """Scrape one search-result entry into a ``Housing`` object."""
            klass = Housing

            def condition(self):
                """Tell whether this entry should be kept.

                An entry is dropped when it has no advert id, when its link
                does not match ``/annonces/``, or -- for RENT queries -- when
                its title mentions "meublé".
                """
                title = self.obj_title(self)
                furnished_filter_ok = (
                    self.env['query_type'] != POSTS_TYPES.RENT
                    or 'meublé' not in title.lower()
                )
                if self.obj_id(self) is None:
                    return False
                advert_ref = Regexp(Link('./div/a[has-class("item-title")]'),
                                    '/annonces/(.*)',
                                    default=None)(self)
                return advert_ref and furnished_filter_ok

            def parse(self, el):
                """Fill the rooms, bedrooms and area environment values.

                Each tag of the entry is classified by its text: "chambre" is
                a bedroom count, "pièce" a room count, anything else the area.
                """
                tags = el.xpath(
                    './div/a[has-class("item-title")]/ul[has-class("item-tags")]/li'
                )
                for key in ('rooms', 'bedrooms', 'area'):
                    self.env[key] = NotLoaded

                for tag in tags:
                    label = CleanText('.')(tag).lower()
                    if 'chambre' in label:
                        key = 'bedrooms'
                        amount = CleanDecimal('.')(tag)
                    elif 'pièce' in label:
                        key = 'rooms'
                        amount = CleanDecimal('.')(tag)
                    else:
                        key = 'area'
                        amount = CleanDecimal(
                            Regexp(CleanText('.'), r'(\d*\.*\d*) .*'))(tag)
                    self.env[key] = amount

            obj_id = Regexp(Link('./div/a[has-class("item-title")]'),
                            '/annonces/(.*)',
                            default=None)

            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                """Deduce the house type from the last path segment of the link.

                Only the part before the first dash is looked at; the first
                matching keyword wins and ``OTHER`` is the fallback.
                """
                link = Link('./div/a[@class="item-title"]')(self)
                slug = link.split('/')[-1].split('-')[0]
                keyword_types = (
                    ('parking', HOUSE_TYPES.PARKING),
                    ('appartement', HOUSE_TYPES.APART),
                    ('terrain', HOUSE_TYPES.LAND),
                    ('maison', HOUSE_TYPES.HOUSE),
                )
                for keyword, house_type in keyword_types:
                    if keyword in slug:
                        return house_type
                return HOUSE_TYPES.OTHER

            obj_title = CleanText('./div/a[has-class("item-title")]')
            obj_area = Env('area')
            obj_cost = CleanDecimal(
                CleanText(
                    './div/a[has-class("item-title")]/span[@class="item-price"]'
                ),
                replace_dots=True,
                default=Decimal(0),
            )
            obj_currency = Currency(
                './div/a[@class="item-title"]/span[@class="item-price"]'
            )
            obj_utilities = UTILITIES.UNKNOWN

            obj_station = CleanText(
                './div/p[@class="item-transports"]',
                default=NotLoaded,
            )

            def obj_location(self):
                """Return the description text up to its first period."""
                description = CleanText('./div/p[@class="item-description"]')(self)
                return description.partition(".")[0]

            obj_text = CleanText(
                './div/p[@class="item-description"]',
                replace=[(' Lire la suite', '')],
            )
            obj_rooms = Env('rooms')
            obj_bedrooms = Env('bedrooms')
            obj_price_per_meter = PricePerMeterFilter()

            obj_url = Format(
                u'http://www.pap.fr%s',
                Link('./div/a[@class="item-title"]'),
            )

            def obj_photos(self):
                """Return the entry images, placeholder pictures excluded."""
                placeholders = ("visuel-nophoto.png", 'miniature-video.png')
                return [
                    HousingPhoto(u'%s' % src)
                    for src in XPath('./a/img/@src')(self)
                    if not src.endswith(placeholders)
                ]
Пример #10
0
    class get_housing(ItemElement):
        """Scrape one advert page into a ``Housing`` object.

        The advert id comes from the ``_id`` environment value; rooms,
        bedrooms and area are pre-computed by ``parse`` into the environment.
        """
        klass = Housing

        def parse(self, el):
            """Fill the rooms, bedrooms and area environment values.

            Each tag is classified by its text: "chambre" is a bedroom count,
            "pièce" a room count, and a " m²" mention other than "le m²" the
            area. Values default to ``NotAvailable``.
            """
            rooms_bedrooms_area = el.xpath('.//ul[has-class("item-tags")]/li')
            self.env['rooms'] = NotAvailable
            self.env['bedrooms'] = NotAvailable
            self.env['area'] = NotAvailable

            for item in rooms_bedrooms_area:
                name = CleanText('.')(item)
                if 'chambre' in name.lower():
                    name = 'bedrooms'
                    value = CleanDecimal('./strong')(item)
                elif 'pièce' in name.lower():
                    name = 'rooms'
                    value = CleanDecimal('./strong')(item)
                elif ' m²' in name and 'le m²' not in name:
                    name = 'area'
                    value = CleanDecimal(
                        Regexp(CleanText('.'), r'(\d*\.*\d*) .*'))(item)
                else:
                    # Unrecognised tag: without this, ``value`` was unbound
                    # (or stale from a previous tag) and got stored under the
                    # raw tag text as environment key.
                    continue
                self.env[name] = value

        obj_id = Env('_id')

        def obj_type(self):
            """Deduce the post type from the first breadcrumb link.

            A rental is furnished when the advert title mentions "meublé".
            """
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            if 'location' in prev_link:
                title = self.obj_title(self)
                if 'meublé' in title.lower():
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif 'vente' in prev_link:
                return POSTS_TYPES.SALE
            elif 'viager' in prev_link:
                return POSTS_TYPES.VIAGER
            else:
                return NotAvailable

        obj_advert_type = ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Deduce the house type from the first breadcrumb link.

            Only the part after the last dash of the link is looked at; the
            first matching keyword wins and ``OTHER`` is the fallback.
            """
            prev_link = Link('//ol[has-class("breadcrumb")]/li[1]/a')(self)
            house_type = prev_link.split('-')[-1]
            if 'parking' in house_type:
                return HOUSE_TYPES.PARKING
            elif 'appartement' in house_type:
                return HOUSE_TYPES.APART
            elif 'terrain' in house_type:
                return HOUSE_TYPES.LAND
            elif 'maison' in house_type:
                return HOUSE_TYPES.HOUSE
            else:
                return HOUSE_TYPES.OTHER

        obj_title = CleanText('//h1[@class="item-title"]')
        obj_cost = CleanDecimal(
            '//h1[@class="item-title"]/span[@class="item-price"]',
            replace_dots=True)
        obj_currency = Currency(
            '//h1[@class="item-title"]/span[@class="item-price"]')
        obj_utilities = UTILITIES.UNKNOWN
        obj_area = Env('area')

        def obj_date(self):
            """Parse the French date found after the last "/" of item-date."""
            date = CleanText('//p[@class="item-date"]')(self).split(
                "/")[-1].strip()
            return parse_french_date(date)

        obj_rooms = Env('rooms')
        obj_bedrooms = Env('bedrooms')
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[has-class("item-description")]/h2')
        obj_text = CleanText(
            CleanHTML('//div[has-class("item-description")]/div/p'))

        def obj_station(self):
            """Return the transport labels joined with ", "."""
            return ", ".join([
                station.text for station in XPath(
                    '//ul[has-class("item-transports")]//span[has-class("label")]'
                )(self)
            ])

        def obj_phone(self):
            """Return the first owner phone text, spaces turned into ", "."""
            phone = CleanText(
                '(//div[has-class("contact-proprietaire-box")]//strong[@class="tel-wrapper"])[1]'
            )(self)
            phone = phone.replace(' ', ', ')
            return phone

        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_DPE(self):
            """Return the energy class encoded in an ``energy-rank-X`` class.

            ``NotAvailable`` is returned when the element is missing, carries
            no such class token, or the letter is not an ``ENERGY_CLASS``
            member.
            """
            classes = Attr(
                '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
                'class',
                default="")(self)
            # next() with a default: indexing the filtered list raised
            # IndexError when no "energy-rank-*" token was present.
            rank = next(
                (token.replace("energy-rank-", "").upper()
                 for token in classes.split()
                 if token.startswith("energy-rank-")),
                ""
            )
            return getattr(ENERGY_CLASS, rank, NotAvailable)

        def obj_photos(self):
            """Return the thumbnail images, video placeholders excluded."""
            photos = []
            for img in XPath('//div[@class="owl-thumbs"]/a/img/@src')(self):
                if not img.endswith('miniature-video.png'):
                    photos.append(HousingPhoto(u'%s' % img))
            return photos
Пример #11
0
    class get_housing(ItemElement):
        """Scrape one advert page into a ``Housing`` object."""
        klass = Housing

        # Id is "<type>:<data-property-reference>".
        obj_id = Format(
            '%s:%s', Env('type'),
            Attr('//div[boolean(@data-property-reference)]',
                 'data-property-reference'))
        obj_advert_type = ADVERT_TYPES.PROFESSIONAL

        def obj_type(self):
            """Translate the ``type`` environment value into a post type.

            A 'location' is furnished when the page URL contains
            'appartement-meuble'; unknown values yield ``NotAvailable``.
            """
            type = Env('type')(self)
            if type == 'location':
                if 'appartement-meuble' in self.page.url:
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif type == 'achat':
                return POSTS_TYPES.SALE
            else:
                return NotAvailable

        def obj_url(self):
            """Return the URL of the current page."""
            return self.page.url

        def obj_house_type(self):
            """Return the house type whose URL segment appears in the URL.

            ``QUERY_HOUSE_TYPES`` maps each house type to its candidate
            segments; ``NotAvailable`` is returned when none matches.
            """
            url = self.obj_url()
            for house_type, types in QUERY_HOUSE_TYPES.items():
                for type in types:
                    if ('/%s/' % type) in url:
                        return house_type
            return NotAvailable

        obj_title = CleanText('//h1[has-class("OfferTop-title")]')
        obj_area = CleanDecimal(Regexp(CleanText(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'),
                                       r'(\d*\.*\d*) .*',
                                       default=NotAvailable),
                                default=NotAvailable)
        obj_cost = CleanDecimal('//span[has-class("OfferTop-price")]',
                                default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()
        obj_currency = Currency('//span[has-class("OfferTop-price")]')
        obj_location = Format('%s - %s',
                              CleanText('//p[@data-behat="adresseBien"]'),
                              CleanText('//p[has-class("OfferTop-loc")]'))
        obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
        obj_phone = Regexp(Link('//a[has-class("OfferContact-btn--tel")]'),
                           r'tel:(.*)')

        def obj_photos(self):
            """Return the slider images, requested in 800/600 size."""
            photos = []
            for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
                photo_url = Attr('.', 'src')(photo)
                photo_url = photo_url.replace('640/480', '800/600')
                photos.append(HousingPhoto(photo_url))
            return photos

        def obj_date(self):
            """Return today's date.

            This is a method so the date is computed on every scrape; as a
            plain class attribute it was frozen at class-definition time.
            """
            return datetime.date.today()

        def obj_utilities(self):
            """Report utilities as included when the price text says so."""
            price = CleanText('//p[has-class("OfferTop-price")]')(self)
            if "charges comprises" in price.lower():
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        obj_rooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
            default=NotAvailable)
        obj_bedrooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
            default=NotAvailable)

        def _electric_consumption(self):
            """Return the number embedded in the DPE image URL, or ``None``.

            ``None`` is returned when the image is missing or its ``src``
            does not match the expected URL shape.
            """
            try:
                return CleanDecimal(
                    Regexp(
                        Attr('//div[has-class("OfferDetails-content")]//img',
                             'src'),
                        r'https://dpe.foncia.net\/(\d+)\/.*'))(self)
            except (RegexpError, XPathNotFound):
                return None

        def obj_DPE(self):
            """Return the energy class matching the electric consumption.

            ``NotAvailable`` is returned when the consumption is unknown.
            """
            electric_consumption = self._electric_consumption()
            if electric_consumption is None:
                return NotAvailable

            # Inclusive upper bound of each class; above the last one is "G".
            upper_bounds = (
                (50, "A"),
                (90, "B"),
                (150, "C"),
                (230, "D"),
                (330, "E"),
                (450, "F"),
            )
            DPE = "G"
            for upper_bound, letter in upper_bounds:
                if electric_consumption <= upper_bound:
                    DPE = letter
                    break
            return getattr(ENERGY_CLASS, DPE, NotAvailable)

        def obj_details(self):
            """Return extra details scraped from the page as a dict.

            Contains the availability date, price mentions, agency address,
            one sub-dict per detail category and, when known, the electric
            consumption.
            """
            details = {}

            # When no date is found, today's ISO date is parsed instead.
            dispo = Date(
                Regexp(CleanText('//p[has-class("OfferTop-dispo")]'),
                       r'.* (\d\d\/\d\d\/\d\d\d\d)',
                       default=datetime.date.today().isoformat()))(self)
            if dispo is not None:
                details["dispo"] = dispo

            priceMentions = CleanText('//p[has-class("OfferTop-mentions")]',
                                      default=None)(self)
            if priceMentions is not None:
                details["priceMentions"] = priceMentions

            agency = CleanText('//p[has-class("OfferContact-address")]',
                               default=None)(self)
            if agency is not None:
                details["agency"] = agency

            for item in self.xpath(
                    '//div[has-class("OfferDetails-columnize")]/div'):
                category = CleanText(
                    './h3[has-class("OfferDetails-title--2")]',
                    default=None)(item)
                if not category:
                    continue

                details[category] = {}

                # "List--data" rows are title/value pairs.
                for detail_item in item.xpath(
                        './/ul[has-class("List--data")]/li'):
                    detail_title = CleanText(
                        './/span[has-class("List-data")]')(detail_item)
                    detail_value = CleanText('.//*[has-class("List-value")]')(
                        detail_item)
                    details[category][detail_title] = detail_value

                # "List--bullet" rows are stored as flags set to True.
                for detail_item in item.xpath(
                        './/ul[has-class("List--bullet")]/li'):
                    detail_title = CleanText('.')(detail_item)
                    details[category][detail_title] = True

            electric_consumption = self._electric_consumption()
            if electric_consumption is not None:
                details["electric_consumption"] = (
                    '{} kWhEP/m².an'.format(electric_consumption))

            return details
Пример #12
0
        class item(ItemElement):
            """Scrape one search-result teaser into a ``Housing`` object."""
            klass = Housing

            # Id is "<type>:<data-reference>".
            obj_id = Format(
                '%s:%s', Env('type'),
                Attr('.//span[boolean(@data-reference)]', 'data-reference'))
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                """Return the house type whose URL segment appears in the URL.

                ``QUERY_HOUSE_TYPES`` maps each house type to its candidate
                segments; ``NotLoaded`` is returned when none matches.
                """
                url = self.obj_url(self)
                for house_type, types in QUERY_HOUSE_TYPES.items():
                    for type in types:
                        if ('/%s/' % type) in url:
                            return house_type
                return NotLoaded

            # Defined once: an identical earlier assignment was dead code,
            # overwritten by this one.
            obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
            obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
            obj_area = CleanDecimal(Regexp(CleanText(
                './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
            ),
                                           r'(\d*\.*\d*) .*',
                                           default=NotAvailable),
                                    default=NotAvailable)
            obj_cost = CleanDecimal(
                './/strong[has-class("TeaserOffer-price-num")]',
                default=NotAvailable)
            obj_price_per_meter = PricePerMeterFilter()
            obj_currency = Currency(
                './/strong[has-class("TeaserOffer-price-num")]')
            obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
            obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')

            def obj_photos(self):
                """Return the teaser picture as a one-element photo list.

                The list is empty when the picture is the default "no photo"
                image or when its ``src`` is empty.
                """
                url = CleanText(
                    Attr('.//a[has-class("TeaserOffer-ill")]/img',
                         'src'))(self)
                # If the used photo is a default no photo, the src is on the
                # same domain. The emptiness check avoids the IndexError that
                # ``url[0]`` raised on an empty src.
                if not url or url.startswith('/'):
                    return []
                return [HousingPhoto(url)]

            def obj_date(self):
                """Return today's date.

                This is a method so the date is computed on every scrape; as
                a plain class attribute it was frozen at class-definition
                time.
                """
                return datetime.date.today()

            def obj_utilities(self):
                """Report utilities as included when the price text says so."""
                price = CleanText(
                    './/strong[has-class("TeaserOffer-price-num")]')(self)
                if "charges comprises" in price.lower():
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.EXCLUDED

            obj_rooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
                default=NotLoaded)
            obj_bedrooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
                default=NotLoaded)

            def obj_details(self):
                """Return the availability date and the price mentions.

                When the teaser has no ``data-dispo`` attribute, today's ISO
                date is parsed instead.
                """
                return {
                    "dispo":
                    Date(
                        Attr('.//span[boolean(@data-dispo)]',
                             'data-dispo',
                             default=datetime.date.today().isoformat()))(self),
                    "priceMentions":
                    CleanText(
                        './/span[has-class("TeaserOffer-price-mentions")]')(
                            self)
                }
Пример #13
0
        class item(ItemElement):
            klass = Housing
            price_selector = './/span[@class="price-label"]|./div/div[@class="item-price-pdf"]'

            def is_agency(self):
                """Tell whether the entry was posted by an agency.

                True unless the ``item-agency-name`` text contains
                "annonce de particulier", compared case-insensitively.
                """
                agency_name = CleanText(
                    './/span[has-class("item-agency-name")]'
                )(self.el)
                return not ('annonce de particulier' in agency_name.lower())

            def condition(self):
                if len(self.env['advert_types']) == 1:
                    is_agency = self.is_agency()
                    if self.env['advert_types'][0] == ADVERT_TYPES.PERSONAL:
                        return not is_agency
                    elif self.env['advert_types'][0] == ADVERT_TYPES.PROFESSIONAL:
                        return is_agency
                return Attr('.', 'data-classified-id', default=False)(self)

            obj_id = Attr('.', 'data-classified-id')
            obj_type = Env('query_type')
            obj_title = CleanText('./div/h2[@class="item-type"]')

            def obj_advert_type(self):
                if self.is_agency():
                    return ADVERT_TYPES.PROFESSIONAL
                else:
                    return ADVERT_TYPES.PERSONAL

            def obj_house_type(self):
                type = self.obj_title(self).split()[0].lower()
                if type == "appartement" or type == "studio" or type == "chambre":
                    return HOUSE_TYPES.APART
                elif type == "maison" or type == "villa":
                    return HOUSE_TYPES.HOUSE
                elif type == "parking":
                    return HOUSE_TYPES.PARKING
                elif type == "terrain":
                    return HOUSE_TYPES.LAND
                else:
                    return HOUSE_TYPES.OTHER

            def obj_location(self):
                script = CleanText('./script')(self)
                try:
                    # Should be standard JSON+LD data
                    script = json.loads(script)
                except ValueError:
                    try:
                        # But explorimmo can't write JSON correctly and there
                        # is a trailing "}"
                        script = json.loads(script.strip().rstrip('}'))
                    except ValueError:
                        script = None
                if not script:
                    return NotLoaded

                try:
                    return '%s (%s)' % (
                        script['address']['addressLocality'],
                        script['address']['postalCode']
                    )
                except (KeyError):
                    return NotLoaded

            def obj_cost(self):
                cost = CleanDecimal(Regexp(CleanText(self.price_selector, default=''),
                                           r'de (.*) à .*',
                                           default=0))(self)
                if cost == 0:
                    return CleanDecimal(self.price_selector, default=NotAvailable)(self)
                else:
                    return cost

            obj_currency = Currency(price_selector)

            def obj_utilities(self):
                utilities = CleanText(
                    './div/div/span[@class="price-label"]|'
                    './div/div[@class="item-price-pdf"]|'
                    './div/div/span[@class="item-price"]'
                )(self)
                if "CC" in utilities:
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.UNKNOWN

            obj_text = CleanText('./div/p[@itemprop="description"]')
            obj_area = CleanDecimal(
                Regexp(
                    obj_title,
                    r'(.*?)([\d,\.]*) m2(.*?)',
                    '\\2',
                    default=None
                ),
                replace_dots=True,
                default=NotLoaded
            )

            obj_url = Format(
                "https://immobilier.lefigaro.fr/annonces/annonce-%s.html",
                CleanText('./@data-classified-id')
            )

            obj_price_per_meter = PricePerMeterFilter()

            def obj_phone(self):
                phone = CleanText('./div/div/ul/li[has-class("js-clickphone")]',
                                  replace=[('Téléphoner : ', '')],
                                  default=NotLoaded)(self)

                if '...' in phone:
                    return NotLoaded

                return phone

            def obj_details(self):
                charges = CleanText('.//span[@class="price-fees"]',
                                    default=None)(self)
                if charges:
                    return {
                        "fees": charges.split(":")[1].strip()
                    }
                else:
                    return NotLoaded

            def obj_photos(self):
                url = CleanText('./div[has-class("default-img")]/img/@data-src')(self)
                if url:
                    url = unquote(url)
                    if "http://" in url[3:]:
                        rindex = url.rfind("?")
                        if rindex == -1:
                            rindex = None
                        url = url[url.find("http://", 3):rindex]
                    return [HousingPhoto(url)]
                else:
                    return NotLoaded
Пример #14
0
    class get_housing(ItemElement):
        """Housing item filled from the ``characteristics``, ``location`` and
        ``agency`` entries of a JSON document."""
        klass = Housing

        def is_agency(self):
            """Return True when ``agency/isParticulier`` is false.

            Both the string ``'false'`` and the boolean ``False`` are accepted.
            NOTE(review): the actual type sent by the service is not visible
            here -- confirm.
            """
            is_private = Dict('agency/isParticulier')(self)
            return is_private is False or is_private == 'false'

        obj_id = Env('_id')

        def obj_type(self):
            """Map the transaction (and furnished / viager hints) to POSTS_TYPES.

            NotAvailable is returned for a transaction that is neither
            "location" nor "vente".
            """
            transaction = Dict('characteristics/transaction')(self)
            if transaction == 'location':
                if Dict('characteristics/isFurnished')(self):
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif transaction == 'vente':
                estate_type = Dict('characteristics/estateType')(self).lower()
                if 'viager' in estate_type:
                    return POSTS_TYPES.VIAGER
                else:
                    return POSTS_TYPES.SALE
            else:
                return NotAvailable

        def obj_advert_type(self):
            """Map :meth:`is_agency` to the matching ADVERT_TYPES value."""
            # is_agency must be *called*: the bound method object itself is
            # always truthy, which made every advert PROFESSIONAL.
            if self.is_agency():
                return ADVERT_TYPES.PROFESSIONAL
            else:
                return ADVERT_TYPES.PERSONAL

        def obj_house_type(self):
            """Map the lower-cased estate type to a HOUSE_TYPES value."""
            estate_type = Dict('characteristics/estateType')(self).lower()
            if 'appartement' in estate_type:
                return HOUSE_TYPES.APART
            elif 'maison' in estate_type:
                return HOUSE_TYPES.HOUSE
            elif 'parking' in estate_type:
                return HOUSE_TYPES.PARKING
            elif 'terrain' in estate_type:
                return HOUSE_TYPES.LAND
            else:
                return HOUSE_TYPES.OTHER

        obj_title = Dict('characteristics/titleWithTransaction')
        obj_location = Format('%s %s %s', Dict('location/address'),
                              Dict('location/cityLabel'),
                              Dict('location/postalCode'))

        def obj_cost(self):
            """Return the price, falling back on ``priceMin`` when it is 0."""
            cost = TypeDecimal(Dict('characteristics/price'))(self)
            if cost == 0:
                cost = TypeDecimal(Dict('characteristics/priceMin'))(self)
            return cost

        obj_currency = BaseCurrency.get_currency('€')

        def obj_utilities(self):
            """Return INCLUDED when ``areFeesIncluded`` is truthy, else EXCLUDED."""
            are_fees_included = Dict('characteristics/areFeesIncluded',
                                     default=None)(self)
            if are_fees_included:
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        obj_text = CleanHTML(Dict('characteristics/description'))
        obj_url = BrowserURL('housing_html', _id=Env('_id'))

        def obj_area(self):
            """Return the area, falling back on ``areaMin`` when it is 0."""
            area = TypeDecimal(Dict('characteristics/area'))(self)
            if area == 0:
                area = TypeDecimal(Dict('characteristics/areaMin'))(self)
            return area

        obj_date = FromTimestamp(Dict('characteristics/date'))
        obj_bedrooms = TypeDecimal(Dict('characteristics/bedroomCount'))

        def obj_rooms(self):
            """Return the first ``roomCount`` entry as a decimal, or NotAvailable."""
            # TODO: Why is roomCount a list?
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if rooms:
                return TypeDecimal(rooms[0])(self)
            return NotAvailable

        obj_price_per_meter = PricePerMeterFilter()

        def obj_photos(self):
            """Return a HousingPhoto for every image that has an ``xl`` URL.

            When the URL goes through the thbr.figarocms.net host and embeds
            another "http://" URL, the embedded URL is used. Images without
            an ``xl`` entry are skipped, and a missing image list gives ``[]``.
            """
            photos = []
            for img in Dict('characteristics/images', default=[])(self) or []:
                url = img.get('xl')
                if not url:
                    # re.search would raise TypeError on None.
                    continue
                m = re.search(r'http://thbr\.figarocms\.net.*(http://.*)', url)
                photos.append(HousingPhoto(m.group(1) if m else url))
            return photos

        def obj_DPE(self):
            """Return the ENERGY_CLASS member named by the energy category.

            NotAvailable is returned when the category is missing, empty or
            not a member name.
            """
            DPE = Dict(
                'characteristics/energyConsumptionCategory',
                default=""
            )(self)
            # "or ''" keeps getattr from receiving None as an attribute name.
            return getattr(ENERGY_CLASS, DPE or "", NotAvailable)

        def obj_GES(self):
            """Return the ENERGY_CLASS member named by the greenhouse category.

            NotAvailable is returned when the category is missing, empty or
            not a member name.
            """
            GES = Dict(
                'characteristics/greenhouseGasEmissionCategory',
                default=""
            )(self)
            # "or ''" keeps getattr from receiving None as an attribute name.
            return getattr(ENERGY_CLASS, GES or "", NotAvailable)

        def obj_details(self):
            """Collect the secondary characteristics into a dict.

            Every entry defaults to NotAvailable, except "rooms" (only set
            when ``roomCount`` is non-empty) and "agency" (the non-empty
            corporate fields joined with ", ", possibly an empty string).
            """
            def characteristic(name):
                # Read one optional entry of the "characteristics" mapping.
                return Dict('characteristics/%s' % name,
                            default=NotAvailable)(self)

            details = {}
            details['fees'] = characteristic('fees')
            details['agencyFees'] = characteristic('agencyFees')
            details['guarantee'] = characteristic('guarantee')
            details['bathrooms'] = characteristic('bathroomCount')
            details['creationDate'] = FromTimestamp(
                Dict('characteristics/creationDate', default=NotAvailable),
                default=NotAvailable
            )(self)
            details['availabilityDate'] = characteristic('estateAvailabilityDate')
            details['exposure'] = characteristic('exposure')
            details['heatingType'] = characteristic('heatingType')
            details['floor'] = characteristic('floor')
            details['bedrooms'] = characteristic('bedroomCount')
            details['isFurnished'] = characteristic('isFurnished')
            rooms = Dict('characteristics/roomCount', default=[])(self)
            if rooms:
                details['rooms'] = rooms[0]
            details['available'] = characteristic('isAvailable')
            # Default to an empty mapping: NotAvailable has no .get(), so a
            # missing "agency" entry used to raise AttributeError here.
            agency = Dict('agency', default={})(self) or {}
            details['agency'] = ', '.join([
                x for x in [
                    agency.get('corporateName', ''),
                    agency.get('corporateAddress', ''),
                    agency.get('corporatePostalCode', ''),
                    agency.get('corporateCity', '')
                ] if x
            ])
            return details
Пример #15
0
    class get_housing(ItemElement):
        """Housing item filled from a JSON advert and its ``attributes`` list."""
        klass = Housing

        def parse(self, el):
            """Store a ``key -> value_label`` mapping of the attributes in the env.

            The obj_* fields below read it through PopDetail, and whatever is
            left is exposed as ``obj_details``.
            NOTE(review): PopDetail presumably removes the key it reads --
            confirm against its definition.
            """
            self.env['details'] = {
                obj['key']: obj['value_label']
                for obj in el['attributes']
            }

        obj_id = Env('_id')

        obj_area = CleanDecimal(PopDetail('square', default=0),
                                default=NotAvailable)
        obj_rooms = CleanDecimal(PopDetail('rooms', default=0),
                                 default=NotAvailable)

        def obj_GES(self):
            """Return the ENERGY_CLASS member named by the first "ges" character.

            NotAvailable is returned when the detail is missing, empty or
            does not start with a member name.
            """
            ges = CleanText(PopDetail('ges', default='|'))(self)
            # Slicing (not indexing) tolerates a detail that is empty once
            # cleaned, which used to raise IndexError.
            return getattr(ENERGY_CLASS, ges[:1], NotAvailable)

        def obj_DPE(self):
            """Return the ENERGY_CLASS member named by the first "energy_rate"
            character.

            NotAvailable is returned when the detail is missing, empty or
            does not start with a member name.
            """
            dpe = CleanText(PopDetail('energy_rate', default='|'))(self)
            # Slicing (not indexing) tolerates a detail that is empty once
            # cleaned, which used to raise IndexError.
            return getattr(ENERGY_CLASS, dpe[:1], NotAvailable)

        def obj_house_type(self):
            """Map the lower-cased "real_estate_type" detail to HOUSE_TYPES."""
            value = CleanText(PopDetail('real_estate_type'),
                              default=' ')(self).lower()
            if value == 'parking':
                return HOUSE_TYPES.PARKING
            elif value == 'appartement':
                return HOUSE_TYPES.APART
            elif value == 'maison':
                return HOUSE_TYPES.HOUSE
            elif value == 'terrain':
                return HOUSE_TYPES.LAND
            else:
                return HOUSE_TYPES.OTHER

        def obj_utilities(self):
            """Return INCLUDED when "charges_included" is "Oui", else EXCLUDED."""
            value = CleanText(PopDetail('charges_included', default='Non'),
                              default=NotAvailable)(self)
            if value == "Oui":
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        obj_title = Dict('subject')
        obj_cost = CleanDecimal(Dict('price/0', default=NotAvailable),
                                default=Decimal(0))
        obj_currency = BaseCurrency.get_currency(u'€')
        obj_text = Dict('body')
        obj_location = Dict('location/city_label')

        def obj_advert_type(self):
            """Return PROFESSIONAL when ``owner/type`` is "pro", else PERSONAL."""
            line_pro = Dict('owner/type')(self)
            if line_pro == u'pro':
                return ADVERT_TYPES.PROFESSIONAL
            else:
                return ADVERT_TYPES.PERSONAL

        obj_date = DateTime(Dict('first_publication_date'))

        def obj_photos(self):
            """Return one HousingPhoto per URL of ``images/urls_large``."""
            return [
                HousingPhoto(img)
                for img in Dict('images/urls_large', default=[])(self)
            ]

        def obj_type(self):
            """Map ``category_id`` (and the "furnished" detail) to POSTS_TYPES.

            Category 11 is SHARING; category 10 is FURNISHED_RENT or RENT,
            depending on the "furnished" detail; anything else, including an
            id that cannot be converted to an integer, is SALE.
            """
            try:
                breadcrumb = int(Dict('category_id')(self))
            except (ValueError, TypeError):
                # TypeError covers a null (None) category id.
                breadcrumb = None

            if breadcrumb == 11:
                return POSTS_TYPES.SHARING
            elif breadcrumb == 10:

                isFurnished = CleanText(PopDetail('furnished',
                                                  default=' '))(self)

                if isFurnished.lower() == u'meublé':
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            else:
                return POSTS_TYPES.SALE

        obj_price_per_meter = PricePerMeterFilter()
        obj_url = Dict('url')
        obj_details = Env('details')