Пример #1
0
        url = "https://immobilier.lefigaro.fr/rest/iClassifieds?"

        url_with_params = url + urlencode(data)
        import requests
        get = requests.get(url_with_params)

        response = await self.client.patient_fetch(
            HTTPRequest(method="GET",
                        url=url_with_params,
                        headers=self.headers))
        soup = BeautifulSoup(response.body.decode('latin-1'), 'lxml')
        return [i.select_one('id').text for i in soup.select('classified')]

    def search_locations(self):
        """Build the comma-separated location string for the configured arrondissements.

        Each entry of ``self.filter.arrondissements`` is rendered as
        ``PARIS <n><suffix> (<code>)``, where ``<n>`` is the integer value of
        the last two characters of the code and ``<suffix>`` is ``ER`` for
        ``75001`` and ``EME`` for every other code.

        Returns:
            The rendered entries joined with ``,`` (an empty string when there
            are no arrondissements).
        """
        labels = []
        for code in self.filter.arrondissements:
            code_text = str(code)
            # Last two digits carry the arrondissement number (e.g. 75018 -> 18).
            number = int(code_text[-2:])
            suffix = "ER" if code_text == "75001" else "EME"
            labels.append(f'PARIS {number}{suffix} ({code})')
        return ','.join(labels)

    def create_candidate(self, i):
        """Convert one scraped listing element into a ``Notification``.

        Args:
            i: Listing element supporting attribute lookup by key and a
                CSS ``select`` method (presumably a BeautifulSoup tag —
                TODO confirm against the caller).

        Returns:
            A ``Notification`` whose id and location come from the element's
            ``data-classified-id`` / ``data-agency-postal-code`` attributes and
            whose price is the first line of the first ``.price-label`` text.
        """
        classified_id = i['data-classified-id']
        postal_code = i['data-agency-postal-code']
        # Only the first line of the stripped label text is kept as the price.
        price_label = i.select('.price-label')[0]
        price_lines = price_label.text.strip().split('\n')
        return Notification(id=classified_id,
                            location=postal_code,
                            price=price_lines[0])


if __name__ == '__main__':
    # Manual run of the Figaro service with a fixed sample filter.
    search_filter = Filter(arrondissements=[75018],
                           max_price=1500,
                           min_area=27)
    scraper = Figaro(search_filter, False)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(scraper.run())
Пример #2
0
                ]
            },
        }
        if self.filter.furnished:
            res['query']["furnishing"] = 1
        return res

    async def run(self):
        """Fetch result pages and push every returned item as a candidate.

        Calls ``self.init_auth_token()`` first, then POSTs the JSON-encoded
        ``self.data(page)`` to ``self.url`` for pages 1 through 9 at most.
        Every entry of the response's ``items`` list is passed to
        ``self.push_candidate``. Paging stops as soon as the current page
        number reaches the ``pageCount`` reported by the response.
        """
        self.init_auth_token()

        for page in range(1, 10):
            request = HTTPRequest(method="POST",
                                  url=self.url,
                                  body=json.dumps(self.data(page)),
                                  headers=self.headers)
            res = await self.client.patient_fetch(request)
            res_json = res.json()
            for c in res_json['items']:
                await self.push_candidate(c)
            # ``>=`` rather than ``==``: when the search yields no pages
            # (pageCount == 0) or fewer pages than the current index, an
            # equality test never fires and all nine requests would be sent.
            if page >= res_json['pageCount']:
                break


if __name__ == '__main__':
    # Manual run of the Seloger service; prints how many notifications it collected.
    search_filter = Filter(
        arrondissements=[75001],
        furnished=True,
        max_price=1300,
        min_area=25,
    )
    scraper = Seloger(search_filter, False)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(scraper.run())
    notification_count = len(scraper.notifications)
    print(notification_count)
Пример #3
0
            self.filter.min_area,
            "localityIds":
            self.locality_ids,
            "typeIds":
            "2,3,6,7,19",
            "size":
            self.fetch_size,
            "from": (page - 1) * self.fetch_size,
            "releaseDate.gte":
            (datetime.today() - timedelta(days=3)).strftime("%Y-%m-%d")
        }
        if self.filter.furnished:
            querystring["housingIds"] = 1
        res = await self.client.patient_fetch(
            HTTPRequest(
                method="GET",
                headers=self.auth_header,
                url="https://ws-web.avendrealouer.fr/realestate/properties?" +
                urlencode(querystring, safe=',')))
        return json.loads(res.body.decode())


if __name__ == '__main__':
    # Manual run of the AvendreAlouer service with a fixed sample filter.
    sample_arrondissements = [
        75001, 75002, 75003, 75004, 75005, 75010, 75011, 75008, 75009
    ]
    search_filter = Filter(arrondissements=sample_arrondissements,
                           max_price=13000,
                           min_area=25)
    scraper = AvendreAlouer(search_filter, False)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(scraper.run())
Пример #4
0
from runner import Filter
from services.abstract_service import AbstractService


class StarterService(AbstractService):
    """Skeleton service meant to be copied when adding a new scraper.

    Every method simply delegates to ``AbstractService``; replace the bodies
    with site-specific logic.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Forward all constructor arguments to ``AbstractService``."""
        # Keyword arguments must be forwarded with ``**``; a single ``*`` would
        # unpack the dict's keys and pass them as extra positional arguments.
        super().__init__(*args, **kwargs)
        # The timed block is empty here; presumably site-specific
        # initialisation is meant to go inside it — TODO confirm.
        with self.METRICS_INIT_TIME.time():
            pass

    def get_candidate_native_id(self, candidate) -> str:
        """Return the identifier of a raw candidate entity.

        Currently defers to the base-class implementation.
        """
        return super().get_candidate_native_id(candidate)

    async def candidate_to_notification(self, c) -> Notification:
        """Convert a raw flat entity into a ``Notification``.

        The target type is ``find_a_flat.notification_sender.Notification``.
        Currently defers to the base-class implementation.
        """
        return await super().candidate_to_notification(c)

    async def run(self):
        """Main entry point of the service.

        Write the scraper here and call ``self.push_candidate(candidate)``
        with each raw flat entity taken from the website. Currently defers to
        the base-class implementation.
        """
        return await super().run()


if __name__ == '__main__':
    # Manual run of the starter service with a fixed sample filter.
    search_filter = Filter(
        arrondissements=[75001, 75002, 75003],
        max_price=1300,
        min_area=25,
    )
    starter = StarterService(search_filter, False)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(starter.run())

Пример #5
0
        data = {
            'recherche[geo][ids][]': arr,
            'recherche[prix][max]': self.filter.max_price,
            'recherche[produit]': 'location',
            'recherche[typesbien][]': ['maison', 'appartement'],
            'order': 'date-desc',
            'recherche[surface][min]': self.filter.min_area,
            'size': '200'
        }
        if self.filter.furnished:
            data['recherche[tags][]'] = 'meuble'

        url = 'https://ws.pap.fr/immobilier/annonces?' + urlencode(data,
                                                                   doseq=True)

        resp = await self.client.patient_fetch(
            HTTPRequest(method='GET', url=url))
        # 50 - not to go too far in history
        for chunk in chunks(resp.json()['_embedded']['annonce'][:50], 10):
            await asyncio.wait([self.push_candidate(c) for c in chunk])


if __name__ == '__main__':
    # Manual run of the Pap service; the value returned by run() is logged.
    search_filter = Filter(
        arrondissements=[75001, 75002, 75003, 75004],
        max_price=2000,
        min_area=25,
    )
    scraper = Pap(search_filter, False)
    loop = asyncio.get_event_loop()
    outcome = loop.run_until_complete(scraper.run())
    scraper.logger.info(outcome)
Пример #6
0
        }
        res = await self.client.patient_fetch(
            HTTPRequest("http://lisemobile.logic-immo.com/li.search_ads.php?" +
                        urlencode(data)))
        res_json = res.json()
        for c in res_json['items']:
            await self.push_candidate(c)
        return math.ceil(res_json['search']['total'] / items_per_page)

    async def search_localities(self):
        """Resolve each configured arrondissement to a locality key.

        One lookup request per entry of ``self.filter.arrondissements`` is
        sent concurrently to the ``li.search_localities.php`` endpoint. From
        each JSON response, the ``key`` of the first entry in ``items`` whose
        ``level`` equals 2 is kept.

        Returns:
            A list of keys, in the same order as ``self.filter.arrondissements``
            (empty when no arrondissement is configured).

        Raises:
            IndexError: If a response contains no item with ``level == 2``.
        """
        base_url = "http://lisemobile.logic-immo.com/li.search_localities.php?"
        # asyncio.gather accepts coroutines directly and returns results in
        # submission order; asyncio.wait rejects bare coroutines on
        # Python 3.11+, yields an unordered set and fails on an empty list.
        responses = await asyncio.gather(*[
            self.client.patient_fetch(
                HTTPRequest(url=base_url + urlencode({
                    'client': "v8.a",
                    'fulltext': arrondissement
                })))
            for arrondissement in self.filter.arrondissements
        ])

        keys = []
        for response in responses:
            level_two_items = [
                item for item in response.json().get('items')
                if item['level'] == 2
            ]
            keys.append(level_two_items[0]['key'])
        return keys


if __name__ == '__main__':
    # Manual run of the LogicImmo service with a fixed sample filter.
    search_filter = Filter(
        arrondissements=[75003],
        max_price=2000,
        min_area=25,
        furnished=True,
    )
    scraper = LogicImmo(search_filter, False)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(scraper.run())
Пример #7
0
        zones = ','.join([i for z in self.filter_zones for i in z['zoneIds']])
        url = 'https://www.bienici.com/realEstateAds.json?' \
              'filters={"size":500,"from":0,"filterType":"rent","propertyType":["house","flat"],' \
              '"maxPrice":' + str(self.filter.max_price) + ',"minArea":' + str(self.filter.min_area) + \
              ',"page":1,"resultsPerPage":2400,"maxAuthorizedResults":2400,"sortBy":"relevance",' \
              '"sortOrder":"desc","onTheMarket":[true],"showAllModels":false,' \
              + ('"isFurnished":true,' if self.filter.furnished else '') + \
              '"zoneIdsByTypes":{"zoneIds":[' + zones + ']}}'

        resp = await self.client.patient_fetch(
            connect_timeout=60,
            request_timeout=60 * 2,
            request=HTTPRequest(method="GET", url=url),
        )
        resp = json.loads(resp.body.decode())
        for c in resp['realEstateAds']:
            await self.push_candidate(c)


if __name__ == '__main__':
    # Manual run of the BienIci service; the value returned by run() is logged.
    # Other arrondissements kept at hand for manual runs:
    # 75002, 75003, 75004, 75005, 75010, 75011, 75008, 75009
    # (furnished=True can be added to the filter to restrict the search.)
    search_filter = Filter(arrondissements=[75010],
                           max_price=1400,
                           min_area=35)

    scraper = BienIci(search_filter, False)
    loop = asyncio.get_event_loop()
    outcome = loop.run_until_complete(scraper.run())
    scraper.logger.info(outcome)
Пример #8
0
        nb_els = int(
            re.findall(
                '\d+',
                first_page.select_one(
                    '#bloc_liste_biens .titreSeparation').text)[0])
        page_cnt = math.ceil(nb_els / 30)
        if page_cnt > 1:
            kk = [
                z for i in range(2, page_cnt + 1)
                for z in (await self.get_page(i))[0]
            ]
            candidates.extend(kk)

        for chunk in chunks(candidates, 10):
            await asyncio.wait([self.push_candidate(c) for c in chunk])

    async def get_page(self, page=1):
        """Fetch and parse one page of Century21 rental listings.

        The URL is built from the filter's arrondissements (joined with
        ``-``), its minimum area, its maximum price and the page number.

        Args:
            page: Page number placed in the ``page-<n>`` URL segment.

        Returns:
            A ``(announces, soup)`` tuple: the ``.annonce .contentAnnonce``
            elements found inside the first
            ``#blocANNONCES .annoncesListeBien`` block, and the parsed
            document itself.
        """
        postal_codes = '-'.join(map(str, self.filter.arrondissements))
        url = (
            "https://www.century21.fr/annonces/location-appartement/cp-"
            + postal_codes
            + f"/s-{self.filter.min_area}-/st-0-/b-0-{self.filter.max_price}/page-{page}/"
        )
        request = HTTPRequest(method="GET", url=url)
        response = await self.client.patient_fetch(request)
        soup = BeautifulSoup(response.body.decode(), 'lxml')
        listing_block = soup.select('#blocANNONCES .annoncesListeBien')[0]
        announces = listing_block.select('.annonce .contentAnnonce')
        return announces, soup


if __name__ == '__main__':
    # Manual run of the Century21 service with a fixed sample filter.
    search_filter = Filter(arrondissements=[75013],
                           max_price=2000,
                           min_area=25)
    scraper = Century21(search_filter, False)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(scraper.run())
Пример #9
0
            'page': x,
            'perPage': ANNOUNCES_PER_PAGE,
            'surface': self.filter.min_area,
            'transaction': 'rent'
        }

        count_res = await self.client.patient_fetch(HTTPRequest(
            method='GET',
            url='https://www.laforet.com/api/immo/properties/count?' +
            urlencode(nb_announces_params)),
                                                    connect_timeout=2,
                                                    request_timeout=6)
        total_count = count_res.json()['count']

        for page in range(1, math.ceil(total_count / ANNOUNCES_PER_PAGE) + 1):
            search_base_url = 'https://www.laforet.com/api/immo/properties?'

            current_url = search_base_url + urlencode(main_params(page))
            get = await self.client.patient_fetch(HTTPRequest(method='GET',
                                                              url=current_url),
                                                  connect_timeout=2,
                                                  request_timeout=6)
            for c in get.json()['data']:
                await self.push_candidate(c)


if __name__ == '__main__':
    # Manual run of the Laforet service with a fixed sample filter.
    search_filter = Filter(arrondissements=[75018],
                           max_price=2000,
                           min_area=27)
    scraper = Laforet(search_filter, False)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(scraper.run())