url = "https://immobilier.lefigaro.fr/rest/iClassifieds?" url_with_params = url + urlencode(data) import requests get = requests.get(url_with_params) response = await self.client.patient_fetch( HTTPRequest(method="GET", url=url_with_params, headers=self.headers)) soup = BeautifulSoup(response.body.decode('latin-1'), 'lxml') return [i.select_one('id').text for i in soup.select('classified')] def search_locations(self): return ','.join([ f'PARIS {int(str(i)[-2:])}{"ER" if str(i) == "75001" else "EME"} ({i})' for i in self.filter.arrondissements ]) def create_candidate(self, i): return Notification( id=i['data-classified-id'], location=i['data-agency-postal-code'], price=i.select('.price-label')[0].text.strip().split('\n')[0]) if __name__ == '__main__': f = Filter(arrondissements=[75018], max_price=1500, min_area=27) figaro = Figaro(f, False) asyncio.get_event_loop().run_until_complete(figaro.run())
                ]
            },
        }
        # Only carry the "furnishing" flag when furnished flats were requested.
        if self.filter.furnished:
            res['query']["furnishing"] = 1
        return res

    async def run(self):
        """Main entry point: authenticate, then walk the result pages (hard
        cap of 9) and push every returned item as a candidate."""
        self.init_auth_token()
        # range(1, 10): never fetch more than 9 pages even if pageCount lies.
        for page in range(1, 10):
            res = await self.client.patient_fetch(
                HTTPRequest(method="POST",
                            url=self.url,
                            body=json.dumps(self.data(page)),
                            headers=self.headers))
            res_json = res.json()
            for c in res_json['items']:
                await self.push_candidate(c)
            # Stop as soon as the server reports this was the last page.
            if page == res_json['pageCount']:
                break


if __name__ == '__main__':
    pub_filter = Filter(arrondissements=[75001],
                        furnished=True,
                        max_price=1300,
                        min_area=25)
    seloger = Seloger(pub_filter, False)
    asyncio.get_event_loop().run_until_complete(seloger.run())
    print(len(seloger.notifications))
self.filter.min_area, "localityIds": self.locality_ids, "typeIds": "2,3,6,7,19", "size": self.fetch_size, "from": (page - 1) * self.fetch_size, "releaseDate.gte": (datetime.today() - timedelta(days=3)).strftime("%Y-%m-%d") } if self.filter.furnished: querystring["housingIds"] = 1 res = await self.client.patient_fetch( HTTPRequest( method="GET", headers=self.auth_header, url="https://ws-web.avendrealouer.fr/realestate/properties?" + urlencode(querystring, safe=','))) return json.loads(res.body.decode()) if __name__ == '__main__': f = Filter(arrondissements=[ 75001, 75002, 75003, 75004, 75005, 75010, 75011, 75008, 75009 ], max_price=13000, min_area=25) service = AvendreAlouer(f, False) asyncio.get_event_loop().run_until_complete(service.run())
from runner import Filter from services.abstract_service import AbstractService class StarterService(AbstractService): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, *kwargs) with self.METRICS_INIT_TIME.time(): pass def get_candidate_native_id(self, candidate) -> str: # candidate entity id extractor return super().get_candidate_native_id(candidate) async def candidate_to_notification(self, c) -> Notification: # convert raw flat entity to a find_a_flat.notification_sender.Notification return await super().candidate_to_notification(c) async def run(self): # Main entry point # White scraper here use self.push_candidate(candidate) passing a raw website flat entity return await super().run() if __name__ == '__main__': f = Filter(arrondissements=[75001, 75002, 75003], max_price=1300, min_area=25) service = StarterService(f, False) asyncio.get_event_loop().run_until_complete(service.run())
        # Build the PAP search payload; `arr` is presumably the current
        # arrondissement, set in the part of this method cut off above —
        # TODO confirm against the full file.
        data = {
            'recherche[geo][ids][]': arr,
            'recherche[prix][max]': self.filter.max_price,
            'recherche[produit]': 'location',
            'recherche[typesbien][]': ['maison', 'appartement'],
            'order': 'date-desc',
            'recherche[surface][min]': self.filter.min_area,
            'size': '200'
        }
        if self.filter.furnished:
            data['recherche[tags][]'] = 'meuble'
        # doseq=True expands the list-valued params into repeated keys.
        url = 'https://ws.pap.fr/immobilier/annonces?' + urlencode(data,
                                                                   doseq=True)
        resp = await self.client.patient_fetch(
            HTTPRequest(method='GET', url=url))
        # 50 - not to go too far in history
        for chunk in chunks(resp.json()['_embedded']['annonce'][:50], 10):
            # NOTE(review): passing bare coroutines to asyncio.wait is
            # deprecated since 3.8 — wrap in create_task when upgrading.
            await asyncio.wait([self.push_candidate(c) for c in chunk])


if __name__ == '__main__':
    f = Filter(arrondissements=[75001, 75002, 75003, 75004],
               max_price=2000,
               min_area=25)
    service = Pap(f, False)
    res = asyncio.get_event_loop().run_until_complete(service.run())
    service.logger.info(res)
        }
        res = await self.client.patient_fetch(
            HTTPRequest("http://lisemobile.logic-immo.com/li.search_ads.php?" +
                        urlencode(data)))
        res_json = res.json()
        for c in res_json['items']:
            await self.push_candidate(c)
        # Return the total page count so the caller can iterate the rest.
        return math.ceil(res_json['search']['total'] / items_per_page)

    async def search_localities(self):
        """Resolve each configured arrondissement to its Logic-Immo locality
        key, querying all of them concurrently."""
        payload = lambda x: {'client': "v8.a", 'fulltext': x}
        # asyncio.wait returns (done, pending); only the done set is used.
        resp, _ = await asyncio.wait([
            self.client.patient_fetch(
                HTTPRequest(
                    url=
                    "http://lisemobile.logic-immo.com/li.search_localities.php?"
                    + urlencode(payload(i))))
            for i in self.filter.arrondissements
        ])
        # Keep the first item with level == 2 from each response (presumably
        # the arrondissement-level locality — confirm against the API) and
        # return its key.
        return [[i for i in r.result().json().get('items')
                 if i['level'] == 2][0]['key'] for r in resp]


if __name__ == '__main__':
    f = Filter(arrondissements=[75003],
               max_price=2000,
               min_area=25,
               furnished=True)
    service = LogicImmo(f, False)
    asyncio.get_event_loop().run_until_complete(service.run())
        # Flatten every zone id from the configured filter zones.
        zones = ','.join([i for z in self.filter_zones for i in z['zoneIds']])
        # The `filters` query param is raw, hand-assembled JSON left unencoded
        # in the URL; the furnished flag is only spliced in when requested.
        # NOTE(review): a dict + json.dumps would be safer, but the zone ids
        # are interpolated UNQUOTED here — verify their JSON type first.
        url = 'https://www.bienici.com/realEstateAds.json?' \
            'filters={"size":500,"from":0,"filterType":"rent","propertyType":["house","flat"],' \
            '"maxPrice":' + str(self.filter.max_price) + ',"minArea":' + str(self.filter.min_area) + \
            ',"page":1,"resultsPerPage":2400,"maxAuthorizedResults":2400,"sortBy":"relevance",' \
            '"sortOrder":"desc","onTheMarket":[true],"showAllModels":false,' \
            + ('"isFurnished":true,' if self.filter.furnished else '') + \
            '"zoneIdsByTypes":{"zoneIds":[' + zones + ']}}'
        # Generous timeouts: this single request can return up to 2400 ads.
        resp = await self.client.patient_fetch(
            connect_timeout=60,
            request_timeout=60 * 2,
            request=HTTPRequest(method="GET", url=url),
        )
        resp = json.loads(resp.body.decode())
        for c in resp['realEstateAds']:
            await self.push_candidate(c)


if __name__ == '__main__':
    # , 75002, 75003, 75004, 75005, 75010, 75011, 75008, 75009
    f = Filter(
        arrondissements=[75010],
        max_price=1400,
        # furnished=True,
        min_area=35)
    service = BienIci(f, False)
    res = asyncio.get_event_loop().run_until_complete(service.run())
    service.logger.info(res)
nb_els = int( re.findall( '\d+', first_page.select_one( '#bloc_liste_biens .titreSeparation').text)[0]) page_cnt = math.ceil(nb_els / 30) if page_cnt > 1: kk = [ z for i in range(2, page_cnt + 1) for z in (await self.get_page(i))[0] ] candidates.extend(kk) for chunk in chunks(candidates, 10): await asyncio.wait([self.push_candidate(c) for c in chunk]) async def get_page(self, page=1): url = "https://www.century21.fr/annonces/location-appartement/cp-" + '-'.join( [str(i) for i in self.filter.arrondissements] ) + f"/s-{self.filter.min_area}-/st-0-/b-0-{self.filter.max_price}/page-{page}/" r = await self.client.patient_fetch(HTTPRequest(method="GET", url=url)) page = BeautifulSoup(r.body.decode(), 'lxml') announces = page.select('#blocANNONCES .annoncesListeBien')[0].select( '.annonce .contentAnnonce') return announces, page if __name__ == '__main__': f = Filter(arrondissements=[75013], max_price=2000, min_area=25) service = Century21(f, False) asyncio.get_event_loop().run_until_complete(service.run())
            # (fragment: tail of the per-page params builder started above —
            # presumably `main_params = lambda x: {...}`, given the
            # `main_params(page)` call below; confirm against the full file)
            'page': x,
            'perPage': ANNOUNCES_PER_PAGE,
            'surface': self.filter.min_area,
            'transaction': 'rent'
        }
        # First ask how many announces match, to know how many pages to pull.
        count_res = await self.client.patient_fetch(HTTPRequest(
            method='GET',
            url='https://www.laforet.com/api/immo/properties/count?' +
            urlencode(nb_announces_params)),
                                                    connect_timeout=2,
                                                    request_timeout=6)
        total_count = count_res.json()['count']
        for page in range(1, math.ceil(total_count / ANNOUNCES_PER_PAGE) + 1):
            search_base_url = 'https://www.laforet.com/api/immo/properties?'
            current_url = search_base_url + urlencode(main_params(page))
            get = await self.client.patient_fetch(HTTPRequest(
                method='GET', url=current_url),
                                                  connect_timeout=2,
                                                  request_timeout=6)
            for c in get.json()['data']:
                await self.push_candidate(c)


if __name__ == '__main__':
    laforet = Laforet(
        Filter(arrondissements=[75018], max_price=2000, min_area=27), False)
    asyncio.get_event_loop().run_until_complete(laforet.run())