def __find_similar_ad_from_pic(self, picture):
    """Look for a stored ad whose picture resembles the given one.

    The picture is downloaded and perceptually hashed, then the hash is
    compared with the ``picturehash`` of every stored ``Annonce``.

    Args:
        picture: URL of the picture to compare.

    Returns:
        The ``Annonce`` looked up by the first stored hash whose distance
        to the new hash is below ``self.HASH_SIMILAR_TRESHOLD``, or
        ``False`` when no stored hash is close enough.
    """
    # Release the HTTP connection as soon as the hash has been computed.
    response = urlopen(picture)
    try:
        new_hash = phash(Image.open(response))
    finally:
        response.close()

    for ad in Annonce.select():
        old_hash = ad.picturehash
        if old_hash is None:
            # Ads stored without a picture cannot be compared.
            continue
        if hex_to_hash(old_hash) - new_hash < self.HASH_SIMILAR_TRESHOLD:
            return Annonce.get(Annonce.picturehash == old_hash)

    # Every stored hash was inspected and none was similar.
    return False
def post():
    """Post the ads not yet on Trello and return how many were posted.

    Every ``Annonce`` whose ``posted2trello`` flag is false is turned into
    a card on the list returned by ``get_list`` for its site. The ad's
    pictures and its link are attached to the card, then the ad is
    flagged as posted and saved.
    """
    # `== False` builds the query expression passed to where(); it must
    # not be rewritten as `not ...`.
    pending = Annonce.select().where(
        Annonce.posted2trello == False).order_by(Annonce.site.asc())

    count = 0
    for annonce in pending:
        title = "%s de %sm² à %s @ %s€" % (
            annonce.title, annonce.surface, annonce.city, annonce.price)

        details = (
            annonce.created.strftime("%a %d %b %Y %H:%M:%S"),
            annonce.rooms,
            annonce.bedrooms,
            annonce.charges,
            annonce.telephone,
        )
        description = "Créé le : %s\n\n" \
                      "%s pièces, %s chambre(s)\n" \
                      "Charges : %s\n" \
                      "Tel : %s\n\n" % details

        # The ad text, when present, is appended as a quoted block.
        if annonce.description is not None:
            quoted = annonce.description.replace("\n", "\n>")
            description += ">%s" % quoted

        card = get_list(annonce.site).add_card(title, desc=description)

        # Attach each picture only when the ad has a pictures collection.
        if annonce.pictures is not None:
            for picture in annonce.pictures:
                card.attach(url=picture.url, name=annonce.title)

        card.attach(url=annonce.link)

        annonce.posted2trello = True
        annonce.save()
        count += 1

    return count
def search(parameters):
    """Query the PAP web service and store every returned ad.

    Args:
        parameters: search settings. Reads ``price`` and ``surface``
            (indexes 0 and 1), ``rooms`` and ``bedrooms`` (index 0),
            ``cities`` (each item's index 1 is passed to
            ``place_search``) and ``pap`` (extra query parameters merged
            into the payload).
    """
    # Build the query parameters.
    payload = {
        'recherche[prix][min]': parameters['price'][0],  # min rent
        'recherche[prix][max]': parameters['price'][1],  # max rent
        'recherche[surface][min]': parameters['surface'][0],  # min surface
        'recherche[surface][max]': parameters['surface'][1],  # max surface
        'recherche[nb_pieces][min]': parameters['rooms'][0],  # min rooms
        'recherche[nb_chambres][min]': parameters['bedrooms'][0],  # min bedrooms
        'size': 200,
        'page': 1
    }

    # Merge the PAP-specific parameters.
    payload.update(parameters['pap'])

    params = urlencode(payload)

    # Append one geo id per city.
    for city in parameters['cities']:
        params += "&recherche[geo][ids][]=%s" % place_search(city[1])

    request = requests.get("https://ws.pap.fr/immobilier/annonces",
                           params=unquote(params), headers=header)
    data = request.json()

    # The ad list may be absent from the response (presumably when nothing
    # matches); treat that as an empty result instead of a KeyError.
    ads = (data.get('_embedded') or {}).get('annonce') or []

    for ad in ads:
        # The listing entry is only a summary: fetch the full ad.
        _request = requests.get(
            "https://ws.pap.fr/immobilier/annonces/%s" % ad['id'],
            headers=header)
        _data = _request.json()

        photos = []
        # `nb_photos` may be missing; None must not be compared with 0.
        if (_data.get("nb_photos") or 0) > 0:
            photos = [photo['_links']['self']['href']
                      for photo in _data["_embedded"]['photo']]

        telephones = _data.get("telephones") or []
        telephone = telephones[0].replace('.', '') if telephones else None

        # Look the ad up by id only; the remaining fields are used solely
        # when the row has to be created. `create_or_get` no longer exists
        # in peewee 3.
        annonce, created = Annonce.get_or_create(
            id='pap-%s' % _data.get('id'),
            defaults={
                'site': "PAP",
                'title': "%s %s pièces" % (_data.get("typebien"),
                                           _data.get("nb_pieces")),
                'description': str(_data.get("texte")),
                'telephone': telephone,
                'created': datetime.fromtimestamp(
                    _data.get("date_classement")),
                'price': _data.get('prix'),
                'surface': _data.get('surface'),
                'rooms': _data.get('nb_pieces'),
                'bedrooms': _data.get('nb_chambres_max'),
                'city': _data["_embedded"]['place'][0]['title'],
                'link': _data["_links"]['desktop']['href'],
                'picture': photos,
            })

        if created:
            annonce.save()
def search(parameters):
    """Query the Logic-Immo mobile API and store each ad with its pictures.

    Args:
        parameters: search settings. Reads ``price``, ``surface``,
            ``rooms`` and ``bedrooms`` (indexes 0 and 1), ``cities``
            (passed to ``search_city_code``) and ``logic-immo`` (extra
            query parameters merged into the payload).
    """
    # Build the query parameters.
    payload = {
        'client': "v8.a.3",
        'price_range': "%s,%s" % (parameters['price'][0],
                                  parameters['price'][1]),  # rent
        'area_range': "%s,%s" % (parameters['surface'][0],
                                 parameters['surface'][1]),  # surface
        'rooms_range': "%s,%s" % (parameters['rooms'][0],
                                  parameters['rooms'][1]),  # rooms
        'bedrooms_range': "%s,%s" % (parameters['bedrooms'][0],
                                     parameters['bedrooms'][1]),  # bedrooms
        'localities': ','.join(search_city_code(parameters['cities']))
    }

    # Merge the Logic-Immo-specific parameters.
    payload.update(parameters['logic-immo'])

    request = requests.post(
        "http://lisemobile.logic-immo.com/li.search_ads.php",
        params=payload, headers=header)
    data = request.json()

    for ad in data['items']:
        # Look the ad up by id only. Using every field as a lookup would
        # miss a stored ad whose price or text changed and then fail on
        # the duplicate primary key.
        annonce, created = Annonce.get_or_create(
            id='logic-immo-' + ad['identifiers']['main'],
            defaults={
                'site': "Logic Immo",
                'created': datetime.fromtimestamp(
                    ad['info']['firstOnlineDate']),
                'title': "%s %s pièces" % (
                    ad['info']['propertyType']['name'],
                    ad['properties']['rooms']),
                'description': ad['info']['text'],
                'telephone': ad['contact'].get('phone'),
                'price': ad['pricing']['amount'],
                'surface': ad['properties']['area'],
                'rooms': ad['properties']['rooms'],
                'bedrooms': ad['properties'].get('bedrooms'),
                'city': ad['location']['city']['name'],
                'link': ad['info']['link'],
            })

        if created:
            # Picture URLs are templates: fill in the size placeholders.
            # An ad without a `pictures` entry simply has no picture.
            for template in ad.get('pictures') or []:
                url = template.replace("[WIDTH]", "1440").replace(
                    "[HEIGHT]", "956").replace("[SCALE]", "3.5")
                Picture.create(url=url, annonce=annonce)
            annonce.save()
def addAnnonces(annonces):
    """Store each product of a search result as an ``Annonce``.

    Args:
        annonces: mapping whose ``products`` entry holds the ads; each ad
            provides ``idannonce``, ``prix``, ``surface``, ``codepostal``
            and ``typedetransaction``.

    Returns:
        ``False`` when ``products`` equals the empty list, ``True`` once
        every product has been processed.
    """
    products = annonces['products']
    if products == []:
        return False

    for product in products:
        fields = dict(
            id='seloger-' + product['idannonce'],
            price=product['prix'],
            surface=product['surface'],
            postalCode=product['codepostal'],
            # Only the first entry of the transaction type is stored.
            transactionType=product['typedetransaction'][0],
        )
        record, is_new = Annonce.create_or_get(**fields)
        if is_new:
            record.save()

    return True
def search(parameters):
    """Query the Leboncoin mobile API and store each listed ad.

    Args:
        parameters: search settings. Reads ``price``, ``surface`` and
            ``rooms`` (indexes 0 and 1), ``cities`` (index 0 is used as
            city, index 1 as zipcode) and ``leboncoin`` (extra query
            parameters merged into the payload).
    """
    cities = parameters['cities']

    # Build the query parameters.
    payload = dict([
        ('mrs', parameters['price'][0]),  # min rent
        ('mre', parameters['price'][1]),  # max rent
        ('sqs', surface_value(parameters['surface'][0])),  # min surface
        ('sqe', surface_value(parameters['surface'][1])),  # max surface
        ('ros', parameters['rooms'][0]),  # min rooms
        ('roe', parameters['rooms'][1]),  # max rooms
        ('zipcode', ','.join([str(cp[1]) for cp in cities])),
        ('city', ','.join([cp[0] for cp in cities])),
    ])

    # Merge the Leboncoin-specific parameters.
    payload.update(parameters['leboncoin'])

    http_headers = {
        'User-Agent': 'fr.leboncoin.android , Sony D5803 , 6.0.1',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'Keep-Alive',
        'Accept-Encoding': 'gzip'
    }

    # Token of the Leboncoin Android application, sent as the request body.
    app_token = "app_id=leboncoin_android&key=d2c84cdd525dddd7cbcc0d0a86609982c2c59e22eb01ee4202245b7b187f49f1546e5f027d48b8d130d9aa918b29e991c029f732f4f8930fc56dbea67c5118ce"

    listing = requests.post(
        "https://mobile.leboncoin.fr/templates/api/list.json",
        params=payload, headers=http_headers, data=app_token).json()

    for ad in listing['ads']:
        # Fetch the detailed view of the ad.
        detail_response = requests.post(
            "https://mobile.leboncoin.fr/templates/api/view.json",
            params={'ad_id': ad['list_id']},
            headers=http_headers, data=app_token)
        _data = detail_response.json()

        # Rooms and surface come from the generic parameter list.
        rooms = 0
        surface = 0
        for param in _data.get('parameters'):
            if param['id'] == 'rooms':
                rooms = param['value']
            if param['id'] == 'square':
                surface = param['value'].replace(" m²", "")

        if ad['company_ad'] == 1:
            site = "Leboncoin Pro"
        else:
            site = "Leboncoin Particulier"

        fields = {
            'site': site,
            'created': datetime.strptime(_data.get('formatted_date'),
                                         "%d/%m/%Y à %Hh%M"),
            'title': BeautifulSoup(_data.get('subject'), "lxml").text,
            'description': BeautifulSoup(
                _data.get('body').replace("<br />", "\n"), "lxml").text,
            'telephone': _data.get("phone"),
            'price': _data.get('price').replace(" ", ""),
            'surface': surface,
            'rooms': rooms,
            'city': _data.get('zipcode'),
            'link': "https://www.leboncoin.fr/locations/%s.htm?ca=12_s"
                    % _data.get('list_id'),
        }

        # Looked up by id; `fields` is only used when the row is created.
        annonce, created = Annonce.get_or_create(
            id='lbc-' + _data.get('list_id'), defaults=fields)

        if created:
            annonce.save()
def search(parameters):
    """Query the PAP web service and insert the ads not yet stored.

    Args:
        parameters: search settings. Reads ``price`` and ``surface``
            (indexes 0 and 1), ``rooms`` and ``bedrooms`` (index 0),
            ``cities`` (each item's index 1 is passed to
            ``place_search``) and ``pap`` (extra query parameters merged
            into the payload).
    """
    payload = {
        'recherche[prix][min]': parameters['price'][0],  # min rent
        'recherche[prix][max]': parameters['price'][1],  # max rent
        'recherche[surface][min]': parameters['surface'][0],  # min surface
        'recherche[surface][max]': parameters['surface'][1],  # max surface
        'recherche[nb_pieces][min]': parameters['rooms'][0],  # min rooms
        'recherche[nb_chambres][min]': parameters['bedrooms'][0],  # min bedrooms
        'size': 200,
        'page': 1
    }

    # Merge the PAP-specific parameters.
    payload.update(parameters['pap'])

    params = urlencode(payload)

    # Append the cities (PAP-specific id resolved by `place_search`).
    for city in parameters['cities']:
        params += "&recherche[geo][ids][]=%s" % place_search(city[1])

    # Query the service.
    request = requests.get("https://ws.pap.fr/immobilier/annonces",
                           params=unquote(params), headers=header)
    data = request.json()

    # Nothing to store when the response carries no ad list.
    embedded = data.get('_embedded') or {}
    if 'annonce' not in embedded:
        return

    for ad in embedded['annonce']:
        # The listing entry is only a summary: fetch the full ad.
        _request = requests.get(
            "https://ws.pap.fr/immobilier/annonces/%s" % ad['id'],
            headers=header)
        _data = _request.json()

        photos = []
        # `nb_photos` may be missing; None must not be compared with 0.
        if (_data.get("nb_photos") or 0) > 0:
            photos = [photo['_links']['self']['href']
                      for photo in _data["_embedded"]['photo']]

        telephones = _data.get("telephones") or []
        telephone = telephones[0].replace('.', '') if telephones else None

        # Insert only when the id is unknown. The lookup is done on the id
        # alone: with every field in the lookup, a stored ad whose data
        # changed would not be found and the insert would then fail on the
        # duplicate primary key.
        annonce, created = Annonce.get_or_create(
            id='pap-%s' % _data.get('id'),
            defaults={
                'site': "PAP",
                'title': "%s %s pièces" % (_data.get("typebien"),
                                           _data.get("nb_pieces")),
                'description': str(_data.get("texte")),
                'telephone': telephone,
                'created': datetime.fromtimestamp(
                    _data.get("date_classement")),
                'price': _data.get('prix'),
                'surface': _data.get('surface'),
                'rooms': _data.get('nb_pieces'),
                'bedrooms': _data.get('nb_chambres_max'),
                'city': _data["_embedded"]['place'][0]['title'],
                'link': _data["_links"]['desktop']['href'],
                # Photo URLs are stored as one comma-separated string.
                'picture': ','.join(photos),
            })

        if created:
            annonce.save()
            print(annonce.as_text())
def save(self, uid, site, created, title, city, link, price, surface,
         description=None, telephone=None, rooms=None, bedrooms=None,
         picture=None):
    """Store a new ad unless it is already known.

    Args:
        uid: identifier of the ad, used as its ``id``.
        site, created, title, city, link, price, surface, description,
            telephone, rooms, bedrooms: values stored on the ad as-is.
        picture: optional sequence of picture URLs; each one is compared
            with stored ads, and the first is hashed for ``picturehash``.

    Returns:
        ``False`` when an ad with this ``uid`` already exists, or when a
        similar ad exists that has not been posted to Trello yet;
        ``True`` once the ad has been created.
    """
    # An ad with this id already exists: nothing to do.
    try:
        Annonce.get_by_id(uid)
    except DoesNotExist:
        pass
    else:
        return False

    duplicate = False
    match = None

    # Does the ad exist under another id, recognisable by its pictures?
    if picture is not None:
        for pic in picture:
            match = self.__find_similar_ad_from_pic(pic)
            if not match:
                continue

            logging.info("(" + site + ") ad for " + title
                         + " already exists : " + link + " = " + match.link)
            duplicate = True

            if not match.posted2trello:
                # The similar ad is not on Trello yet; this one will be
                # processed and saved on the next launch.
                return False

            TrelloModule().add_new_link(match, link)
            break

    # Only the first picture is hashed.
    if picture is not None and len(picture) > 0:
        first_hash = phash(Image.open(urlopen(picture[0])))
    else:
        first_hash = None

    if match:
        trello_id = match.idtrello
    else:
        trello_id = None

    annonce = Annonce.create(
        id=uid,
        site=site,
        created=created,
        title=title,
        description=description,
        telephone=telephone,
        price=price,
        surface=surface,
        rooms=rooms,
        bedrooms=bedrooms,
        city=city,
        link=link,
        picture=picture,
        picturehash=first_hash,
        # A duplicate is flagged as already posted.
        posted2trello=duplicate,
        isduplicate=duplicate,
        trelloid=trello_id)

    suffix = "(duplicate)" if duplicate else ""
    logging.info("(" + site + ") new ad saved : " + title + suffix)
    annonce.save()
    return True
def search(parameters):
    """Query the SeLoger web service and store each ad with its photos.

    Args:
        parameters: search settings. Reads ``price`` and ``surface``
            (indexes 0 and 1), ``rooms`` and ``bedrooms`` (inclusive
            min/max at indexes 0 and 1), ``cities`` (index 2 of each item
            is converted to int) and ``seloger`` (extra query parameters,
            including ``idtt``, merged into the payload).
    """
    idtt = parameters['seloger']['idtt']

    # Build the query parameters. The `px_loyer*` keys are only filled
    # when idtt == 1 and the `px*` keys when idtt == 2.
    payload = {
        'px_loyermin': parameters['price'][0] if idtt == 1 else None,
        'px_loyermax': parameters['price'][1] if idtt == 1 else None,
        'pxmin': parameters['price'][0] if idtt == 2 else None,
        'pxmax': parameters['price'][1] if idtt == 2 else None,
        'surfacemin': parameters['surface'][0],
        'surfacemax': parameters['surface'][1],
        # e.g. parameters['rooms'] = (2, 4) gives [2, 3, 4]
        'nbpieces': list(range(parameters['rooms'][0],
                               parameters['rooms'][1] + 1)),
        # e.g. parameters['bedrooms'] = (2, 4) gives [2, 3, 4]
        'nb_chambres': list(range(parameters['bedrooms'][0],
                                  parameters['bedrooms'][1] + 1)),
        'ci': [int(cp[2]) for cp in parameters['cities']]
    }

    # Merge the SeLoger-specific parameters.
    payload.update(parameters['seloger'])

    headers = {
        'user-agent':
            'Dalvik/2.1.0 (Linux; U; Android 6.0.1; D5803 Build/MOB30M.Z1)'
    }

    request = requests.get("http://ws.seloger.com/search_4.0.xml",
                           params=payload, headers=headers)
    xml_root = ET.fromstring(request.text)

    for annonceNode in xml_root.findall('annonces/annonce'):
        # Second request: the description and phone number are only
        # available in the ad detail.
        _payload = {
            'noAudiotel': 1,
            'idAnnonce': annonceNode.findtext('idAnnonce')
        }
        _request = requests.get(
            "http://ws.seloger.com/annonceDetail_4.0.xml",
            params=_payload, headers=headers)
        # Parse the detail document once and reuse it.
        detail = ET.fromstring(_request.text)

        # An ad without a <photos> element simply has no photo.
        photos_node = annonceNode.find("photos")
        photos = []
        if photos_node is not None:
            photos = [photo.findtext("stdUrl") for photo in photos_node]

        # SeLoger may not provide a title for an ad.
        title = annonceNode.findtext('titre')
        if title is None:
            title = ("Appartement " + annonceNode.findtext('nbPiece')
                     + " pièces")

        # Look the ad up by id only. Using every field as a lookup would
        # miss a stored ad whose data changed and then fail on the
        # duplicate primary key.
        annonce, created = Annonce.get_or_create(
            id='seloger-' + annonceNode.find('idAnnonce').text,
            defaults={
                'site': 'SeLoger',
                'title': title,
                'description': detail.findtext("descriptif"),
                'telephone': detail.findtext("contact/telephone"),
                'created': datetime.strptime(
                    annonceNode.findtext('dtCreation'),
                    '%Y-%m-%dT%H:%M:%S'),
                'price': annonceNode.find('prix').text,
                'charges': annonceNode.find('charges').text,
                'surface': annonceNode.find('surface').text,
                'rooms': annonceNode.find('nbPiece').text,
                'bedrooms': annonceNode.find('nbChambre').text,
                'city': annonceNode.findtext('ville'),
                'link': annonceNode.findtext('permaLien'),
            })

        if created:
            for photo in photos:
                Picture.create(url=photo, annonce=annonce)
            annonce.save()
def search(parameters):
    """Query the Leboncoin finder API and store the ads not yet known.

    The detail of an ad is only requested when no ``Annonce`` with its id
    exists, which limits the number of API calls.

    Args:
        parameters: search settings. Reads ``surface`` and ``price``
            (indexes 0 and 1) and ``cities`` (index 1 of each item is
            used as zipcode).
    """
    # Build the query parameters.
    payload = {
        "limit": 35,
        "limit_alu": 3,
        "filters": {
            "enums": {
                "ad_type": ["offer"]
            },
            "category": {
                "id": "10"
            },
            "location": {
                "locations": [
                    {'zipcode': str(cp[1])} for cp in parameters['cities']
                ]
            },
            "ranges": {
                "square": {
                    "min": parameters['surface'][0],
                    "max": parameters['surface'][1]
                },
                "price": {
                    "min": parameters['price'][0],
                    "max": parameters['price'][1]
                }
            },
            "keywords": {}
        }
    }

    header = {'api_key': 'ba0c2dad52b3ec'}
    request = requests.post("https://api.leboncoin.fr/finder/search",
                            json=payload, headers=header)
    data = request.json()

    for ad in data['ads']:
        # Skip ads already stored. Only "not found" means the ad is new;
        # any other error (database, network) must surface instead of
        # being mistaken for a missing row.
        try:
            Annonce.get(id='lbc-' + str(ad['list_id']))
        except Annonce.DoesNotExist:
            pass
        else:
            continue

        _request = requests.get(
            "https://api.leboncoin.fr/finder/classified/"
            + str(ad['list_id']),
            headers=header)
        _data = _request.json()

        # Rooms and surface come from the generic attribute list.
        rooms = 0
        surface = None
        for param in _data.get('attributes') or []:
            if param['key'] == 'rooms':
                rooms = param['value']
            if param['key'] == 'square':
                surface = param['value'].replace(" m²", "")

        # Keep the surface only when it is a plain number; an ad without a
        # `square` attribute (surface still None) falls back to 0 instead
        # of failing on a string method.
        if surface is None or not surface.replace('.', '', 1).isdigit():
            surface = 0

        # `== False` is kept on purpose: a missing/None flag is not "Pro".
        if ad['owner']['no_salesmen'] == False:
            site = "Leboncoin Pro"
        else:
            site = "Leboncoin Particulier"

        zipcode = _data.get('zipcode')

        annonce, created = Annonce.get_or_create(
            id='lbc-' + str(_data.get('list_id')),
            defaults={
                'site': site,
                'created': datetime.strptime(
                    _data.get('first_publication_date'),
                    "%Y-%m-%d %H:%M:%S"),
                'title': BeautifulSoup(_data.get('subject'), "lxml").text,
                'description': BeautifulSoup(
                    _data.get('body').replace("<br />", "\n"),
                    "lxml").text,
                'telephone': _data.get("phone"),
                'price': _data.get('price')[0],
                'surface': surface,
                'rooms': rooms,
                'city': zipcode if zipcode is not None else '',
                'link': "https://www.leboncoin.fr/locations/%s.htm?ca=12_s"
                        % _data.get('list_id'),
                'picture': _data['images']['urls_large']
                if 'urls_large' in _data['images'] else []
            })

        if created:
            annonce.save()
def search(parameters):
    """Query the SeLoger web service and store the ads not yet known.

    The detail of an ad is only requested when no ``Annonce`` with its id
    exists, which limits the number of API calls.

    Args:
        parameters: search settings. Reads ``price`` and ``surface``
            (indexes 0 and 1), ``rooms`` and ``bedrooms`` (inclusive
            min/max at indexes 0 and 1), ``cities`` (index 2 of each item
            is converted to int) and ``seloger`` (extra query parameters
            merged into the payload).
    """
    # Build the query parameters.
    payload = {
        'px_loyermin': parameters['price'][0],
        'px_loyermax': parameters['price'][1],
        'surfacemin': parameters['surface'][0],
        'surfacemax': parameters['surface'][1],
        # e.g. parameters['rooms'] = (2, 4) gives [2, 3, 4]
        'nbpieces': list(range(parameters['rooms'][0],
                               parameters['rooms'][1] + 1)),
        # e.g. parameters['bedrooms'] = (2, 4) gives [2, 3, 4]
        'nb_chambres': list(range(parameters['bedrooms'][0],
                                  parameters['bedrooms'][1] + 1)),
        'ci': [int(cp[2]) for cp in parameters['cities']]
    }

    # Merge the SeLoger-specific parameters.
    payload.update(parameters['seloger'])

    headers = {
        'user-agent':
            'Dalvik/2.1.0 (Linux; U; Android 6.0.1; D5803 Build/MOB30M.Z1)'
    }

    request = requests.get(
        "http://ws-seloger.svc.groupe-seloger.com/search_4.0.xml",
        params=payload, headers=headers)
    xml_root = ET.fromstring(request.text)

    for annonceNode in xml_root.findall('annonces/annonce'):
        ad_id = 'seloger-' + annonceNode.find('idAnnonce').text

        # Skip ads already stored. Only "not found" means the ad is new;
        # any other error must surface instead of being mistaken for a
        # missing row.
        try:
            Annonce.get(id=ad_id)
        except Annonce.DoesNotExist:
            pass
        else:
            continue

        # Second request: the description and phone number are only
        # available in the ad detail.
        _payload = {'noAudiotel': 1,
                    'idAnnonce': annonceNode.findtext('idAnnonce')}
        _request = requests.get(
            "http://ws-seloger.svc.groupe-seloger.com/annonceDetail_4.0.xml",
            params=_payload, headers=headers)
        # Parse the detail document once and reuse it.
        detail = ET.fromstring(_request.text)

        # An ad without a <photos> element simply has no photo.
        photos_node = annonceNode.find("photos")
        photos = []
        if photos_node is not None:
            photos = [photo.findtext("stdUrl") for photo in photos_node]

        # SeLoger may not provide a title for an ad.
        title = annonceNode.findtext('titre')
        if title is None:
            title = ("Appartement " + annonceNode.findtext('nbPiece')
                     + " pièces")

        annonce, created = Annonce.get_or_create(
            id=ad_id,
            site='SeLoger',
            defaults={
                'title': title,
                'description': detail.findtext("descriptif"),
                'telephone': detail.findtext("contact/telephone"),
                'created': datetime.strptime(
                    annonceNode.findtext('dtCreation'),
                    '%Y-%m-%dT%H:%M:%S'),
                'price': annonceNode.find('prix').text,
                'charges': annonceNode.find('charges').text,
                'surface': annonceNode.find('surface').text,
                'rooms': annonceNode.find('nbPiece').text,
                'bedrooms': annonceNode.find('nbChambre').text,
                'city': annonceNode.findtext('ville'),
                'link': annonceNode.findtext('permaLien'),
                'picture': photos
            }
        )

        if created:
            annonce.save()