def converted_price(price, pr_link):
    """Concatenate the numeric pieces extracted from ``price`` into one string.

    ``string_to_int`` is applied to ``price`` and every item it yields is
    stringified and appended in order.

    Args:
        price: Raw scraped value handed to ``string_to_int``.
        pr_link: Link of the listing being parsed; forwarded to ``log_error``
            when extraction fails.

    Returns:
        The concatenated string. If extraction raises, the failure is logged
        and whatever was collected up to that point (possibly ``""``) is
        returned instead of propagating the error.
    """
    pieces = []
    try:
        for piece in string_to_int(price):
            pieces.append(str(piece))
    except Exception:
        # Best-effort conversion: a malformed value must not abort the scrape.
        # ``Exception`` (not a bare except) keeps Ctrl-C / SystemExit working.
        log_error(pr_link, "ფასის ან ნომრის", True)
    return "".join(pieces)
def get_links():
    """Collect ss.ge real-estate listing links and store the unseen ones.

    Fetches one listing page to read the page count (via the ``page_len``
    selector), prints it, then walks the listing pages. Every item link is
    prefixed with ``https://ss.ge``; links not yet present in ``links_db`` are
    inserted as unparsed with source ``ss.ge``, duplicates are reported
    through ``_print.fail``.
    """
    probe_response = requests.get('https://ss.ge/en/real-estate/l?Page=2&PriceType=false&CurrencyId=1')
    raw_page_len = Selector(response=probe_response).css(_selectors["page_len"]).get()
    page_len = string_to_int(raw_page_len)[0]
    print(page_len)

    # NOTE(review): range() starts at 0, so Page=0 is requested and
    # Page=<page_len> never is — confirm whether the site is 1-indexed.
    for page_index in range(page_len):
        listing_response = requests.get(
            f'https://ss.ge/en/real-estate/l?Page={page_index}&PriceType=false&CurrencyId=1'
        )
        relative_links = Selector(response=listing_response).css(_selectors["item_links"]).getall()
        for relative_link in relative_links:
            full_link = "https://ss.ge" + relative_link
            _print.value(full_link)
            if is_duplicate(links_db, "link", full_link):
                _print.fail("დუპლიკატი")
                continue
            links_db.insert_one({
                'link': full_link,
                'parsed': False,
                "source": "ss.ge",
            })
def get_links():
    """Collect home.ge listing links for every entry in ``product_types``.

    For each product type the category page is fetched to read the page count
    (via the ``page_len`` selector; a count of 0 is treated as 1). Each index
    page is then fetched and every item link found is stored in ``links_db``
    as unparsed unless it is already there, in which case the duplicate is
    reported through ``_print.fail``.
    """
    for category in product_types:
        category_response = requests.get(f'https://www.home.ge/en/{category}.html')
        raw_page_len = Selector(response=category_response).css(_selectors["page_len"]).get()
        page_count = string_to_int(raw_page_len)[0]
        # Ensure at least one index page is visited when the pager reports 0.
        page_count = 1 if page_count == 0 else page_count

        for page_index in range(page_count):
            index_response = requests.get(
                f'https://www.home.ge/en/{category}/index{page_index}.html'
            )
            item_links = Selector(response=index_response).css(_selectors["item_links"]).getall()
            for item_link in item_links:
                _print.value(item_link)
                if is_duplicate(links_db, "link", item_link):
                    _print.fail("დუპლიკატი")
                    continue
                links_db.insert_one({'link': item_link, 'parsed': False})
def get_products():
    """Parse every unparsed link in ``links_db`` and store it as a listing.

    For each document with ``parsed == False`` the page is downloaded, its
    fields are extracted with the ``select_one`` / ``select_many`` helpers and
    the resulting record is inserted into ``real_estate_db``. On success the
    link is flagged ``parsed: True``; if the insert or the flag update fails,
    the error is logged through ``log_error`` and the loop moves on.

    Note: only the database writes are guarded. An exception raised while
    extracting fields (for example a missing breadcrumb) still propagates.
    """
    for prod in links_db.find({"parsed": False}):
        req = requests.get(prod["link"])
        _print.value(prod["link"])

        city = select_one(req, "#df_field_mdebareoba .value", True)
        bread_crumbs = select_many(req, "#bread_crumbs .point1 li a::text")
        geonames_id = geo_names(city)
        # Breadcrumb positions: [1] feeds the property type, [2] the deal type.
        deal_type = get_deal_type(bread_crumbs[2], prod['link'])
        property_type = get_property_type(bread_crumbs[1], prod['link'])
        status = get_status(
            select_one(req, "#df_field_built_status .value", True),
            prod["link"])
        street = select_one(req, "#df_field_mdebareoba_level1 .value", True)
        address = select_one(req, "#df_field_mdebareoba_level2 .value", True)
        bedrooms = select_one(req, "#df_field_bedrooms .value", True)
        bathrooms = select_one(req, "#df_field_bathrooms .value", True)
        total_area = string_to_int(
            select_one(req, "#df_field_square_feet .value", True))[0]
        floor = select_one(req, "#df_field_floor .value", True)
        floors = select_one(req, "#df_field_number_of_floors .value", True)

        try:
            _view = int(select_one(req, "#area_listing .count::text"))
        except Exception:
            # Missing or non-numeric counter: fall back to 0 views.
            # ``Exception`` instead of a bare except keeps Ctrl-C working.
            _view = 0

        outdoor_features = get_outdoor_features(req)
        indoor_features = get_indoor_features(req)
        climate_control = get_climate_control(req)

        details = [{
            "title": select_one(req, "#area_listing > h1"),
            "house_rules": "",
            "description": Translate(
                select_one(req, "#df_field_additional_information .value", True)),
        }]
        price = {
            "price_type": "total_price",
            "min_price": 0,
            "max_price": 0,
            "fix_price": converted_price(
                select_one(req, "#lm_loan_amount::attr(value)"), prod["link"]),
            "currency": "USD",
        }
        phones = [{
            "country_code": 995,
            "number": converted_price(
                select_one(req, "#df_field_phone .value a::text"), prod["link"]),
        }]
        files = get_images(req)

        try:
            real_estate_db.insert_one({
                "location": {
                    "country": {
                        "id": "GE"
                    },
                    "city": {
                        "id": geonames_id,
                        "name": city,
                        "subdivision": ""
                    },
                    "street": street,
                    "address": address,
                },
                "created_at": datetime.datetime.utcnow(),
                "deal_type": deal_type,
                "type_of_property": [property_type],
                "status": status,
                "bedrooms": bedrooms,
                "bathrooms": bathrooms,
                "total_area": total_area,
                "metric": "feet_square",
                "floor": floor,
                "floors": floors,
                "car_spaces": 0,
                "is_agent": True,
                "outdoor_features": outdoor_features,
                "indoor_features": indoor_features,
                "climate_control": climate_control,
                "detail": details,
                "price": price,
                "phones": phones,
                "files": files,
                "source": "Home.ge",
                "view": _view,
            })
            # Only flag the link once the listing has actually been stored.
            links_db.update_one({"link": prod["link"]},
                                {"$set": {"parsed": True}})
        except Exception:
            # One failed write must not stop the remaining links.
            log_error(req.url, "პროდუქტის", True)
def get_products():
    """Parse every unparsed ss.ge link and pretty-print the resulting record.

    Iterates the ``links_db`` documents with ``parsed == False`` and source
    ``ss.ge``, downloads each page, extracts the listing fields and prints the
    assembled record with ``pprint``. Nothing is written to a database and the
    link is not flagged as parsed.

    NOTE(review): the title, description, price and phone selectors are the
    same ones the Home.ge parser uses — confirm they match the ss.ge markup.
    """
    # Imported locally, as in the original; the reason is not visible here.
    from translator import Translate

    for prod in links_db.find({"parsed": False, "source": "ss.ge"}):
        req = requests.get(prod["link"])
        _print.value(prod["link"])

        bread_crumbs = select_many(req, ".detailed_page_navlist ul li a::text")
        # Breadcrumb positions: [1] property type, [2] deal type, [3] city.
        city = bread_crumbs[3]
        geonames_id = geo_names(city)
        deal_type = get_deal_type(bread_crumbs[2].strip(), prod['link'])
        property_type = get_property_type(bread_crumbs[1].strip(), prod['link'])
        # NOTE(review): "fieldValueStatusId2" has no '#' or '.' prefix, so it
        # matches an element *type* of that name — confirm the intended selector.
        status = get_status(select_one(req, "fieldValueStatusId2::text"), prod["link"])
        # "::text" is the text pseudo-element; the former ":text" is an unknown
        # pseudo-class and cannot be compiled as a CSS selector.
        street = select_one(req, ".StreeTaddressList.realestatestr::text").strip()
        address = street

        # Header parameter texts, queried once and indexed below:
        # [0] total area, [2] bedrooms, [3] floor.
        header_params = select_many(req, ".ParamsHdBlk text::text")
        bedrooms = int(header_params[2])
        bathrooms = ""
        total_area = string_to_int(header_params[0])[0]
        floor = string_to_int(header_params[3])
        floors = string_to_int(select_one(req, ".ParamsHdBlk text text span::text"))

        try:
            # Was "#.article_views ...", which is not a valid selector and made
            # the view count always fall back to 0.
            _view = int(select_one(req, ".article_views span::text"))
        except Exception:
            # Missing or non-numeric counter: fall back to 0 views.
            _view = 0

        outdoor_features = get_outdoor_features(req)
        indoor_features = get_indoor_features(req)
        climate_control = get_climate_control(req)

        details = [{
            "title": select_one(req, "#area_listing > h1"),
            "house_rules": "",
            "description": Translate(
                select_one(req, "#df_field_additional_information .value", True)),
        }]
        price = {
            "price_type": "total_price",
            "min_price": 0,
            "max_price": 0,
            "fix_price": converted_price(
                select_one(req, "#lm_loan_amount::attr(value)"), prod["link"]),
            "currency": "USD",
        }
        phones = [{
            "country_code": 995,
            "number": converted_price(
                select_one(req, "#df_field_phone .value a::text"), prod["link"]),
        }]
        files = get_images(req)

        pprint({
            "location": {
                "country": {
                    "id": "GE"
                },
                "city": {
                    "id": geonames_id,
                    "name": city,
                    "subdivision": ""
                },
                "street": street,
                "address": address,
            },
            "created_at": datetime.datetime.utcnow(),
            "deal_type": deal_type,
            "type_of_property": [property_type],
            "status": status,
            "bedrooms": bedrooms,
            "bathrooms": bathrooms,
            "total_area": total_area,
            "metric": "feet_square",
            "floor": floor,
            "floors": floors,
            "car_spaces": 0,
            "is_agent": True,
            "outdoor_features": outdoor_features,
            "indoor_features": indoor_features,
            "climate_control": climate_control,
            "detail": details,
            "price": price,
            "phones": phones,
            "files": files,
            # Matches the "ss.ge" source the links are filtered on above
            # (was the copy-pasted "Home.ge").
            "source": "ss.ge",
            "view": _view,
        })