def main():
    """Predict parcel prices per classification bucket and append them to a CSV.

    Relies on module-level configuration (predict_prices_using_price_parameters,
    classification_buckets, limit_date, parcel_prices_mapping, excluded_values)
    and project helpers (DatabaseHandler, get_model_filename_b, keras_load_model).
    The database connection is always closed, even on failure.
    """
    db = DatabaseHandler()
    # Output file depends on whether price parameters feed the prediction.
    output_name = ('estimated_prices.csv'
                   if predict_prices_using_price_parameters
                   else 'estimated_prices_based_on_no_price_parameters.csv')
    try:
        with open(output_name, mode='a') as out_file:
            writer = csv.writer(out_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            for bucket in classification_buckets:
                # One trained Keras model per bucket.
                model = keras_load_model(get_model_filename_b(bucket))
                proc = ('GetDataToParcelsValuation'
                        if predict_prices_using_price_parameters
                        else 'GetDataToParcelsValuationWithoutPriceParameters')
                # NOTE(review): the query is built by string formatting — if any
                # of these values can come from untrusted input, switch to a
                # parameterized call on DatabaseHandler.
                query = ("EXEC dbo.{} "
                         "@LimitDate = {}, "
                         "@BucketType={}, "
                         "@ExcludedList='{}'".format(
                             proc, limit_date,
                             parcel_prices_mapping[bucket], excluded_values))
                parcels = db.execute_query(query)
                n_cols = parcels.shape[1]
                # Skip the first attribute (OBJECTID) and the last attribute
                # (Sale_Amount), which is what the model predicts.
                features = parcels.iloc[:, 1:n_cols - 1]
                predicted = model.predict(features)
                for row, object_id in zip(predicted, parcels['OBJECTID']):
                    # Prices cannot be negative; clamp before rounding.
                    price = row[0] if row[0] >= 0 else 0
                    writer.writerow([object_id, np.uint64(round(price, 0))])
    finally:
        db.close_connection()
def recommendations(movies_watched, df):
    """Return up to 10 movie titles similar to the ones the user has watched.

    Aggregates cosine-similarity scores across all watched movies; a movie the
    user rated -1 (disliked) contributes inverted scores (1 - similarity).
    Movies the user has already seen are filtered out of the result.

    Args:
        movies_watched: iterable of movie titles the user has seen.
        df: DataFrame indexed by movie title, as expected by find_similarity().

    Returns:
        List of at most 10 recommended titles (empty if movies_watched is empty).
    """
    cos_sim = find_similarity(df)
    indexes = []
    # Running sum of similarity scores across every watched movie.
    summed_score_series = pd.Series(0, dtype="float64")
    db = DatabaseHandler("user1")
    try:
        # TO-DO: Handle case if for some reason no movie is in the list
        for title in movies_watched:
            index = index_from_title(df, title)
            # Remember every index of an already-seen movie (used for filtering).
            indexes += indexes_from_title(df, title)
            # Similarity of every other title to this one.
            # NOTE(review): `index - 1` looks like an off-by-one adjustment
            # between df indexing and the similarity matrix — confirm.
            score_series = pd.Series(cos_sim[index - 1])
            # Invert the similarity values if the user did not like the movie.
            rating = db.get_rating(title)
            print(f"{title} has rating: {rating}")
            if rating == -1:
                score_series = score_series.apply(lambda x: 1 - x)
            summed_score_series = summed_score_series.add(score_series,
                                                          fill_value=0)
    finally:
        # Close the connection even if a lookup above raises (fixes a leak:
        # the original only closed on the success path).
        db.close_connection()
    # Most similar first.
    summed_score_series = summed_score_series.sort_values(ascending=False)
    # Take enough top entries that 10 remain after dropping seen movies.
    # TO-DO: Handle if the dataframe contains fewer movies than we expect.
    top_indexes = list(summed_score_series.iloc[0:(10 + len(indexes))].index)
    seen = set(indexes)       # O(1) membership instead of O(n) list scans
    titles = list(df.index)   # hoisted: was rebuilt on every loop iteration
    movies = [titles[i] for i in top_indexes if i not in seen]
    return movies[0:10]
class Main:
    """Crawls reformagkh.ru house profiles for addresses stored in the database."""

    def __init__(self):
        self.request_count = 0
        self.dh = DatabaseHandler('database')
        if not self.dh.fill_database():
            raise RuntimeError("Error during filling the database")

    def request_house_profile(self, addr: tuple, simple: bool = True):
        """Search for the house at `addr` and store the parsed profile.

        Assumes addr ends with (..., attempt_counter, row_key) — the last two
        elements are used only for database bookkeeping (TODO confirm).
        """
        if not simple:
            return
        url = ('https://www.reformagkh.ru/search/houses?query='
               '{}+{}+{}+{}+{}&mh=on'.format(*addr)) \
            .replace('.0', '').replace(' ', '+')
        r = requests.get(url)
        # BUG FIX: the original loop never re-issued the request, so a single
        # 403 response spun forever. Retry the request after each 30 s wait.
        while '403' in str(r):
            print('Connection refused. Wait for 30 sec')
            time.sleep(30)
            r = requests.get(url)
        m = re.search(r'/myhouse/profile/view/[0-9]+', r.text)
        if m:
            # Found a profile page: scrape it and mark the address done (-1).
            self.dh.insert_result(
                self.request_house_info(m.group(0)) + (addr[-1], ))
            self.dh.update(addr[-1], code=-1)
        else:
            # Not found: bump the attempt code for this address.
            self.dh.update(addr[-1], code=addr[-2] + 1)

    @staticmethod
    def _extract(pattern: str, text: str, group: str) -> str:
        """Return the stripped named group from the first match of pattern.

        Raises AttributeError if the pattern does not match (same behavior as
        the original inline re.search(...).group(...) chains).
        """
        return re.search(pattern, text).group(group).strip()

    def request_house_info(self, profile_url: str) -> tuple:
        """Fetch a house profile page and scrape its attributes.

        Returns a tuple: (year, stages, last_change, series, building_type,
        house_type, is_wreck, cadaster_number, overlapping_type, wall_material).
        """
        url = 'https://www.reformagkh.ru{}'.format(profile_url)
        r = requests.get(url)
        # Flatten the page so the '.*?' patterns can span original line breaks.
        text = ' '.join(r.text.split('\n'))
        year = self._extract(
            r'Год ввода дома в эксплуатацию.*?<span>(?P<year>.*?)</span>',
            text, 'year')
        stages = self._extract(
            r'Количество этажей.*?<span>наибольшее.*?'
            r'<span>(?P<stages>.*?)</span>', text, 'stages')
        # TODO change date format
        last_change = ' '.join(self._extract(
            r'Последнее изменение анкеты.*?'
            r'<span class="black_text">(?P<last_change>.*?)</span>',
            text, 'last_change').split())
        series = self._extract(
            r'Серия, тип постройки здания.*?<span>(?P<series>.*?)</span>',
            text, 'series')
        building_type = series
        house_type = self._extract(
            r'Тип дома.*?<span>(?P<house_type>.*?)</span>', text, 'house_type')
        is_wreck = self._extract(
            r'Дом признан аварийным.*?<span>(?P<is_wreck>.*?)</span>',
            text, 'is_wreck')
        is_wreck = 1 if is_wreck == 'Да' else 0
        cadaster_number = self._extract(
            r'Кадастровый номер.*?10px;">(?P<cadaster_number>.*?)</td>',
            text, 'cadaster_number')
        overlapping_type = self._extract(
            r'Тип перекрытий.*?<span>(?P<overlapping_type>.*?)</span>',
            text, 'overlapping_type')
        wall_material = self._extract(
            r'Материал несущих стен.*?<span>(?P<wall_material>.*?)</span>',
            text, 'wall_material')
        return (
            year,
            stages,
            last_change,
            series,
            building_type,
            house_type,
            is_wreck,
            cadaster_number,
            overlapping_type,
            wall_material,
        )

    def run(self):
        """Process every stored address in a loop; Ctrl-C prints a summary."""
        while True:
            try:
                self.request_count += 1
                print('Request #{}'.format(self.request_count))
                print('-' * 16)
                dr = self.dh.database_reader()
                for addr in dr:
                    self.request_house_profile(addr)
                    time.sleep(3)  # be polite to the server
            except KeyboardInterrupt:
                print('\nResults:')
                self.dh.check_found()
                self.dh.count_brick_houses()
                self.dh.found_max_stages()
                print('\nBye.')
                self.dh.close_connection()
                break