def normal_distribution():
    """Plot a normal density curve fitted to the prices of all residences.

    Loads every residence via ``Residences.get_all_residences``, computes the
    mean, standard deviation and variance of the ``price`` column, and draws
    the normal probability density over ``mean ± 3 * sigma`` (50 sample
    points). The mean and standard deviation are shown as a figure subtitle.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    price_series = pd.DataFrame(Residences().get_all_residences())['price']

    mu = price_series.mean()
    standard_deviation = price_series.std()
    # sigma is derived from the variance instead of reusing std().
    sigma = math.sqrt(price_series.var())

    grid = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 50)
    density = stats.norm.pdf(grid, mu, sigma)

    mean_text = 'Valoarea medie: ' + str(round(mu, 2))
    deviation_text = '\nDeviația standard: ' + str(round(standard_deviation, 2))

    plt.style.use('ggplot')
    plt.plot(grid, density)
    plt.title('Distribuția normală a prețurilor')
    plt.xlabel('Prețul chiriei lunare')
    plt.ylabel('Probabilitatea de densitate')
    plt.suptitle(mean_text + deviation_text, x=0.75, y=0.85, fontsize=10)
    plt.show()
def process_item(self, item, spider):
    """Attach a ``zone_id`` to ``item`` by matching its zone name to known zones.

    The zones are loaded through ``Residences.get_zones``. When the item has a
    ``'zone'`` entry, its lower-cased value is matched in two passes:

    1. the first zone whose lower-cased name contains the item's zone text;
    2. otherwise, the first zone whose ``similar(...)`` score against the
       item's zone text is above 0.75.

    Args:
        item: Mapping-like item; ``item['zone_id']`` is set on a match.
        spider: Unused here.

    Returns:
        The same ``item``, with ``zone_id`` set when a zone matched.
    """
    known_zones = Residences().get_zones()
    if 'zone' not in item:
        return item

    wanted = item['zone'].lower()

    # Pass 1: substring containment.
    matched = next(
        (zone for zone in known_zones if wanted in zone.get('name').lower()),
        None,
    )
    if matched is None:
        # Pass 2: fuzzy similarity, only tried when no substring match exists.
        matched = next(
            (
                zone for zone in known_zones
                if similar(wanted, zone.get('name').lower()) > 0.75
            ),
            None,
        )

    if matched is not None:
        item['zone_id'] = matched['zone_id']
    return item
def plot_surface_and_rental_price():
    """Scatter-plot ``livable_area`` against ``price`` for all residences.

    Rows whose ``livable_area`` is null are dropped before plotting. The
    figure is displayed with ``plt.show()``; nothing is returned.
    """
    frame = pd.DataFrame(Residences().get_all_residences())
    with_area = frame.loc[frame.livable_area.notna()]

    plt.figure()
    plt.style.use('ggplot')
    plt.scatter(
        with_area['livable_area'],
        with_area['price'],
        s=15,
        edgecolor="black",
        c="darkorange",
    )
    plt.xlabel("Suprafața locuibilă")
    plt.ylabel("Prețului chiriei lunare")
    plt.title("Suprafața locuibilă / Prețului chiriei lunare")
    # NOTE(review): the scatter call passes no label, so this legend has no
    # entries to show.
    plt.legend()
    plt.show()
def plot_rooms_vs_price():
    """Scatter-plot ``rooms`` against ``price`` for all residences.

    Rows whose ``rooms`` value is null are dropped before plotting. The figure
    is displayed with ``plt.show()``; nothing is returned.
    """
    frame = pd.DataFrame(Residences().get_all_residences())
    with_rooms = frame.loc[frame.rooms.notna()]

    plt.figure()
    plt.style.use('ggplot')
    plt.scatter(
        with_rooms['rooms'],
        with_rooms['price'],
        s=15,
        edgecolor="black",
        c="darkorange",
    )
    plt.xlabel("Număr camere")
    plt.ylabel("Prețului chiriei lunare")
    plt.title("Număr camere / Prețului chiriei lunare")
    # NOTE(review): the scatter call passes no label, so this legend has no
    # entries to show.
    plt.legend()
    plt.show()
def plot_surface_vs_price_per_sq_meter():
    """Scatter-plot ``livable_area`` against price per square metre.

    Only rows with a non-null, strictly positive ``livable_area`` are kept, so
    the division ``price / livable_area`` never divides by zero. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    frame = pd.DataFrame(Residences().get_all_residences())

    # A single mask replaces the two successive filters: area must be present
    # and greater than zero.
    usable = frame.livable_area.notnull() & (frame.livable_area > 0)
    frame = frame[usable]

    livable_area = frame['livable_area']
    price_per_sq_meter = frame['price'] / frame['livable_area']

    plt.figure()
    plt.style.use('ggplot')
    plt.scatter(livable_area, price_per_sq_meter,
                s=15, edgecolor="black", c="darkorange")
    plt.xlabel("Suprafața locuibilă")
    plt.ylabel("Prețul pe metrul pătrat")
    # Fixed: the title previously repeated the x-axis name on both sides of
    # the slash instead of naming the y-axis quantity.
    plt.title("Suprafața locuibilă / Prețul pe metrul pătrat")
    # NOTE(review): the scatter call passes no label, so this legend has no
    # entries to show.
    plt.legend()
    plt.show()
def predict():
    """Handle the prediction form: estimate a price interval for an apartment.

    For non-POST requests the layout is rendered with only the zone list.

    For POST requests the apartment features come from one of two places:

    * ``url_to_crawl`` is filled in: the features are whatever
      ``crawl_item(url)`` returns; an ``'error'`` entry in that result is
      shown to the user instead of a prediction.
    * otherwise: every non-empty form field is converted with ``float``.

    The features are one-hot encoded, aligned to ``rnd_columns`` and passed to
    ``lr.predict``. The rounded prediction is multiplied by 50 to get the
    lower bound of a 50-wide interval. When the features include a truthy
    ``price``, it is compared against that interval to label the listing.

    Returns:
        The rendered ``layout.html`` template.
    """
    zones = Residences().get_zones()

    if request.method != 'POST':
        return render_template("layout.html", zones=zones)

    crawl_url = request.form.get('url_to_crawl')
    if crawl_url:
        item = crawl_item(crawl_url)
        waited = 0
        # NOTE(review): ``item`` is never re-assigned inside this loop, so a
        # None result always sleeps until the 8-second limit and then fails.
        while item is None:
            print('Crawler iteration - waiting for item in flask - sleep 0.5')
            waited += 0.5
            time.sleep(0.5)
            if waited == 8:
                return render_template(
                    "layout.html",
                    zones=zones,
                    error_msg='Nu am putut face extrage detaliile despre apartament',
                )
        if 'error' in item:
            return render_template(
                "layout.html", zones=zones, error_msg=item.get('error'))
        features = item
    else:
        features = {
            key: float(val)
            for key, val in request.form.items(multi=False)
            if val
        }

    # The model input is a one-row table, hence the single-element list.
    json_ = [features]
    print('Json:', json_)

    encoded = pd.get_dummies(pd.DataFrame(json_))
    query = encoded.reindex(columns=rnd_columns, fill_value=0)

    predicted = list(lr.predict(query))
    print('Prediction:', predicted)

    price_range_min = round(predicted[0]) * 50
    price_range_max = round(predicted[0]) * 50 + 50
    string_interval = "{} - {}".format(price_range_min, price_range_max)

    specs = json_[0]
    listed_price = specs.get('price')
    value = None
    if listed_price:
        if price_range_min > listed_price:
            value = 'subevaluat'
        elif price_range_max < listed_price:
            value = 'supraevaluat'
        else:
            value = 'evaluat corect'

    return render_template(
        "layout.html",
        zones=zones,
        price_interval=string_interval,
        specs=translate_specs(specs),
        value=value,
    )
from datetime import datetime
import math

import joblib
import numpy as np
import pandas as pd

from src.models.residences import Residences
from src.visualization.feature_importance import print_feature_importance

# Script fragment: load all residences, shuffle them, and carve out the first
# 90% of rows as the training features/targets for ``price_interval``.

# Row count for the disabled sampled load (see the commented call below).
residences_nr = 12000
# NOTE(review): not read anywhere in this fragment; presumably used further
# down the script.
save = False
# Columns removed from the feature matrix: the target itself plus the raw
# price columns.
list_features_to_drop = ['price', 'currency', 'price_interval']

residence_table = Residences()
# Disabled alternative: residence_table.get_residences(residences_nr)
residences = residence_table.get_all_residences()
print('Number of residences: {}'.format(len(residences)))

# Missing values become the sentinel -999; sample(frac=1) shuffles the rows.
residences = pd.DataFrame(residences).fillna(-999).sample(frac=1)

# The target is the price interval (the raw 'price' alternative is disabled).
target = np.array(residences['price_interval'])

features = residences.drop(columns=list_features_to_drop)
features_columns = features.columns
feature_list = list(features_columns)
features = np.array(features)

# features and target have one entry per residence row, so a single split
# index serves both.
_train_size = math.floor(len(features) * 0.9)
data_train = features[:_train_size]
target_train = target[:_train_size]
import math

import numpy as np
import pandas as pd

from src.models.residences import Residences

# Script fragment: load 1600 residences, shuffle them, and split them 80/20
# into training and test feature matrices with ``price`` as the target.

# Columns excluded from the feature matrices (the target plus columns that
# are not used as model inputs).
list_features_to_drop = [
    'price', '_sa_instance_state', 'id', 'main_ad_id', 'conditioning',
    'heating', 'currency', 'availability', 'status', 'created_at'
]

residence_table = Residences()
residences = residence_table.get_residences(1600)
residences = pd.DataFrame(residences)
residences = residences.fillna(-1)   # missing values become the sentinel -1
residences = residences.sample(frac=1)  # shuffle the rows

# Fixed: the split used ``np.math.floor`` (``np.math`` is a deprecated alias
# that newer NumPy releases no longer provide) and had the partitions
# inverted, giving the *last* 20% of rows to training and the first 80% to
# testing.
split_index = math.floor(len(residences) * 0.8)
residences_train = residences[:split_index]
residences_test = residences[split_index:]

# Fixed: features are now taken from each partition. Previously both feature
# matrices were built from the full ``residences`` frame, so they did not
# line up row-for-row with their targets.
target_train = np.array(residences_train['price'])
features_train = residences_train.drop(list_features_to_drop, axis=1)
feature_train_list = list(features_train.columns)
features_train = np.array(features_train)

target_test = np.array(residences_test['price'])
features_test = residences_test.drop(list_features_to_drop, axis=1)
feature_test_list = list(features_test.columns)
features_test = np.array(features_test)

## RANDOM FOREST - KFOLD AND MODEL
def open_spider(self, spider):
    """Load the ad locations and expose them on the spider.

    Args:
        spider: Object that receives the result of
            ``Residences.get_ad_locations`` as its ``ad_locations`` attribute.
    """
    spider.ad_locations = Residences().get_ad_locations()