def full_prep_test_ds_to_cust_pred():
    """Build and save the test dataset used for customer prediction.

    Reads the imputed one-hot test dataset and the imputed one-hot
    dataset through ``d``, parses the latter's ``Date`` column
    (``%d/%m/%Y``), adds a ``Day`` column holding the weekday name, passes
    both frames to ``prepare_ds_to_customer_prediction`` and saves the
    result as ``test_dataset_for_customers_prediction.csv``.

    Returns nothing; the result is only written through ``d.save_dataset``.
    """
    test_datas = d.read_imputed_onehot_test_dataset()
    data_from = d.read_imputed_onehot_dataset()

    # NOTE(review): `p` is assumed to be the pandas alias -- confirm against
    # the file's imports.
    data_from['Date'] = p.to_datetime(data_from['Date'], format='%d/%m/%Y')

    # `Series.dt.weekday_name` was deprecated in pandas 0.23 and removed in
    # 1.0; `day_name()` returns the same English weekday names.  Fall back
    # to the old attribute only when running on a pandas that predates it.
    date_accessor = data_from['Date'].dt
    if hasattr(date_accessor, 'day_name'):
        data_from['Day'] = date_accessor.day_name()
    else:
        data_from['Day'] = date_accessor.weekday_name

    datas = prepare_ds_to_customer_prediction(test_datas, data_from)
    d.save_dataset(datas, "test_dataset_for_customers_prediction.csv")
def build_sales_predictor_train_dataset(name):
    """Prepare the sales-predictor training dataset and save it as *name*.

    The imputed one-hot dataset is read through ``d``, transformed by
    ``__prepare_sales_train_ds`` and handed to ``d.save_dataset`` together
    with ``name``.  Nothing is returned.
    """
    raw_frame = d.read_imputed_onehot_dataset()
    train_frame = __prepare_sales_train_ds(raw_frame)
    d.save_dataset(train_frame, name)
for i in range(number_of_model): p = models[i].predict(x, 500).squeeze() preds += p preds[preds < 0] = 0 for i in range(min(len(preds), number_print)): print("PRED: ", preds[i], " y: ", y[i]) print("R2: ", eva.r2(ds, preds, 'NumberOfSales')) if __name__ == '__main__': TRAIN = True LOAD = False SAVE_DF = False name = "test" ds = d.read_imputed_onehot_dataset() ds = prepare_ds(ds) d.save_dataset(ds, "fully_preprocessed_ds.csv") ds_train = utils.get_frame_in_range(ds, 3, 2016, 12, 2017) ds_test = utils.get_frame_in_range(ds, 1, 2018, 2, 2018) y = prepare_out(ds_train) real_y = np.array(y) dy = np.zeros(y.shape) x = drop_useless(ds_train) y_test = prepare_out(ds_test) if SAVE_DF: d.save_dataset(ds_test, "dataset_to_predict_sales.csv") x_test = drop_useless(ds_test) models = [] for i in range(number_of_model):
def build_cust_predictor_train_dataset(m1, a1, m2, a2):
    """Return the training dataset for the customer predictor.

    The imputed one-hot dataset is read through ``ds`` and passed, along
    with ``m1``, ``a1``, ``m2`` and ``a2`` unchanged, to
    ``__prepare_customers_train_ds``; that function's result is returned.

    NOTE(review): the four arguments are only forwarded here; they look
    like month/year range bounds, but confirm against
    ``__prepare_customers_train_ds``.
    """
    raw_frame = ds.read_imputed_onehot_dataset()
    return __prepare_customers_train_ds(raw_frame, m1, a1, m2, a2)
# Switches and settings for the analyses; only `correlation_analysis` is
# consumed in this section.
correlation_analysis = False
PCA_analysis = False
PCA_correlation_attributes = False
PCA_analysis_attribute = 'Region_AreaKM2'

# Global seaborn appearance.
# NOTE(review): `sb.set(style="white", ...)` is called right after
# `sb.set_style("whitegrid")` and presumably overrides it -- confirm which
# style is actually wanted.
sb.set_style("whitegrid")
sb.set(style="white", color_codes=True)
sb.set_context(
    rc={
        "font.family": 'sans',
        "font.size": 5,
        "axes.titlesize": 8,
        "axes.labelsize": 8
    })

# Load the dataset once and derive its nominal-only and numeric-only views.
data = datasetfun.read_imputed_onehot_dataset()
data_nominal = datasetfun.nominal_only(data)
data_numeric = datasetfun.numeric_only(data)

# Pearson correlation heatmap of the numeric view, shown only when enabled.
if correlation_analysis:
    numeric_corr = data_numeric.corr(method="pearson")
    numeric_heatmap = sb.heatmap(numeric_corr, square=True, annot=True, cmap="Blues")
    pl.show()