def prepare_ds(ds):
    """Prepare a frame with per-shop customer features.

    Parses the ``Date`` column (``dd/mm/YYYY``), adds a ``Day`` column holding
    the weekday name, one-hot encodes ``Day`` via ``imp.one_hot`` and then
    runs the ``pre_u`` customer helpers in a fixed order.

    ``ds`` is modified in place (``Date`` is overwritten and ``Day`` is added)
    before it is handed to the helpers.

    Args:
        ds: DataFrame with a ``Date`` column formatted as ``%d/%m/%Y``.

    Returns:
        The value returned by the last helper in the chain
        (presumably the enriched DataFrame -- confirm against ``pre_u``).
    """
    ds['Date'] = p.to_datetime(ds['Date'], format='%d/%m/%Y')
    # ``Series.dt.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # returns the same English weekday names ('Monday', ...).
    ds['Day'] = ds['Date'].dt.day_name()
    ds = imp.one_hot(ds, 'Day', header='Day_')

    # The per-day statistics are requested *before* the IsOpen filtering;
    # this ordering is kept exactly as in the original.
    ds = pre_u.mean_std_cust_per_shop_per_day(ds)
    ds = pre_u.eliminate_IsOpen_zeros(ds)

    ds = pre_u.add_avg_cust_per_shop(ds)
    ds = pre_u.add_std_cust_per_shop(ds)
    ds = pre_u.add_max_cust_per_shop(ds)
    ds = pre_u.add_min_cust_per_shop(ds)
    return ds
def prepare_ds(ds):
    """Prepare a frame with per-shop sales features.

    Parses the ``Date`` column (``dd/mm/YYYY``), adds a ``Day`` column holding
    the weekday name, one-hot encodes ``Day`` via ``imp.one_hot`` and then
    runs the ``pre_u`` sales helpers in a fixed order.

    ``ds`` is modified in place (``Date`` is overwritten and ``Day`` is added)
    before it is handed to the helpers. The function also prints the
    ``StoreID`` / ``MeanSalesPerShopPerDay`` / ``StdSalesPerShopPerDay``
    columns to stdout.

    Args:
        ds: DataFrame with a ``Date`` column formatted as ``%d/%m/%Y``.

    Returns:
        The value returned by the last helper in the chain
        (presumably the enriched DataFrame -- confirm against ``pre_u``).
    """
    ds['Date'] = p.to_datetime(ds['Date'], format='%d/%m/%Y')
    # ``Series.dt.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # returns the same English weekday names ('Monday', ...).
    ds['Day'] = ds['Date'].dt.day_name()
    ds = imp.one_hot(ds, 'Day', header='Day_')

    ds = pre_u.mean_std_sales_per_shop_per_day(ds)
    # NOTE(review): looks like leftover debug output; kept because callers
    # may rely on it. Requires the three columns to exist at this point.
    print(ds[['StoreID', 'MeanSalesPerShopPerDay', 'StdSalesPerShopPerDay']])

    # IsOpen filtering happens after the per-day statistics, as in the
    # original ordering.
    ds = pre_u.eliminate_IsOpen_zeros(ds)

    ds = pre_u.add_avg_per_shop(ds)
    ds = pre_u.add_std_per_shop(ds)
    ds = pre_u.add_max_per_shop(ds)
    ds = pre_u.add_min_per_shop(ds)
    return ds
def __prepare_sales_train_ds(ds):
    """Prepare the sales training frame.

    Steps, in order:
      1. Parse ``Date`` (``dd/mm/YYYY``) and derive ``Day`` (weekday name).
      2. Rewrite ``Date`` as a ``YYYY-MM-DD`` string and derive ``Month`` as
         the zero-padded two-digit month string.
      3. One-hot encode ``Month``, ``Region`` and ``Day`` through ``imp``.
      4. Run the ``pre_u`` sales helpers (IsOpen filtering first).

    ``ds`` is modified in place (``Date``, ``Day`` and ``Month`` are assigned
    on the passed frame) before it is handed to the helpers.

    Args:
        ds: DataFrame with a ``Date`` column formatted as ``%d/%m/%Y`` and a
            ``Region`` column.

    Returns:
        The value returned by the last helper in the chain
        (presumably the enriched DataFrame -- confirm against ``pre_u``).
    """
    parsed_dates = p.to_datetime(ds['Date'], format='%d/%m/%Y')
    # ``Series.dt.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # returns the same English weekday names ('Monday', ...).
    ds['Day'] = parsed_dates.dt.day_name()
    # Vectorised formatting replaces the per-row ``apply``; '%m' is exactly
    # the middle field of the '%Y-%m-%d' string the original split on '-'.
    ds['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')
    ds['Month'] = parsed_dates.dt.strftime('%m')

    ds = imp.one_hot_numeric(ds, 'Month', 'Month_')
    ds = imp.one_hot_numeric(ds, 'Region', 'Region_')
    ds = imp.one_hot(ds, 'Day', header='Day_')

    ds = pre_u.eliminate_IsOpen_zeros(ds)
    ds = pre_u.mean_std_sales_per_shop_per_day(ds)
    ds = pre_u.add_avg_per_shop(ds)
    ds = pre_u.add_std_per_shop(ds)
    ds = pre_u.add_max_per_shop(ds)
    ds = pre_u.add_min_per_shop(ds)
    ds = pre_u.mean_sales_per_month_per_region(ds)
    return ds
def __prepare_customers_train_ds(das, m1, a1, m2, a2):
    """Prepare the customers training frame.

    Steps, in order:
      1. Parse ``Date`` (``dd/mm/YYYY``) and derive ``Day`` (weekday name).
      2. Rewrite ``Date`` as a ``YYYY-MM-DD`` string and derive ``Month`` as
         the zero-padded two-digit month string.
      3. One-hot encode ``Day``, ``Month`` and ``Region`` through ``imp``.
      4. Build the reference frame ``dfrom`` with
         ``utils.get_frame_out_of_range(das, m1, a1, m2, a2)``.
      5. Run the ``preu`` customer helpers, each receiving ``dfrom``.

    ``das`` is modified in place (``Date``, ``Day`` and ``Month`` are assigned
    on the passed frame) before it is handed to the helpers.

    Args:
        das: DataFrame with a ``Date`` column formatted as ``%d/%m/%Y`` and a
            ``Region`` column.
        m1, a1, m2, a2: forwarded unchanged to
            ``utils.get_frame_out_of_range`` (presumably month/year bounds of
            a date range -- TODO confirm against ``utils``).

    Returns:
        The value returned by the last helper in the chain
        (presumably the enriched DataFrame -- confirm against ``preu``).
    """
    parsed_dates = pandas.to_datetime(das['Date'], format='%d/%m/%Y')
    # ``Series.dt.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # returns the same English weekday names ('Monday', ...).
    das['Day'] = parsed_dates.dt.day_name()
    # Vectorised formatting replaces the per-row ``apply``; '%m' is exactly
    # the middle field of the '%Y-%m-%d' string the original split on '-'.
    das['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')
    das['Month'] = parsed_dates.dt.strftime('%m')

    das = imp.one_hot(das, 'Day', header='Day_')
    das = imp.one_hot_numeric(das, 'Month', 'Month_')
    das = imp.one_hot_numeric(das, 'Region', 'Region_')

    # The reference frame is taken before the IsOpen filtering below; this
    # ordering is kept exactly as in the original.
    dfrom = utils.get_frame_out_of_range(das, m1, a1, m2, a2)

    das = preu.eliminate_IsOpen_zeros(das)
    das = preu.mean_std_cust_per_shop_per_day(das, dfrom)
    das = preu.add_avg_cust_per_shop(das, dfrom)
    das = preu.add_std_cust_per_shop(das, dfrom)
    das = preu.add_max_cust_per_shop(das, dfrom)
    das = preu.add_min_cust_per_shop(das, dfrom)
    das = preu.mean_cust_per_month_per_shop(das, dfrom)
    das = preu.mean_cust_per_month_per_region(das, dfrom)
    return das
def __prepare_customers_test_ds(ds, dfrom):
    """Prepare the customers test frame.

    Steps, in order:
      1. Add ``NumberOfSales`` and ``NumberOfCustomers`` columns filled with
         float zeros.
      2. Parse ``Date`` (``dd/mm/YYYY``) and derive ``Day`` (weekday name).
      3. Rewrite ``Date`` as a ``YYYY-MM-DD`` string and derive ``Month`` as
         the zero-padded two-digit month string.
      4. One-hot encode ``Day``, ``Month`` and ``Region`` through ``imp``.
      5. Run the ``pre_u`` customer helpers, each receiving ``dfrom``.

    ``ds`` is modified in place (the columns above are assigned on the passed
    frame) before it is handed to the helpers.

    Args:
        ds: DataFrame with a ``Date`` column formatted as ``%d/%m/%Y`` and a
            ``Region`` column.
        dfrom: reference frame forwarded unchanged to every ``pre_u`` helper
            (presumably the data the statistics are computed from -- TODO
            confirm against ``pre_u``).

    Returns:
        The value returned by the last helper in the chain
        (presumably the enriched DataFrame -- confirm against ``pre_u``).
    """
    # Scalar broadcast gives the same float64 zero columns as building a
    # Series of ``np.zeros`` on ``ds.index``.
    ds['NumberOfSales'] = 0.0
    ds['NumberOfCustomers'] = 0.0

    parsed_dates = p.to_datetime(ds['Date'], format='%d/%m/%Y')
    # ``Series.dt.weekday_name`` was removed in pandas 1.0; ``day_name()``
    # returns the same English weekday names ('Monday', ...).
    ds['Day'] = parsed_dates.dt.day_name()
    # Vectorised formatting replaces the per-row ``apply``; '%m' is exactly
    # the middle field of the '%Y-%m-%d' string the original split on '-'.
    ds['Date'] = parsed_dates.dt.strftime('%Y-%m-%d')
    ds['Month'] = parsed_dates.dt.strftime('%m')

    ds = imp.one_hot(ds, 'Day', header='Day_')
    ds = imp.one_hot_numeric(ds, 'Month', 'Month_')
    ds = imp.one_hot_numeric(ds, 'Region', 'Region_')

    ds = pre_u.eliminate_IsOpen_zeros(ds)
    ds = pre_u.mean_std_cust_per_shop_per_day(ds, dfrom)
    ds = pre_u.add_avg_cust_per_shop(ds, dfrom)
    ds = pre_u.add_std_cust_per_shop(ds, dfrom)
    ds = pre_u.add_max_cust_per_shop(ds, dfrom)
    ds = pre_u.add_min_cust_per_shop(ds, dfrom)
    ds = pre_u.mean_cust_per_month_per_shop(ds, dfrom)
    ds = pre_u.mean_cust_per_month_per_region(ds, dfrom)
    return ds