# Inputs for the lag / window feature step keyed on the "equipo" column.
# NOTE(review): `df`, `lag_col`, `numerical_cols` and
# `interact_categorical_numerical` are defined outside this section.
categorical_cols = ["equipo"]
lag_list = [1]
rolling_list = [1, 3, 6, 12, 24]

# Disabled alternative: the same call with rolling_function="ewm".
# df_ewm = interact_categorical_numerical(
#     df, lag_col, numerical_cols,
#     categorical_cols, lag_list,
#     rolling_list, agg_funct="sum",
#     rolling_function="ewm", freq=None,
#     group_name=None, store_name=False)

# The only variant currently enabled: rolling_function="rolling".
df_rolling = interact_categorical_numerical(
    df,
    lag_col,
    numerical_cols,
    categorical_cols,
    lag_list,
    rolling_list,
    agg_funct="sum",
    rolling_function="rolling",
    freq=None,
    group_name=None,
    store_name=False,
)

# Disabled alternative: the same call with rolling_function="expanding".
# df_expansion = interact_categorical_numerical(
#     df, lag_col, numerical_cols,
#     categorical_cols, lag_list,
#     rolling_list, agg_funct="sum",
#     rolling_function="expanding", freq=None,
#     group_name=None, store_name=False)

# Key columns referenced by the (currently disabled) merges below.
id_columns = ["equipo", "partido_equipo_num"]

# NOTE(review): no merge of `df_rolling` is visible in this section; only the
# merges for the disabled variants appear here -- confirm it is merged later.
# df = df.merge(df_ewm, on=id_columns)
# df = df.merge(df_expansion, on=id_columns)
# NOTE(review): `name` and `new_numerical_cols` are bound before this section;
# this append is presumably the tail of a loop whose header is not visible
# here -- confirm its indentation against the preceding code.
new_numerical_cols.append(name)

# Extend the feature list with the newly created column names.
numerical_cols = numerical_cols + new_numerical_cols
print(numerical_cols)

# Processing lag and window functions.
lag_col = "date"
categorical_cols = ["fullVisitorId"]
lag_list = [0, 1, 3, 7]
rolling_list = [1, 3, 7, 14]

# NOTE(review): `interact_categorical_numerical` and `clean_dataset` are
# defined elsewhere; their exact output schema is not visible here.
df_ewm = interact_categorical_numerical(
    df_timeseries,
    lag_col,
    numerical_cols,
    categorical_cols,
    lag_list,
    rolling_list,
    agg_funct="sum",
    rolling_function="ewm",
    freq=None,
    group_name=None,
    store_name=False,
)
df_ewm = clean_dataset(df_ewm)
# Replace any remaining NaN values with 0 before merging.
df_ewm = df_ewm.replace(np.nan, 0)

# The row count is printed before and after the merge: how="inner" keeps only
# keys present on both sides, so any change in the number of rows shows up here.
print("TEST: ", len(df))
df = df.merge(df_ewm, on=["fullVisitorId", "date"], how="inner")
print("TEST: ", len(df))

# Drop the intermediate frame once it has been merged into `df`.
del df_ewm

df.to_csv("../input/df_rolling_past.csv")

# Processing lag and window functions.