] if args.dataset == 'kkbox_v2': df_test = df_train.sample(frac=0.25) df_train = df_train.drop(df_test.index) df_val = df_train.sample(frac=0.1) df_train = df_train.drop(df_val.index) elif not (args.dataset == 'kkobx'): df_test = df_train.sample(frac=0.2) df_train = df_train.drop(df_test.index) df_val = df_train.sample(frac=0.2) df_train = df_train.drop(df_val.index) standardize = [([col], StandardScaler()) for col in cols_standardize] leave = [(col, None) for col in cols_leave] categorical = [(col, OrderedCategoricalLong()) for col in cols_categorical] x_mapper_float = DataFrameMapper(standardize + leave) x_mapper_long = DataFrameMapper(categorical) x_fit_transform = lambda df: tt.tuplefy( x_mapper_float.fit_transform(df).astype(np.float32), x_mapper_long.fit_transform(df)) x_transform = lambda df: tt.tuplefy( x_mapper_float.transform(df).astype(np.float32), x_mapper_long.transform(df)) x_train = x_fit_transform(df_train) x_val = x_transform(df_val) x_test = x_transform(df_test) num_embeddings = x_train[1].max(0) + 1
# Rename the columns positionally: eight features x0..x7 followed by
# "duration" and "event".
df_train.columns = [
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "duration", "event",
]

# Column groups, each handled by a different transformer below.
cols_standardize = ["x0", "x3", "x4", "x6"]
cols_leave = ["x1", "x7"]
cols_categorical = ["x2", "x5"]

# The float-feature transformer specs are the same in both branches.
standardize = [([name], StandardScaler()) for name in cols_standardize]
leave = [(name, None) for name in cols_leave]

if cols_categorical:
    # One more slot than the number of distinct values seen in each
    # categorical column of the training frame.
    num_embeddings = [
        len(df_train[name].unique()) + 1 for name in cols_categorical
    ]
    # Embedding width: half the table size, rounded up.
    embedding_dims = [math.ceil(size / 2) for size in num_embeddings]

    categorical = [
        (name, OrderedCategoricalLong()) for name in cols_categorical
    ]
    x_mapper_float = DataFrameMapper(standardize + leave)
    x_mapper_long = DataFrameMapper(categorical)

    def x_fit_transform(df):
        """Fit both mappers on ``df``; return ``(float32 block, categorical block)``."""
        float_part = x_mapper_float.fit_transform(df).astype(np.float32)
        long_part = x_mapper_long.fit_transform(df)
        return tt.tuplefy(float_part, long_part)

    def x_transform(df):
        """Apply the already-fitted mappers to ``df`` (no re-fitting)."""
        float_part = x_mapper_float.transform(df).astype(np.float32)
        long_part = x_mapper_long.transform(df)
        return tt.tuplefy(float_part, long_part)
else:
    # No categorical columns: a single mapper over the float features.
    x_mapper = DataFrameMapper(standardize + leave)