示例#1
0
        ]

    # Carve test and validation sets out of df_train.  Each sample is drawn
    # from what is left of df_train at that point, so the validation fraction
    # applies to the frame that remains after the test rows were removed.
    # NOTE(review): drop() removes rows by index label; this assumes the
    # index of df_train is unique -- confirm against the loading code.
    if args.dataset == 'kkbox_v2':
        # 25% test, then 10% of the remainder for validation.
        df_test = df_train.sample(frac=0.25)
        df_train = df_train.drop(df_test.index)
        df_val = df_train.sample(frac=0.1)
        df_train = df_train.drop(df_val.index)
    elif args.dataset not in ('kkbox', 'kkobx'):
        # 'kkbox' is excluded from re-splitting (presumably its validation and
        # test frames are already provided earlier -- confirm).  The guard used
        # to test only the misspelling 'kkobx', which made it true for 'kkbox'
        # as well; the misspelling is still accepted so that any caller
        # relying on it keeps its old behavior.
        # 20% test, then 20% of the remainder for validation.
        df_test = df_train.sample(frac=0.2)
        df_train = df_train.drop(df_test.index)
        df_val = df_train.sample(frac=0.2)
        df_train = df_train.drop(df_val.index)

    # Per-column transformer specs: StandardScaler for the columns to
    # standardize, no transformer (None) for the columns to leave as-is, and
    # OrderedCategoricalLong for the categorical columns.
    standardize = [([name], StandardScaler()) for name in cols_standardize]
    leave = [(name, None) for name in cols_leave]
    categorical = [
        (name, OrderedCategoricalLong()) for name in cols_categorical
    ]

    # Two separate mappers so the float features and the categorical output
    # are produced as two distinct arrays.
    x_mapper_float = DataFrameMapper(standardize + leave)
    x_mapper_long = DataFrameMapper(categorical)

    def x_fit_transform(df):
        """Fit both mappers on ``df``; return (float32 part, categorical part)."""
        float_part = x_mapper_float.fit_transform(df).astype(np.float32)
        long_part = x_mapper_long.fit_transform(df)
        return tt.tuplefy(float_part, long_part)

    def x_transform(df):
        """Apply the already-fitted mappers to ``df``; same layout as above."""
        float_part = x_mapper_float.transform(df).astype(np.float32)
        long_part = x_mapper_long.transform(df)
        return tt.tuplefy(float_part, long_part)

    # Fit on the training frame only, then reuse the fitted mappers for the
    # validation and test frames.
    x_train = x_fit_transform(df_train)
    x_val, x_test = x_transform(df_val), x_transform(df_test)
    # Max over axis 0 of the training categorical output, plus one.
    num_embeddings = x_train[1].max(0) + 1
示例#2
0
        # Rename the columns positionally: x0..x7 followed by "duration" and
        # "event".
        df_train.columns = [
            "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "duration", "event"
        ]
        # Group the x-columns by the preprocessing each group receives when
        # the mappers are built: standardized, left unchanged, or categorical.
        cols_standardize = ["x0", "x3", "x4", "x6"]
        cols_leave = ["x1", "x7"]
        cols_categorical = ["x2", "x5"]

    # With categorical columns present, build two mappers so the float
    # features and the categorical output come out as separate arrays.
    if len(cols_categorical) > 0:
        # One entry per categorical column: the number of distinct values in
        # df_train plus one (presumably a spare index for unseen or missing
        # categories -- TODO confirm against OrderedCategoricalLong).
        num_embeddings = [
            len(df_train[cat].unique()) + 1 for cat in cols_categorical
        ]
        # Each embedding dimension is half its num_embeddings entry, rounded up.
        embedding_dims = [math.ceil(n_emb / 2) for n_emb in num_embeddings]

        # NOTE(review): `standardize` and `leave` are built identically in the
        # else-branch below; they could be hoisted above the `if`.
        standardize = [([col], StandardScaler()) for col in cols_standardize]
        leave = [(col, None) for col in cols_leave]
        categorical = [(col, OrderedCategoricalLong())
                       for col in cols_categorical]

        x_mapper_float = DataFrameMapper(standardize + leave)
        x_mapper_long = DataFrameMapper(categorical)
        # Fits both mappers on `df` and returns the pair
        # (float32 output of the float mapper, output of the categorical mapper).
        x_fit_transform = lambda df: tt.tuplefy(
            x_mapper_float.fit_transform(df).astype(np.float32),
            x_mapper_long.fit_transform(df))
        # Same pair, using the mappers as already fitted (no refit).
        x_transform = lambda df: tt.tuplefy(
            x_mapper_float.transform(df).astype(np.float32),
            x_mapper_long.transform(df))
    else:
        # No categorical columns: a single mapper covers the standardized and
        # the left-as-is columns.
        standardize = [([col], StandardScaler()) for col in cols_standardize]
        leave = [(col, None) for col in cols_leave]
        x_mapper = DataFrameMapper(standardize + leave)