sep=",", header=0) # convert to implicit data and do negative sampling afterwards data["label"] = 1 # split into train and test data based on time train_data, test_data = split_by_ratio_chrono(data, test_size=0.2) # specify complete columns information sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"] dense_col = ["age"] user_col = ["sex", "age", "occupation"] item_col = ["genre1", "genre2", "genre3"] train_data, data_info = DatasetFeat.build_trainset(train_data, user_col, item_col, sparse_col, dense_col) test_data = DatasetFeat.build_testset(test_data, sparse_col, dense_col) # sample negative items for each record train_data.build_negative_samples(data_info) test_data.build_negative_samples(data_info) print(data_info) # n_users: 5962, n_items: 3226, data sparsity: 0.4185 % ytb_ranking = YouTubeRanking(task="ranking", data_info=data_info, embed_size=16, n_epochs=3, lr=1e-4, batch_size=512, use_bn=True,
col_names = [ "user", "item", "label", "time", "sex", "age", "occupation", "genre1", "genre2", "genre3" ] data = pd.read_csv("sample_data/sample_movielens_merged.csv", sep=",", header=0) train, test = split_by_ratio_chrono(data, test_size=0.2) sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"] dense_col = ["age"] user_col = ["sex", "age", "occupation"] item_col = ["genre1", "genre2", "genre3"] train_data, data_info = DatasetFeat.build_trainset(train, user_col, item_col, sparse_col, dense_col, shuffle=False) test_data = DatasetFeat.build_testset(test, shuffle=False) print(data_info) train_data.build_negative_samples(data_info, num_neg=1, item_gen_mode="random", seed=2020) test_data.build_negative_samples(data_info, num_neg=1, item_gen_mode="random", seed=2222) deepfm = DeepFM("ranking", data_info,
sep=",", header=0) train_data, eval_data = split_by_ratio_chrono(data, test_size=0.2) # specify complete columns information sparse_col = ["sex", "occupation"] multi_sparse_col = [["genre1", "genre2", "genre3"]] # should be list of list dense_col = ["age"] user_col = ["sex", "age", "occupation"] item_col = ["genre1", "genre2", "genre3"] train_data, data_info = DatasetFeat.build_trainset( train_data=train_data, user_col=user_col, item_col=item_col, sparse_col=sparse_col, dense_col=dense_col, multi_sparse_col=multi_sparse_col, ) eval_data = DatasetFeat.build_testset(eval_data) print(data_info) # do negative sampling, assume the data only contains positive feedback train_data.build_negative_samples(data_info, item_gen_mode="random", num_neg=1, seed=2020) eval_data.build_negative_samples(data_info, item_gen_mode="random", num_neg=1, seed=2222)
if __name__ == "__main__": col_names = ["user", "item", "label", "time", "sex", "age", "occupation", "genre1", "genre2", "genre3"] all_data = pd.read_csv("sample_data/sample_movielens_merged.csv", sep=",", header=0) # use first half data as first training part first_half_data = all_data[:(len(all_data) // 2)] train, test = split_by_ratio_chrono(first_half_data, test_size=0.2) sparse_col = ["sex", "occupation", "genre1", "genre2", "genre3"] dense_col = ["age"] user_col = ["sex", "age", "occupation"] item_col = ["genre1", "genre2", "genre3"] train_data, data_info = DatasetFeat.build_trainset(train, user_col, item_col, sparse_col, dense_col, shuffle=False) test_data = DatasetFeat.build_testset(test, shuffle=False) print(data_info) train_data.build_negative_samples(data_info, num_neg=1, item_gen_mode="random", seed=2020) test_data.build_negative_samples(data_info, num_neg=1, item_gen_mode="random", seed=2222) deepfm = DeepFM("ranking", data_info, embed_size=16, n_epochs=2, lr=1e-4, lr_decay=False, reg=None, batch_size=2048, num_neg=1, use_bn=False, dropout_rate=None, hidden_units="128,64,32", tf_sess_config=None) deepfm.fit(train_data, verbose=2, shuffle=True, eval_data=test_data, metrics=["loss", "balanced_accuracy", "roc_auc", "pr_auc", "precision", "recall", "map", "ndcg"],
data = pd.read_csv("sample_data/sample_movielens_merged.csv", sep=",", header=0) train_data, eval_data = split_by_ratio_chrono(data, test_size=0.2) # specify complete columns information sparse_col = ["sex", "occupation"] multi_sparse_col = [["genre1", "genre2", "genre3"]] # should be list of list dense_col = ["age"] user_col = ["sex", "age", "occupation"] item_col = ["genre1", "genre2", "genre3"] train_data, data_info = DatasetFeat.build_trainset( train_data=train_data, user_col=user_col, item_col=item_col, sparse_col=sparse_col, dense_col=dense_col, multi_sparse_col=multi_sparse_col, pad_val=["missing"] # specify padding value ) eval_data = DatasetFeat.build_testset(eval_data) print(data_info) # do negative sampling, assume the data only contains positive feedback train_data.build_negative_samples(data_info, item_gen_mode="random", num_neg=1, seed=2020) eval_data.build_negative_samples(data_info, item_gen_mode="random", num_neg=1, seed=2222) deepfm = DeepFM("ranking", data_info, embed_size=16, n_epochs=2, lr=1e-4, lr_decay=False, reg=None, batch_size=2048, num_neg=1, use_bn=False, dropout_rate=None,