# ---- Training configuration (values come from the argparse `args` defined above) ----
batch_size = args.batch_size
optimizer = 'sgd'
use_sparse = not args.use_dense
factor_size = args.factor_size
dummy_iter = args.dummy_iter
print_every = args.print_every
momentum = 0.9
ctx = mx.gpu(0) if args.use_gpu else mx.cpu(0)
learning_rate = 0.1

# ---- Dataset: fetch MovieLens into ./data, then build train/val iterators ----
max_user = MOVIELENS['max_user']
max_movies = MOVIELENS['max_movie']
data_dir = os.path.join(os.getcwd(), 'data')
get_movielens_data(data_dir, MOVIELENS['dataset'])
train_iter = get_movielens_iter(MOVIELENS['train'], batch_size, dummy_iter)
val_iter = get_movielens_iter(MOVIELENS['val'], batch_size, dummy_iter)

# ---- Model: matrix-factorization net; sparse embeddings unless --use-dense ----
net = matrix_fact_net(factor_size, factor_size, max_user, max_movies,
                      sparse_embed=use_sparse)

# ---- Module: bind iterator shapes, initialize params, attach SGD ----
mod = mx.module.Module(symbol=net, context=ctx,
                       data_names=['user', 'item'], label_names=['score'])
mod.bind(data_shapes=train_iter.provide_data,
         label_shapes=train_iter.provide_label)
mod.init_params(initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
# Gradients are averaged over the batch via rescale_grad.
optim = mx.optimizer.create(optimizer, learning_rate=learning_rate,
                            momentum=momentum, wd=1e-4,
                            rescale_grad=1.0 / batch_size)
mod.init_optimizer(optimizer=optim)
# use MSE as the metric
model = Model( input=[positive_item_input, negative_item_input, user_input], output=loss) model.compile(loss=identity_loss, optimizer=Adam()) return model if __name__ == '__main__': latent_dim = 100 num_epochs = 10 # Read data train, test = data.get_movielens_data() num_users, num_items = train.shape # Prepare the test triplets test_uid, test_pid, test_nid = data.get_triplets(test) model = build_model(num_users, num_items, latent_dim) # Print the model structure print(model.summary()) # Sanity check, should be around 0.5 print('AUC before training %s' % metrics.full_auc(model, test)) for epoch in range(num_epochs):
loss = model.predict({ 'user_input': user_features, 'positive_item_input': posititve_item_features, 'negative_item_input': negative_item_features })['triplet_loss'] return (loss > 0).mean() if __name__ == '__main__': num_epochs = 5 # Read data train, test = data.get_movielens_data() num_users, num_items = train.shape # Prepare the test triplets test_uid, test_pid, test_nid = data.get_triplets(test) test_user_features, test_positive_item_features, test_negative_item_features = data.get_dense_triplets( test_uid, test_pid, test_nid, num_users, num_items) # Sample triplets from the training data uid, pid, nid = data.get_triplets(train) user_features, positive_item_features, negative_item_features = data.get_dense_triplets( uid, pid, nid, num_users, num_items) model = get_graph(num_users, num_items, 256) # Print the model structure
import sys

import numpy as np

# Fit LightFM (logistic loss) on the positive-only interactions of the dataset
# named by argv[1]; the *_all matrices hold every interaction and supply the
# candidate items scored per test user below.
train, test = data.get_movielens_positive_data(
    '/data/sidana/recnet_draft/' + sys.argv[1] + '/lightfm')
uid_train, pid_train, nid_train = data.get_triplets(train)
uid_test, pid_test, nid_test = data.get_triplets(test)

model = LightFM(no_components=14, loss='logistic', learning_rate=0.01,
                item_alpha=0.0001, user_alpha=0.0001)
model.fit(train, epochs=10, num_threads=1)

train_all, test_all = data.get_movielens_data(
    '/data/sidana/recnet_draft/' + sys.argv[1] + '/lightfm')
uid_all_train, pid_all_train, nid_all_train = data.get_triplets(train_all)
uid_all_test, pid_all_test, nid_all_test = data.get_triplets(test_all)

# Export targets: predictions ('pr') and ground truth ('gt').
# NOTE(review): neither handle is closed within this chunk — confirm they are
# closed after the loop, past the end of this view.
export_basename = '/data/sidana/recnet_draft/' + sys.argv[1] + '/lightfm_all' + '/vectors/'
export_pred = open(export_basename + 'pr', 'w')
export_true = open(export_basename + 'gt', 'w')

# Loop body continues past the end of this chunk.
for u in uid_test:
    #num_items_all_test_u = test_all.getrow(u).indices.shape[0]
    #items_all_test_u = test_all.getrow(u).indices
    known_positives = test.getrow(u).indices
    scores = model.predict(u, np.unique(test_all.col))
from lightfm import LightFM
import data
import sys
import numpy as np

# Fit LightFM (logistic loss) on the positive-only IJCAI interactions; the
# *_all matrices hold every interaction and supply the candidate items scored
# per test user below.
train, test = data.get_movielens_positive_data(
    '/data/sidana/ijcai_competetion/data_analysis/')
uid_train, pid_train, nid_train = data.get_triplets(train)
uid_test, pid_test, nid_test = data.get_triplets(test)

model = LightFM(no_components=14, loss='logistic', learning_rate=0.01,
                item_alpha=0.0001, user_alpha=0.0001)
model.fit(train, epochs=10, num_threads=1)

train_all, test_all = data.get_movielens_data(
    '/data/sidana/ijcai_competetion/data_analysis/')
uid_all_train, pid_all_train, nid_all_train = data.get_triplets(train_all)
uid_all_test, pid_all_test, nid_all_test = data.get_triplets(test_all)

# Export targets: predictions ('pr'), ground truth ('gt'), raw scores ('rec').
# NOTE(review): none of these handles are closed within this chunk — confirm
# they are closed after the loop, past the end of this view.
export_basename = '/data/sidana/ijcai_competetion/baselines/vectors/'
export_pred = open(export_basename + 'pr', 'w')
export_true = open(export_basename + 'gt', 'w')
export_scores = open(export_basename + 'rec', 'w')

# Loop body continues past the end of this chunk.
for u in uid_test:
    # Score exactly the items this user has in the full test matrix.
    num_items_all_test_u = test_all.getrow(u).indices.shape[0]
    items_all_test_u = test_all.getrow(u).indices
    known_positives = test.getrow(u).indices
    scores = model.predict(u, items_all_test_u)
    #write scores for diversity metrics
# ---- Training configuration (values come from the argparse `args` defined above) ----
num_epoch = args.num_epoch
batch_size = args.batch_size
optimizer = 'sgd'
use_sparse = not args.use_dense
factor_size = args.factor_size
dummy_iter = args.dummy_iter
print_every = args.print_every
momentum = 0.9
ctx = mx.gpu(0) if args.use_gpu else mx.cpu(0)
learning_rate = 0.1

# ---- Dataset: fetch MovieLens, then build train/val iterators ----
max_user = MOVIELENS['max_user']
max_movies = MOVIELENS['max_movie']
get_movielens_data(MOVIELENS['dataset'])
train_iter = get_movielens_iter(MOVIELENS['train'], batch_size, dummy_iter)
val_iter = get_movielens_iter(MOVIELENS['val'], batch_size, dummy_iter)

# ---- Model: matrix-factorization net; sparse embeddings unless --use-dense ----
net = matrix_fact_net(factor_size, factor_size, max_user, max_movies,
                      sparse_embed=use_sparse)

# ---- Module: bind iterator shapes, initialize params, attach SGD ----
mod = mx.module.Module(symbol=net, context=ctx,
                       data_names=['user', 'item'], label_names=['score'])
mod.bind(data_shapes=train_iter.provide_data,
         label_shapes=train_iter.provide_label)
mod.init_params(initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
# Gradients are averaged over the batch via rescale_grad.
optim = mx.optimizer.create(optimizer, learning_rate=learning_rate,
                            momentum=momentum, wd=1e-4,
                            rescale_grad=1.0 / batch_size)
mod.init_optimizer(optimizer=optim)
# use MSE as the metric
# ---- Training configuration (values come from the argparse `args` defined above) ----
optimizer = 'sgd'
factor_size = args.factor_size
log_interval = args.log_interval
momentum = 0.9
# One context per requested GPU id; fall back to a single CPU context.
ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] if args.gpus else [mx.cpu()]
learning_rate = 0.1

# Seed both MXNet and NumPy for reproducible runs.
mx.random.seed(args.seed)
np.random.seed(args.seed)

# ---- Dataset: fetch MovieLens into ./data, then build train/val iterators ----
# NOTE(review): `batch_size` is defined above this chunk — confirm upstream.
max_user = MOVIELENS['max_user']
max_movies = MOVIELENS['max_movie']
data_dir = os.path.join(os.getcwd(), 'data')
get_movielens_data(data_dir, MOVIELENS['dataset'])
train_iter = get_movielens_iter(MOVIELENS['train'], batch_size)
val_iter = get_movielens_iter(MOVIELENS['val'], batch_size)

# ---- Model: matrix-factorization net (dense embeddings iff --dense) ----
net = matrix_fact_net(factor_size, factor_size, max_user, max_movies,
                      dense=args.dense)

# ---- Module: setup continues past the end of this chunk ----
mod = mx.module.Module(net, context=ctx,
                       data_names=['user', 'item'], label_names=['score'])