def train(sppmi_mat, data_dir=DATA_DIR):
    """Fit a CoFacto model, print test-set ranking metrics and save U/V.

    Steps, in order:

    1. Load ``data_train.txt``, ``data_dev.txt`` and ``data_test.txt`` from
       ``data_dir`` via ``util.load_input_data``, each with shape
       ``(len(uid2idx), len(sid2idx))``.
    2. Fit ``cofacto.CoFacto`` on the training data and ``sppmi_mat``,
       passing the dev split as ``vad_data``.
    3. Reload ``U`` and ``V`` from the checkpoint in ``save_dir`` whose
       iteration index equals ``(number of *.npz files in save_dir) - 1``.
    4. Print Recall@20, Recall@50, NDCG@100 and MAP@100 on the test split.
    5. Write ``U`` and ``V`` to ``Model_K<N_COMPONENTS>_<DATA_SET_NAME>.npz``.

    Hyper-parameters and paths (``N_COMPONENTS``, ``max_iter``, ``N_JOBS``,
    ``save_dir``, ``lam_theta``, ``lam_beta``, ``lam_gamma``, ``c0``, ``c1``,
    ``DATA_SET_NAME``) and the id maps ``uid2idx`` / ``sid2idx`` are read
    from module-level names.

    Args:
        sppmi_mat: Passed unchanged as the second positional argument of
            ``CoFacto.fit``.
        data_dir: Directory containing the three ``data_*.txt`` input files.

    Returns:
        None. Results are printed and written to disk.
    """
    # Matrix dimensions come from the module-level id -> index maps.
    n_users = len(uid2idx)
    print('n_users:{}'.format(n_users))
    n_items = len(sid2idx)
    print('n_items:{}'.format(n_items))

    # Load the three splits with a common shape.
    shape = (n_users, n_items)
    train_data = util.load_input_data(
        os.path.join(data_dir, 'data_train.txt'), shape=shape)
    vad_data = util.load_input_data(
        os.path.join(data_dir, 'data_dev.txt'), shape=shape)
    test_data = util.load_input_data(
        os.path.join(data_dir, 'data_test.txt'), shape=shape)

    # Train the model.
    coder = cofacto.CoFacto(
        n_components=N_COMPONENTS, max_iter=max_iter, batch_size=1000,
        init_std=0.01, n_jobs=N_JOBS, random_state=98765,
        save_params=True, save_dir=save_dir, early_stopping=True,
        verbose=True, lam_theta=lam_theta, lam_beta=lam_beta,
        lam_gamma=lam_gamma, c0=c0, c1=c1)
    coder.fit(train_data, sppmi_mat, vad_data=vad_data,
              batch_users=5000, k=100)

    # Locate the checkpoint to evaluate.
    # NOTE(review): this assumes save_dir holds only this run's checkpoints,
    # numbered from 0 -- stale .npz files would shift the index. Confirm.
    n_params = len(glob.glob(os.path.join(save_dir, '*.npz')))
    last_iter_num = n_params - 1
    checkpoint_path = os.path.join(
        save_dir, 'CoFacto_K%d_iter%d.npz' % (N_COMPONENTS, last_iter_num))

    # NpzFile keeps the archive open until closed; the arrays are read out
    # inside the block so the handle is released right away.
    with np.load(checkpoint_path) as params:
        U, V = params['U'], params['V']

    # Evaluate on the test split; output lines match 'Test <name>: <score>'.
    metrics = (
        ('Recall@20', rec_eval.recall_at_k, 20),
        ('Recall@50', rec_eval.recall_at_k, 50),
        ('NDCG@100', rec_eval.normalized_dcg_at_k, 100),
        ('MAP@100', rec_eval.map_at_k, 100),
    )
    for label, metric_fn, k in metrics:
        score = metric_fn(
            train_data, test_data, U, V, k=k, vad_data=vad_data)
        print('Test %s: %.4f' % (label, score))

    # Save the factors.
    # NOTE(review): the output goes under the module-level DATA_DIR, not the
    # data_dir argument -- confirm this is intended.
    model_save_fn = os.path.join(
        DATA_DIR, 'Model_K{}_{}.npz'.format(N_COMPONENTS, DATA_SET_NAME))
    np.savez(model_save_fn, U=U, V=V)
    print('saved')
U = model[0] C = model[1] G = model[2] vad_data = None try: train_data = T1_train test_data = T1_test print 'Testing USER-GAME MATRIX' print 'Test Recall@20: %f' % rec_eval.recall_at_k( train_data, test_data, U, G, k=20, vad_data=vad_data) print 'Test Recall@50: %f' % rec_eval.recall_at_k( train_data, test_data, U, G, k=50, vad_data=vad_data) print 'Test NDCG@100: %f' % rec_eval.normalized_dcg_at_k( train_data, test_data, U, G, k=100, vad_data=vad_data) print 'Test MAP@100: %f' % rec_eval.map_at_k( train_data, test_data, U, G, k=100, vad_data=vad_data) except: print 'Error' try: train_data = T2_train test_data = T2_test print 'Testing USER-GROUP MATRIX' print 'Test Recall@20: %f' % rec_eval.recall_at_k( train_data, test_data, U, C, k=20, vad_data=vad_data) print 'Test Recall@50: %f' % rec_eval.recall_at_k( train_data, test_data, U, C, k=50, vad_data=vad_data) print 'Test NDCG@100: %f' % rec_eval.normalized_dcg_at_k( train_data, test_data, U, C, k=100, vad_data=vad_data) print 'Test MAP@100: %f' % rec_eval.map_at_k(
# Load the test split and overwrite its ``.data`` array with ones of the
# same shape and dtype.
# NOTE(review): presumably test_data is a scipy sparse matrix, which would
# make this a binarization of the stored entries -- confirm against load_data.
test_data, _ = load_data(os.path.join(DATA_DIR, 'test.csv'))
test_data.data = np.ones_like(test_data.data)


# In[32]:

# The checkpoint index is (number of *.npz files in save_dir) - 1.
# NOTE(review): assumes save_dir holds only this run's checkpoints, numbered
# from 0 -- confirm.
n_params = len(glob.glob(os.path.join(save_dir, '*.npz')))
checkpoint_path = os.path.join(
    save_dir, 'CoFacto_K%d_iter%d.npz' % (n_components, n_params - 1))

# NpzFile keeps the archive open until closed; read both arrays inside the
# block so the file handle is released immediately.
with np.load(checkpoint_path) as params:
    U, V = params['U'], params['V']

#write file for map


# In[33]:

# Disabled evaluation calls, kept from the original notebook cell:
#user_idx = rec_eval.user_idx_generator(test_data.shape[1], test_data)
#batch_user=user_idx.stop-user_idx.start
#X_pred = rec_eval._make_prediction(train_data, U, V, user_idx, batch_user, vad_data=test_data)
#print 'Test Recall@20: %.4f' % rec_eval.recall_at_k(train_data, test_data, U, V, k=20, vad_data=vad_data)
#print 'Test Recall@50: %.4f' % rec_eval.recall_at_k(train_data, test_data, U, V, k=50, vad_data=vad_data)
#print 'Test NDCG@10: %.4f' % rec_eval.normalized_dcg_at_k(train_data, test_data, U, V, k=10, vad_data=vad_data)

# A single parenthesised argument prints the same text on Python 2 and 3.
print('Test MAP@10: %.4f' % rec_eval.map_at_k(
    train_data, test_data, U, V, k=10, vad_data=vad_data))


# In[34]:

# NOTE(review): the filename hard-codes K100 while the checkpoint above is
# selected with n_components -- confirm the two agree.
np.savez('CoFactor_K100_ML20M.npz', U=U, V=V)


# In[ ]: