Exemplo n.º 1
0
def train(sppmi_mat, data_dir=DATA_DIR):
    '''
    Fit a CoFacto model, print test-set ranking metrics and save the factors.

    The train/dev/test interaction files are read from ``data_dir``. The
    model hyper-parameters (``N_COMPONENTS``, ``max_iter``, ``N_JOBS``,
    ``lam_theta``, ``lam_beta``, ``lam_gamma``, ``c0``, ``c1``) and the
    checkpoint directory ``save_dir`` are taken from module-level names.

    Parameters
    ----------
    sppmi_mat
        Forwarded as the second positional argument of ``CoFacto.fit``
        (presumably the item co-occurrence SPPMI matrix -- confirm against
        the caller).
    data_dir
        Directory holding ``data_train.txt``, ``data_dev.txt`` and
        ``data_test.txt``.

    Returns
    -------
    None
        Metrics are printed and the final ``U`` / ``V`` arrays are written
        to ``Model_K<N_COMPONENTS>_<DATA_SET_NAME>.npz`` under ``DATA_DIR``.
    '''
    # load file
    # The matrix dimensions come from the module-level id -> index maps.
    n_users = len(uid2idx)
    print('n_users:{}'.format(n_users))
    n_items = len(sid2idx)
    print('n_items:{}'.format(n_items))
    shape = (n_users, n_items)

    fn_train = os.path.join(data_dir, 'data_train.txt')
    fn_dev = os.path.join(data_dir, 'data_dev.txt')
    fn_test = os.path.join(data_dir, 'data_test.txt')
    train_data = util.load_input_data(fn_train, shape=shape)
    vad_data = util.load_input_data(fn_dev, shape=shape)
    test_data = util.load_input_data(fn_test, shape=shape)

    # train model
    coder = cofacto.CoFacto(n_components=N_COMPONENTS,
                            max_iter=max_iter,
                            batch_size=1000,
                            init_std=0.01,
                            n_jobs=N_JOBS,
                            random_state=98765,
                            save_params=True,
                            save_dir=save_dir,
                            early_stopping=True,
                            verbose=True,
                            lam_theta=lam_theta,
                            lam_beta=lam_beta,
                            lam_gamma=lam_gamma,
                            c0=c0,
                            c1=c1)
    coder.fit(train_data,
              sppmi_mat,
              vad_data=vad_data,
              batch_users=5000,
              k=100)

    # test model
    # Count only this model's checkpoints: any other ``.npz`` archive lying
    # in ``save_dir`` (e.g. from a run with a different K) would otherwise
    # shift the computed index and make us load the wrong / a missing file.
    ckpt_prefix = 'CoFacto_K%d_iter' % N_COMPONENTS
    n_params = len(glob.glob(os.path.join(save_dir, ckpt_prefix + '*.npz')))
    # Assumes checkpoints are numbered 0..n_params-1, as the count-based
    # index of the original code implies.
    last_iter_num = n_params - 1
    ckpt_path = os.path.join(save_dir,
                             '%s%d.npz' % (ckpt_prefix, last_iter_num))
    # NpzFile keeps the archive open until closed; read both arrays and
    # release the handle straight away.
    with np.load(ckpt_path) as params:
        U, V = params['U'], params['V']

    print('Test Recall@20: %.4f' % rec_eval.recall_at_k(
        train_data, test_data, U, V, k=20, vad_data=vad_data))
    print('Test Recall@50: %.4f' % rec_eval.recall_at_k(
        train_data, test_data, U, V, k=50, vad_data=vad_data))
    print('Test NDCG@100: %.4f' % rec_eval.normalized_dcg_at_k(
        train_data, test_data, U, V, k=100, vad_data=vad_data))
    print('Test MAP@100: %.4f' % rec_eval.map_at_k(
        train_data, test_data, U, V, k=100, vad_data=vad_data))

    # save
    # NOTE(review): the model is written under the global DATA_DIR, not the
    # ``data_dir`` argument -- confirm this is intended before changing it.
    model_save_fn = os.path.join(
        DATA_DIR, 'Model_K{}_{}.npz'.format(N_COMPONENTS, DATA_SET_NAME))
    np.savez(model_save_fn, U=U, V=V)
    print('saved')
Exemplo n.º 2
0
# Values handed to CoFacto as c0 / c1 below, both multiplied by `scale`
# (`scale` is defined earlier in the script, outside this excerpt).
# Presumably the confidence weights of the factorization -- confirm against
# the CoFacto implementation.
c0 = 1. * scale
c1 = 10. * scale

# Directory passed to CoFacto as save_dir; its name encodes k_ns and scale.
save_dir = os.path.join(DATA_DIR, 'ML20M_ns%d_scale%1.2E' % (k_ns, scale))

# In[29]:

# Re-import the cofacto module before building the model.
# NOTE(review): bare `reload` is a builtin only on Python 2 -- confirm the
# target interpreter or that it is imported elsewhere in the file.
reload(cofacto)
# Build the model with a fixed random_state; save_params=True together with
# save_dir means parameters are expected to be written to save_dir.
coder = cofacto.CoFacto(n_components=n_components,
                        max_iter=max_iter,
                        batch_size=1000,
                        init_std=0.01,
                        n_jobs=n_jobs,
                        random_state=98765,
                        save_params=True,
                        save_dir=save_dir,
                        early_stopping=True,
                        verbose=True,
                        lam_theta=lam_theta,
                        lam_beta=lam_beta,
                        lam_gamma=lam_gamma,
                        c0=c0,
                        c1=c1)

# NOTE(review): this instance is built with default arguments and discarded;
# it looks like a leftover notebook expression (displaying the defaults) --
# confirm the constructor has no side effects and consider removing it.
cofacto.CoFacto()

# In[30]:

# Fit on train_data with M_ns as the second positional argument, passing
# vad_data as the validation set (early_stopping=True above).
# The meaning of batch_users / k is defined by CoFacto.fit -- TODO confirm.
coder.fit(train_data, M_ns, vad_data=vad_data, batch_users=5000, k=100)

# In[31]: