예제 #1
0
        n_items = len(unique_sid)

        def load_data(csv_file, shape=(n_users, n_items)):
            """Read a ratings CSV into a float32 CSR matrix.

            The CSV must provide the columns ``uid`` (row index), ``sid``
            (column index) and ``rating`` (stored value).

            Args:
                csv_file: Path (or file-like object) accepted by
                    ``pd.read_csv``.
                shape: ``(rows, cols)`` of the resulting matrix; defaults to
                    the ``(n_users, n_items)`` pair captured when this
                    function is defined.

            Returns:
                A ``scipy.sparse.csr_matrix`` of dtype ``np.float32``.
            """
            frame = pd.read_csv(csv_file)
            user_idx = np.array(frame['uid'])
            item_idx = np.array(frame['sid'])
            ratings = np.array(frame['rating'])
            return sparse.csr_matrix((ratings, (user_idx, item_idx)),
                                     dtype=np.float32,
                                     shape=shape)

        # Load the three rating splits; load_data returns float32 CSR matrices
        # of shape (n_users, n_items) by default.
        train_data = load_data(os.path.join(DATA_DIR, 'train.csv'))
        vad_data = load_data(os.path.join(DATA_DIR, 'validation.csv'))
        test_data = load_data(os.path.join(DATA_DIR, 'test_full.csv'))

        # Pass the train/validation matrices through exp_to_imp with 0.5 as the
        # second argument (presumably an explicit-to-implicit threshold; the
        # helper is defined elsewhere -- TODO confirm). test_data is kept as
        # loaded.
        train_data_imp = exp_to_imp(train_data, 0.5)
        vad_data_imp = exp_to_imp(vad_data, 0.5)

        # Candidate numbers of latent components swept by the loop below.
        dims = np.array([1, 2, 5, 10, 20, 50, 100])

        # Convert to COO format to get at the row/column index arrays of the
        # stored entries.
        train_data_coo = train_data_imp.tocoo()
        row_tr, col_tr = train_data_coo.row, train_data_coo.col

        vad_data_coo = vad_data_imp.tocoo()
        row_vd, col_vd = vad_data_coo.row, vad_data_coo.col

        for i, dim in enumerate(dims):
            print("dim", dim)
            pf = pmf.PoissonMF(n_components=dim,
                               random_state=98765,
                               verbose=True,
예제 #2
0
    for dim in dims:

        U = np.loadtxt(CAUSEFIT_DIR + '/cause_pmf_k' + str(dim) + '_U.csv')
        V = np.loadtxt(CAUSEFIT_DIR + '/cause_pmf_k' + str(dim) + '_V.csv')
        U = (np.atleast_2d(U.T).T)
        V = (np.atleast_2d(V.T).T)
        reconstr_cau = U.dot(V.T)

        for i, K in enumerate(outdims):
            print("K0", dim, "K", K)

            D = train_data.shape[0]
            N = train_data.shape[1]
            weights = train_data * alpha
            cau = exp_to_imp(train_data)

            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])
            reconstr_cau_ph = tf.placeholder(tf.float32, [M, N])

            U = Gamma(0.3 * tf.ones([M, K]), 0.3 * tf.ones([M, K]))
            V = Gamma(0.3 * tf.ones([N, K]), 0.3 * tf.ones([N, K]))
            gamma = Gamma(tf.ones([M, 1]), tf.ones([M, 1]))
            beta0 = Gamma(0.3 * tf.ones([1, 1]), 0.3 * tf.ones([1, 1]))

            x = Poisson(tf.add(tf.matmul(U, V, transpose_b=True),\
예제 #3
0
            data_te = sparse.csr_matrix(
                (ratings_te, (rows_te, cols_te)),
                dtype='float64',
                shape=(end_idx - start_idx + 1, n_items))
            return data_tr, data_te

        # Load the training matrix (converted to CSR) and the (tr, te) matrix
        # pairs for the validation and test splits.
        train_data = load_train_data(os.path.join(DATA_DIR,
                                                  'train.csv')).tocsr()
        vad_data_tr, vad_data_te = load_tr_te_data(
            os.path.join(DATA_DIR, 'validation_tr.csv'),
            os.path.join(DATA_DIR, 'validation_te.csv'))
        test_data_tr, test_data_te = load_tr_te_data(
            os.path.join(DATA_DIR, 'test_tr.csv'),
            os.path.join(DATA_DIR, 'test_te.csv'))

        # Run every matrix through exp_to_imp with 0.5 as the second argument
        # (presumably an explicit-to-implicit threshold -- TODO confirm against
        # the helper). Each held-out (te) matrix is converted from its own
        # data, not from the matching tr matrix, so the tr/te pair stays
        # distinct.
        train_data = exp_to_imp(train_data, 0.5)
        vad_data_tr = exp_to_imp(vad_data_tr, 0.5)
        vad_data_te = exp_to_imp(vad_data_te, 0.5)
        test_data_tr = exp_to_imp(test_data_tr, 0.5)
        test_data_te = exp_to_imp(test_data_te, 0.5)

        # Candidate numbers of latent components swept by the loop below.
        dims = np.array([1, 2, 5, 10, 20, 50, 100])

        # COO-format versions of the training matrix and of the tr parts of
        # the validation and test splits.
        train_data_coo = train_data.tocoo()
        vad_data_tr_coo = vad_data_tr.tocoo()
        test_data_tr_coo = test_data_tr.tocoo()

        for i, dim in enumerate(dims):
            print("dim", dim)
            pf = pmf.PoissonMF(n_components=dim,
                               random_state=98765,
예제 #4
0
    for dim in dims:

        U = np.loadtxt(CAUSEFIT_DIR + '/cause_pmf_k' + str(dim) +
                       '_trainU.csv')
        V = np.loadtxt(CAUSEFIT_DIR + '/cause_pmf_k' + str(dim) + '_V.csv')
        U = (np.atleast_2d(U.T).T)
        V = (np.atleast_2d(V.T).T)
        reconstr_cau_train = U.dot(V.T)

        for i, K in enumerate(outdims):
            print("K0", dim, "K", K)

            D = train_data.shape[0]
            N = train_data.shape[1]
            weights = train_data * alpha
            cau = exp_to_imp(train_data)

            tf.reset_default_graph()
            sess = tf.InteractiveSession()

            idx_ph = tf.placeholder(tf.int32, M)
            cau_ph = tf.placeholder(tf.float32, [M, N])
            sd_ph = tf.placeholder(tf.float32, [M, N])

            U = Normal(loc=tf.zeros([M, K]), scale=pri_U * tf.ones([M, K]))
            V = Normal(loc=tf.zeros([N, K]), scale=pri_V * tf.ones([N, K]))

            x = Normal(loc=tf.multiply(cau_ph, tf.matmul(U,
                                                         V,
                                                         transpose_b=True)),
                       scale=tf.ones([M, N]))
예제 #5
0
                                    shape=(end_idx - start_idx + 1, n_items))
        return data_tr, data_te

    # Load the training matrix (converted to CSR) and the (tr, te) matrix pairs
    # for the validation and test splits.
    train_data = load_train_data(os.path.join(DATA_DIR, 'train.csv')).tocsr()
    vad_data_tr, vad_data_te = load_tr_te_data(
        os.path.join(DATA_DIR, 'validation_tr.csv'),
        os.path.join(DATA_DIR, 'validation_te.csv'))
    test_data_tr, test_data_te = load_tr_te_data(
        os.path.join(DATA_DIR, 'test_tr.csv'),
        os.path.join(DATA_DIR, 'test_te.csv'))

    # alpha is taken from args.alpha and echoed to stdout.
    alpha = args.alpha

    print("alpha", alpha)

    # Computed before the optional binarization below, so exp_to_imp receives
    # the ratings as loaded. (The 0.5 argument is presumably a threshold --
    # TODO confirm against exp_to_imp.)
    cau = exp_to_imp(train_data, 0.5)

    # When `binary` is positive, replace every split with its
    # binarize_rating() output; each matrix is converted from itself.
    if binary > 0:
        train_data = binarize_rating(train_data)
        vad_data_tr, vad_data_te = binarize_rating(
            vad_data_tr), binarize_rating(vad_data_te)
        test_data_tr, test_data_te = binarize_rating(
            test_data_tr), binarize_rating(test_data_te)

    # COO-format versions of the training matrix and of the tr parts of the
    # validation and test splits.
    train_data_coo = train_data.tocoo()
    vad_data_tr_coo = vad_data_tr.tocoo()
    test_data_tr_coo = test_data_tr.tocoo()

    # Model identifier, echoed to stdout.
    model_name = 'sg_pmf_obs'

    print("model", model_name)