Пример #1
0
def train_test(density, nmodes, side_info):
    """Generate a synthetic tensor and split it into train and test parts.

    One random single-column factor is drawn per mode (mode ``k`` gets
    ``4 * (k + 1)`` rows).  The factors are combined with ``np.einsum``,
    every factor sharing subscript ``0`` on its column axis.

    Args:
        density: when below 1, the generated tensor is first passed through
            ``smurff.make_train_test`` with this value and only the second
            returned part is kept.
        nmodes: number of modes (factors) to generate.
        side_info: callable applied to the first mode's factor.

    Returns:
        Tuple ``(Ytrain, Ytest, side_info(first_factor))``.
    """
    np.random.seed(seed)

    # Interleave operands and subscript lists: einsum(U0, [1, 0], U1, [2, 0], ...)
    factors = []
    einsum_args = []
    for mode in range(nmodes):
        factor = np.random.randn((mode + 1) * 4, 1)
        factors.append(factor)
        einsum_args.extend((factor, [mode + 1, 0]))
    tensor = np.einsum(*einsum_args)

    if density < 1.:
        _, tensor = smurff.make_train_test(tensor, density, seed=seed)

    train, test = smurff.make_train_test(tensor, 0.5, seed=seed)
    return train, test, side_info(factors[0])
Пример #2
0
    def test_bmf_pp(self):
        """Cross-check a BPMF predict session against recomputed statistics.

        Trains a ``smurff.BPMFSession`` on a random sparse 30x20 matrix and
        verifies that the RMSE reported by the predict session agrees (to two
        decimal places) with the RMSE recomputed from its test predictions,
        and that the posterior mean/precision of every mode agrees with
        ``calc_posteriorMeanPrec``.
        """
        ratings = scipy.sparse.rand(30, 20, 0.2)
        train, test = smurff.make_train_test(ratings, 0.5)

        session_options = dict(Ytest=test,
                               num_latent=4,
                               verbose=verbose,
                               burnin=5,
                               nsamples=20,
                               save_freq=1)
        session = smurff.BPMFSession(train, **session_options)
        session.run()
        predictor = session.makePredictSession()

        reported_rmse = float(predictor.statsYTest()["rmse_avg"])
        predicted, _variance = predictor.predictionsYTest()

        # Recompute the RMSE from the stored values of both matrices.
        expected_values = test.tocoo().data
        predicted_values = predicted.tocoo().data
        recomputed_rmse = math.sqrt(
            mean_squared_error(expected_values, predicted_values))

        self.assertAlmostEqual(reported_rmse, recomputed_rmse, 2)

        for mode in range(predictor.nmodes):
            expected_mu, expected_Lambda = calc_posteriorMeanPrec(
                predictor, mode)
            actual_mu, actual_Lambda = predictor.postMuLambda(mode)

            np.testing.assert_almost_equal(expected_mu, actual_mu)
            np.testing.assert_almost_equal(expected_Lambda, actual_Lambda)
Пример #3
0
    def test_macau_dense_probit(self):
        """Probit Macau with dense side information must reach AUC > 0.55.

        Binary labels are the sign of the inner product of two random factor
        matrices; the first factor matrix is also supplied as side
        information for mode 0.
        """
        row_factors = np.random.randn(25, 2)
        col_factors = np.random.randn(3, 2)

        # Every (row, col) index pair of the full 25x3 matrix.
        pairs = list(
            itertools.product(np.arange(row_factors.shape[0]),
                              np.arange(col_factors.shape[0])))
        frame = pd.DataFrame(np.asarray(pairs), columns=["A", "B"])
        scores = np.array([
            np.sum(row_factors[r, :] * col_factors[c, :]) for r, c in pairs
        ])
        frame["value"] = (scores > 0.0).astype(np.float64)
        Ytrain, Ytest = smurff.make_train_test(frame, 0.2)

        # Labels are 0.0 or 1.0, so 0.5 separates the two classes.
        threshold = 0.5

        session = smurff.TrainSession(priors=['macau', 'normal'],
                                      num_latent=4,
                                      threshold=threshold,
                                      burnin=200,
                                      nsamples=200,
                                      verbose=False)

        noise = smurff.ProbitNoise(threshold)
        session.addTrainAndTest(Ytrain, Ytest, noise)
        session.addSideInfo(0, row_factors, direct=True)

        predictions = session.run()

        auc = smurff.calc_auc(predictions, threshold)
        failure_message = (
            "Probit factorization (with dense side) gave AUC below 0.55 (%f)."
            % auc)
        self.assertTrue(auc > 0.55, msg=failure_message)
Пример #4
0
def gen_and_write(shape, num_latent, density, center="none"):
    """Generate a synthetic data set and write it to a fresh directory.

    The directory is named ``data_<shape>_<num_latent>_<density%>_<center>``.
    If it already exists nothing is written and the function returns early.

    Args:
        shape: iterable of dimensions, forwarded to ``gen_matrix``.
        num_latent: number of latent dimensions, forwarded to ``gen_matrix``.
        density: density forwarded to ``gen_matrix``; also encoded in the
            directory name as an integer percentage.
        center: one of ``"none"``, ``"row"``, ``"col"`` or ``"global"``.
    """
    Y, X, W = gen_matrix(shape, num_latent, density)
    Ytrain, Ytest = make_train_test(Y, 0.8)

    dims_label = "_".join(str(dim) for dim in shape)
    percent = int(density * 100)
    dirname = "data_%s_%d_%d_%s" % (dims_label, num_latent, percent, center)

    if os.path.exists(dirname):
        print("Already exists: %s. Skipping" % dirname)
        return

    print("%s..." % dirname)
    os.makedirs(dirname)

    # NOTE(review): centring is applied to Y after the train/test split and
    # the centred Y is not passed to any write_matrix call below, so it looks
    # like it has no effect on the written files (unless Ytrain/Ytest share
    # storage with Y) -- confirm the intended order.
    # PAY ATTENTION TO AXIS ORDER
    if center == "row":
        Y = preprocessing.scale(Y, axis=0, with_std=False)
    elif center == "col":
        Y = preprocessing.scale(Y, axis=1, with_std=False)
    elif center == "global":
        Y.data = Y.data - np.mean(Y.data)
    else:
        assert center == "none"

    outputs = (
        ("train", Ytrain),
        ("test", Ytest),
        ("rows", X),
        ("cols", W),
    )
    for label, matrix in outputs:
        write_matrix(dirname, label, matrix)
Пример #5
0
    def test_bpmf_tensor3(self):
        """BPMF on a synthetic 15x20x1 tensor must reach RMSE below 0.5.

        Tensor values are the sum over two latent components of the product
        of three random factor matrices.
        """
        A = np.random.randn(15, 2)
        B = np.random.randn(20, 2)
        C = np.random.randn(1, 2)

        # Every (i, j, k) coordinate of the full tensor.
        coords = list(
            itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0]),
                              np.arange(C.shape[0])))
        frame = pd.DataFrame(np.asarray(coords), columns=["A", "B", "C"])
        frame["value"] = np.array(
            [np.sum(A[i, :] * B[j, :] * C[k, :]) for i, j, k in coords])
        Ytrain, Ytest = smurff.make_train_test(frame, 0.2)

        options = dict(Ytest=Ytest,
                       priors=['normal', 'normal', 'normal'],
                       num_latent=4,
                       verbose=verbose,
                       burnin=20,
                       nsamples=20)
        predictions = smurff.smurff(Ytrain, **options)

        rmse = smurff.calc_rmse(predictions)
        failure_message = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure_message)
Пример #6
0
    def test_bmf_pp(self):
        """Cross-check a seeded BPMF predict session against recomputed values.

        Trains a ``smurff.BPMFSession`` on a random sparse 30x20 matrix with
        a fixed seed, then verifies that:

        * test predictions are stored at exactly the test coordinates,
        * the RMSE reported by the predict session agrees (to four decimal
          places) with the RMSE recomputed from those predictions,
        * the posterior mean/precision of every mode agrees with
          ``calc_posteriorMeanPrec``.
        """
        np.random.seed(seed)
        Y = scipy.sparse.rand(30, 20, 0.2)
        Y, Ytest = smurff.make_train_test(Y, 0.5, seed=seed)
        trainSession = smurff.BPMFSession(
            Y,
            is_scarce=True,
            Ytest=Ytest,
            num_latent=4,
            verbose=verbose,
            burnin=20,
            nsamples=20,
            save_freq=1,
            seed=seed,
            save_name=smurff.helper.temp_savename())
        trainSession.run()
        predict_session = trainSession.makePredictSession()

        sess_rmse = float(predict_session.statsYTest()["rmse_avg"])
        Ypred, Yvar = predict_session.predictionsYTest()

        Yt_i, Yt_j, Yt_v = scipy.sparse.find(Ytest)
        Yp_i, Yp_j, Yp_v = scipy.sparse.find(Ypred)
        # The value arrays are only comparable if both matrices hold entries
        # at the same coordinates in the same order.  Use numpy's testing
        # helpers instead of a bare ``assert``: they are not stripped under
        # ``python -O``, handle length mismatches, and report the differing
        # positions on failure.
        np.testing.assert_array_equal(Yp_i, Yt_i)
        np.testing.assert_array_equal(Yp_j, Yt_j)

        calc_rmse = math.sqrt(mean_squared_error(Yt_v, Yp_v))

        self.assertAlmostEqual(sess_rmse, calc_rmse, 4)

        for m in range(predict_session.nmodes):
            calc_mu, calc_Lambda = calc_posteriorMeanPrec(predict_session, m)
            sess_mu, sess_Lambda = predict_session.postMuLambda(m)

            np.testing.assert_almost_equal(calc_mu, sess_mu)
            np.testing.assert_almost_equal(calc_Lambda, sess_Lambda)
Пример #7
0
    def test_macau_tensor(self):
        """Macau on a synthetic 30x4x2 tensor must reach RMSE below 1.

        Tensor values are the sum over two latent components of the product
        of three random factor matrices; the first factor matrix is supplied
        (as a sparse COO matrix) as side information for the first mode.
        """
        shape = [30, 4, 2]

        A = np.random.randn(shape[0], 2)
        B = np.random.randn(shape[1], 2)
        C = np.random.randn(shape[2], 2)

        # Every (i, j, k) coordinate of the full tensor.
        coords = list(
            itertools.product(np.arange(shape[0]), np.arange(shape[1]),
                              np.arange(shape[2])))
        frame = pd.DataFrame(np.asarray(coords), columns=["A", "B", "C"])
        frame["value"] = np.array(
            [np.sum(A[i, :] * B[j, :] * C[k, :]) for i, j, k in coords])
        Ytrain, Ytest = smurff.make_train_test(frame, 0.2, shape=shape)

        side_first_mode = scipy.sparse.coo_matrix(A)

        options = dict(Ytrain=Ytrain,
                       Ytest=Ytest,
                       side_info=[side_first_mode, None, None],
                       direct=True,
                       num_latent=4,
                       verbose=verbose,
                       burnin=200,
                       nsamples=200)
        predictions = smurff.macau(**options)

        rmse = smurff.calc_rmse(predictions)
        failure_message = "Tensor factorization gave RMSE above 1. (%f)." % rmse
        self.assertTrue(rmse < 1., msg=failure_message)
Пример #8
0
 def test_bpmf_numerictest(self):
     """Smoke-test ``smurff.bpmf`` on a random sparse 15x10 matrix.

     The split argument is given as the plain number 0.3; the test only
     checks that training completes without raising.
     """
     matrix = sp.rand(15, 10, 0.2)
     test_amount = 0.3
     train, test = smurff.make_train_test(matrix, test_amount)
     options = dict(Ytest=test,
                    num_latent=10,
                    burnin=10,
                    nsamples=15,
                    verbose=verbose)
     smurff.bpmf(train, **options)
Пример #9
0
 def test_bpmf(self):
     """``smurff.bpmf`` must return one prediction per stored test entry."""
     matrix = sp.rand(10, 20, 0.2)
     train, test = smurff.make_train_test(matrix, 0.5)
     options = dict(Ytest=test,
                    num_latent=4,
                    verbose=verbose,
                    burnin=50,
                    nsamples=50)
     predictions = smurff.bpmf(train, **options)
     self.assertEqual(test.nnz, len(predictions))
Пример #10
0
    def run_train_session(self, nmodes, sparse):
        """Train on a random tensor with ``nmodes`` modes and return the session.

        The tensor has dimensions 2, 3, 4, ... (one per mode).  The train and
        test parts are stored on ``self.Ytrain`` / ``self.Ytest``.

        Args:
            nmodes: number of tensor modes.
            sparse: when true, the dense data is first routed through
                ``smurff.make_train_test`` and its second returned part is
                used as the data to split.

        Returns:
            The ``smurff.TrainSession`` after stepping it to completion.
        """
        dims = range(2, nmodes + 2)  # 2, 3, 4, ...
        data = np.random.rand(*dims)
        if sparse:
            # make_train_test is used here to obtain a sparse representation
            _, data = smurff.make_train_test(data, 0.5)
        self.Ytrain, self.Ytest = smurff.make_train_test(data, 0.1)

        session = smurff.TrainSession(
            priors=['normal'] * nmodes,
            num_latent=4,
            burnin=10,
            nsamples=15,
            verbose=verbose,
            save_freq=1,
            save_name=smurff.helper.temp_savename())
        session.addTrainAndTest(self.Ytrain, self.Ytest)

        session.init()
        # Step until the session reports there is nothing left to do.
        more_steps = True
        while more_steps:
            more_steps = session.step()

        return session
Пример #11
0
 def test_gfa_3view(self):
     """``smurff.gfa`` with three identical views must predict every test entry."""
     matrix = scipy.sparse.rand(10, 20, 0.2)
     train, test = smurff.make_train_test(matrix, 0.5)
     views = [train] * 3
     options = dict(Ytest=test,
                    num_latent=4,
                    verbose=verbose,
                    burnin=5,
                    nsamples=5)
     predictions = smurff.gfa(views, **options)
     self.assertEqual(test.nnz, len(predictions))
Пример #12
0
 def test_bpmf(self):
     """``smurff.smurff`` with normal priors must predict every test entry."""
     matrix = scipy.sparse.rand(10, 20, 0.2)
     train, test = smurff.make_train_test(matrix, 0.5)
     options = dict(Ytest=test,
                    priors=['normal', 'normal'],
                    num_latent=4,
                    verbose=False,
                    burnin=50,
                    nsamples=50)
     predictions = smurff.smurff(train, **options)
     self.assertEqual(test.nnz, len(predictions))
Пример #13
0
 def test_bpmf_numerictest(self):
     """Smoke-test ``smurff.smurff`` on a random sparse 15x10 matrix.

     The split argument is given as the plain number 0.3; the test only
     checks that training completes without raising.
     """
     matrix = scipy.sparse.rand(15, 10, 0.2)
     test_amount = 0.3
     train, test = smurff.make_train_test(matrix, test_amount)
     options = dict(Ytest=test,
                    priors=['normal', 'normal'],
                    num_latent=10,
                    burnin=10,
                    nsamples=15,
                    verbose=False)
     smurff.smurff(train, **options)
Пример #14
0
    def test_bpmf_dense_matrix_dense_2d_tensor(self):
        """BPMF must give the same predictions for a matrix and a 2-D tensor.

        The same fully populated 5x4 data is supplied once as a dense matrix
        (with a sparse test matrix) and once as ``smurff.SparseTensor``
        objects; both runs use the same seed and a single thread, and their
        sorted predictions are compared entry by entry.
        """
        np.random.seed(1234)

        # Fully populated random matrix plus a test split taken from it
        train_shape = (5, 4)
        sparse_random = sp.random(*train_shape, density=1.0)
        train_dense_matrix = sparse_random.todense()
        _, test_split = smurff.make_train_test(sparse_random, 0.2)

        # COO views of train and test (the train matrix is actually dense)
        train_coo = sp.coo_matrix(train_dense_matrix)
        test_coo = test_split.tocoo()

        def to_sparse_tensor(coo):
            # Sparse-tensor representation of a COO matrix: one column per
            # coordinate axis plus the value column.
            frame = pd.DataFrame({
                '0': coo.row,
                '1': coo.col,
                'v': coo.data
            })
            return smurff.SparseTensor(frame, train_shape)

        train_tensor = to_sparse_tensor(train_coo)
        test_tensor = to_sparse_tensor(test_coo)

        # Run SMURFF twice with identical settings
        run_options = dict(num_latent=4,
                           num_threads=1,
                           verbose=verbose,
                           burnin=50,
                           nsamples=50,
                           seed=1234)
        matrix_predictions = smurff.bpmf(train_dense_matrix,
                                         Ytest=test_coo,
                                         **run_options)
        tensor_predictions = smurff.bpmf(train_tensor,
                                         Ytest=test_tensor,
                                         **run_options)

        # Sort both result lists, then compare coordinates and predictions
        matrix_predictions.sort()
        tensor_predictions.sort()

        self.assertEqual(len(matrix_predictions), len(tensor_predictions))
        for from_matrix, from_tensor in zip(matrix_predictions,
                                            tensor_predictions):
            self.assertEqual(from_matrix.coords, from_tensor.coords)
            self.assertAlmostEqual(from_matrix.pred_1sample,
                                   from_tensor.pred_1sample)
Пример #15
0
    def test_macau_univariate(self):
        """Univariate ``smurff.macau`` must predict every stored test entry.

        Random sparse side information is supplied for both modes of a
        random sparse 10x20 matrix.
        """
        matrix = scipy.sparse.rand(10, 20, 0.2)
        train, test = smurff.make_train_test(matrix, 0.5)
        row_side = scipy.sparse.coo_matrix(np.random.rand(10, 2))
        col_side = scipy.sparse.coo_matrix(np.random.rand(20, 3))

        options = dict(Ytest=test,
                       side_info=[row_side, col_side],
                       univariate=True,
                       num_latent=4,
                       verbose=verbose,
                       burnin=200,
                       nsamples=200)
        predictions = smurff.macau(train, **options)
        self.assertEqual(test.nnz, len(predictions))
Пример #16
0
    def test_macau_univariate(self):
        """``smurff.smurff`` with 'macauone' priors must predict every test entry.

        Random sparse side information is supplied for both modes of a
        random sparse 10x20 matrix.
        """
        matrix = scipy.sparse.rand(10, 20, 0.2)
        train, test = smurff.make_train_test(matrix, 0.5)
        row_side = scipy.sparse.coo_matrix(np.random.rand(10, 2))
        col_side = scipy.sparse.coo_matrix(np.random.rand(20, 3))

        options = dict(Ytest=test,
                       priors=['macauone', 'macauone'],
                       side_info=[row_side, col_side],
                       num_latent=4,
                       verbose=False,
                       burnin=50,
                       nsamples=50)
        predictions = smurff.smurff(train, **options)
        self.assertEqual(test.nnz, len(predictions))
Пример #17
0
    def test_make_train_test(self):
        """``make_train_test`` on a 3-mode frame must partition every entry.

        With a split argument of 0.4 the test part must hold 40% of the
        entries and the train part 60%, and scattering both parts back into
        a dense array must reproduce the original values.
        """
        dims = (10, 8, 3)
        nnz = 10 * 8 * 3
        coords = list(itertools.product(*(np.arange(dim) for dim in dims)))
        df = pd.DataFrame(np.asarray(coords), columns=["A", "B", "C"])
        df["value"] = np.arange(float(nnz))

        Ytr, Yte = smurff.make_train_test(df, 0.4)
        self.assertEqual(Ytr.nnz, nnz * 0.6)
        self.assertEqual(Yte.nnz, nnz * 0.4)

        # Dense reference built from the frame ...
        expected = np.zeros(dims)
        expected[df.A, df.B, df.C] = df.value

        # ... and the same array rebuilt from the two parts of the split.
        rebuilt = np.zeros(dims)
        for part in (Ytr, Yte):
            rebuilt[part.columns[0], part.columns[1],
                    part.columns[2]] = part.values

        self.assertTrue(np.allclose(expected, rebuilt))
Пример #18
0
    def test_macau(self):
        """Smoke-test ``smurff.smurff`` with 'macau' priors and dense side info.

        Forty randomly chosen entries of a dense 10x20 matrix form the sparse
        data; one column and one row of the dense matrix serve as side
        information for the two modes.  Only completion without an exception
        is checked.
        """
        dense = np.random.rand(10, 20)
        # 40 random samples from the 10*20 matrix
        picked = np.random.permutation(10 * 20)[:40]
        row_side = dense[:, 1:2]
        col_side = dense[1:2, :].transpose()

        # Convert to sparse, then keep only the picked entries.
        full = scipy.sparse.coo_matrix(dense)
        sampled = scipy.sparse.coo_matrix(
            (full.data[picked], (full.row[picked], full.col[picked])),
            shape=full.shape)
        train, test = smurff.make_train_test(sampled, 0.5)

        options = dict(Ytest=test,
                       priors=['macau', 'macau'],
                       side_info=[row_side, col_side],
                       direct=True,
                       num_latent=4,
                       verbose=False,
                       burnin=50,
                       nsamples=50)
        predictions = smurff.smurff(train, **options)
Пример #19
0
    def run_train_session(self):
        """Train on a random sparse 15x10 matrix and return the session.

        The train and test parts are stored on ``self.Ytrain`` /
        ``self.Ytest``; one 'normal' prior is used per dimension of the
        train data.

        Returns:
            The ``smurff.TrainSession`` after stepping it to completion.
        """
        matrix = scipy.sparse.rand(15, 10, 0.2)
        self.Ytrain, self.Ytest = smurff.make_train_test(matrix, 0.5)
        mode_count = len(self.Ytrain.shape)

        train_session = smurff.TrainSession(priors=['normal'] * mode_count,
                                            num_latent=4,
                                            burnin=10,
                                            nsamples=15,
                                            verbose=verbose,
                                            save_freq=1)
        train_session.addTrainAndTest(self.Ytrain, self.Ytest)

        train_session.init()
        # Step until the session reports there is nothing left to do.
        more_steps = True
        while more_steps:
            more_steps = train_session.step()

        return train_session
Пример #20
0
    def test_macau(self):
        """``smurff.macau`` with dense side info must predict every test entry.

        Forty randomly chosen entries of a dense 10x20 matrix form the sparse
        data; one column and one row of the dense matrix serve as side
        information for the two modes.
        """
        dense = np.random.rand(10, 20)
        # 40 random samples from the 10*20 matrix
        picked = np.random.permutation(10 * 20)[:40]
        row_side = dense[:, 1:2]
        col_side = dense[1:2, :].transpose()

        # Convert to sparse, then keep only the picked entries.
        full = scipy.sparse.coo_matrix(dense)
        sampled = scipy.sparse.coo_matrix(
            (full.data[picked], (full.row[picked], full.col[picked])),
            shape=full.shape)
        train, test = smurff.make_train_test(sampled, 0.5)

        options = dict(Ytest=test,
                       side_info=[row_side, col_side],
                       num_latent=4,
                       verbose=verbose,
                       burnin=200,
                       nsamples=200)
        predictions = smurff.macau(train, **options)

        self.assertEqual(test.nnz, len(predictions))
Пример #21
0
    def run_train_session(self):
        """Run a train session with side information on mode 0 and return it.

        Forty randomly chosen entries of a dense normally distributed 10x20
        matrix form the sparse data.  The train/test parts are stored on
        ``self.Ytrain`` / ``self.Ytest`` and the full dense matrix on
        ``self.side_info``.

        Returns:
            The ``smurff.TrainSession`` after ``run()`` has completed.
        """
        dims = (10, 20)
        dense = np.random.normal(size=dims).reshape(dims)
        # 40 random samples from the 10*20 matrix
        picked = np.random.permutation(10 * 20)[:40]

        # Convert to sparse, then keep only the picked entries.
        full = scipy.sparse.coo_matrix(dense)
        sampled = scipy.sparse.coo_matrix(
            (full.data[picked], (full.row[picked], full.col[picked])),
            shape=full.shape)

        self.Ytrain, self.Ytest = smurff.make_train_test(sampled, 0.5)
        self.side_info = dense

        mode_count = len(self.Ytrain.shape)

        train_session = smurff.TrainSession(priors=['normal'] * mode_count,
                                            num_latent=32,
                                            burnin=10,
                                            nsamples=15,
                                            verbose=verbose,
                                            save_freq=1)

        train_session.addTrainAndTest(self.Ytrain, self.Ytest)
        train_session.addSideInfo(0, self.side_info)
        train_session.run()
        return train_session
Пример #22
0
    def run_train_session(self, nmodes, density):
        """Train on a generated tensor with side information for every mode.

        The tensor has dimensions 5, 6, 7, ... (one per mode) and is produced
        by ``smurff.generate.gen_tensor``.  The train and test parts are
        stored on ``self.Ytrain`` / ``self.Ytest``.

        Args:
            nmodes: number of tensor modes.
            density: density forwarded to ``gen_tensor``.

        Returns:
            Tuple ``(session, Y, X)``: the stepped ``smurff.TrainSession``
            and the two values returned by ``gen_tensor``.
        """
        dims = range(5, nmodes + 5)  # 5, 6, 7, ...
        Y, X = smurff.generate.gen_tensor(dims, 3, density)
        self.Ytrain, self.Ytest = smurff.make_train_test(Y, 0.1)

        session = smurff.TrainSession(
            priors=['normal'] * nmodes,
            num_latent=4,
            burnin=10,
            nsamples=nsamples,
            verbose=verbose,
            save_freq=1,
            save_name=smurff.helper.temp_savename())

        session.addTrainAndTest(self.Ytrain, self.Ytest)
        # One side-information entry per mode, in mode order.
        for mode, side in enumerate(X):
            session.addSideInfo(mode, side)

        session.init()
        # Step until the session reports there is nothing left to do.
        more_steps = True
        while more_steps:
            more_steps = session.step()

        return session, Y, X
Пример #23
0
 def train_test(self):
     """Return a ``(train, test)`` pair split from a random sparse 15x10 matrix."""
     matrix = scipy.sparse.rand(15, 10, 0.2)
     train, test = smurff.make_train_test(matrix, 0.5)
     return train, test
Пример #24
0
 def test_make_train_test(self):
     """Train and test parts must partition the matrix exactly.

     Their stored-entry counts must add up to the original count, and
     subtracting both parts from the original must leave an all-zero matrix.
     """
     original = scipy.sparse.rand(15, 10, 0.2)
     train, test = smurff.make_train_test(original, 0.5)
     self.assertEqual(original.nnz, train.nnz + test.nnz)

     residual = original - train - test
     residual_norm = np.linalg.norm(residual.todense())
     self.assertEqual(residual_norm, 0.0)
Пример #25
0
    ),
]

# Download every file in ``urls`` unless a local copy with the expected
# SHA-256 digest is already present.
for url, expected_sha, output in urls:
    if os.path.isfile(output):
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to garbage collection.
        with open(output, "rb") as cached_file:
            actual_sha = sha256(cached_file.read()).hexdigest()
        if expected_sha == actual_sha:
            continue

    # Missing file or digest mismatch: (re)download it.
    print("download %s" % output)
    urllib.request.urlretrieve(url, output)

# Load the IC50 matrix and the compound feature matrix from Matrix Market files.
ic50 = sio.mmread("chembl-IC50-346targets.mm")
feat = sio.mmread("chembl-IC50-compound-feat.mm")
# First 100 rows of the IC50 matrix, split into train and test parts.
# NOTE(review): the third positional argument (1234) is presumably the random
# seed -- confirm against the make_train_test signature.
ic50_100c = ic50.tocsr()[0:100, :]
ic50_100c_train, ic50_100c_test = smurff.make_train_test(ic50_100c, 0.2, 1234)

# Binary 0/1 copy for probit: stored values >= 6 become 1.0, the rest 0.0
ic50_01 = ic50.copy()
ic50_01.data = (ic50_01.data >= 6) * 1.

# -1/+1 copy: stored values >= 6 become +1.0, the rest -1.0
ic50_11 = ic50.copy()
ic50_11.data = ((ic50.data >= 6) * 2.) - 1.

# First 100 rows of the feature matrix, keeping only the columns that still
# have at least one stored entry, plus a dense copy of the result.
feat_100 = feat.tocsr()[0:100, :]
feat_100 = feat_100[:, feat_100.getnnz(0) > 0]
feat_100_dense = feat_100.todense()

generated_files = [
    (
Пример #26
0
#!/usr/bin/env python

import smurff
import matrix_io as mio

# Load the IC50 matrix and split it into train and test parts
ic50 = mio.read_matrix("chembl-IC50-346targets.mm")
ic50_train, ic50_test = smurff.make_train_test(ic50, 0.2)

# Activity threshold (pIC50 > 6.) shared by the noise model and the AUC
ic50_threshold = 6.

session = smurff.TrainSession(
    priors=['normal', 'normal'],
    num_latent=32,
    burnin=10,
    nsamples=10,
    # The same threshold is used to calculate AUC on the test data
    threshold=ic50_threshold)

# Probit noise binarizes the train data at the activity threshold
probit_noise = smurff.ProbitNoise(ic50_threshold)
session.addTrainAndTest(ic50_train, ic50_test, probit_noise)

predictions = session.run()

rmse = smurff.calc_rmse(predictions)
print("RMSE = %.2f" % rmse)
auc = smurff.calc_auc(predictions, ic50_threshold)
print("AUC = %.2f" % auc)