Пример #1
0
 def test_too_many_sides(self):
     """Expect an AssertionError when a 2-D matrix is given three priors.

     The input is a 10x20 sparse matrix, yet three priors and three
     side-info slots are handed to ``smurff.smurff``.
     """
     matrix = scipy.sparse.rand(10, 20, 0.2)
     options = dict(priors=['normal'] * 3,
                    side_info=[None] * 3,
                    verbose=False)

     with self.assertRaises(AssertionError):
         smurff.smurff(matrix, **options)
Пример #2
0
    def test_bpmf_tensor3(self):
        """Factorize a synthetic 3-way tensor and check the test RMSE.

        The tensor is the trilinear product of random factors A (15x2),
        B (20x2) and C (1x2).  The same train/test split is afterwards run
        as a sparse matrix over the first two modes.
        """
        A = np.random.randn(15, 2)
        B = np.random.randn(20, 2)
        C = np.random.randn(1, 2)

        # Enumerate every (a, b, c) index and compute its exact value.
        mode_ranges = [np.arange(factor.shape[0]) for factor in (A, B, C)]
        idx = list(itertools.product(*mode_ranges))
        values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]

        df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
        df["value"] = np.array(values)
        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        # Sampler settings shared by the tensor run and the matrix run.
        sampler_opts = dict(num_latent=4,
                            verbose=False,
                            burnin=20,
                            nsamples=20)

        predictions = smurff.smurff(Ytrain,
                                    Ytest=Ytest,
                                    priors=['normal'] * 3,
                                    **sampler_opts)
        rmse = smurff.calc_rmse(predictions)
        self.assertTrue(
            rmse < 0.5,
            msg="Tensor factorization gave RMSE above 0.5 (%f)." % rmse)

        def as_coo(tensor):
            # Rebuild a sparse matrix from the A/B index columns and values.
            frame = tensor.data
            return scipy.sparse.coo_matrix(
                (frame.value, (frame.A, frame.B)))

        Ytrain_sp = as_coo(Ytrain)
        Ytest_sp = as_coo(Ytest)

        # The matrix result is computed but not inspected any further here.
        results_mat = smurff.smurff(Ytrain_sp,
                                    Ytest=Ytest_sp,
                                    priors=['normal'] * 2,
                                    **sampler_opts)
Пример #3
0
 def test_bpmf_emptytest(self):
     """Run smurff on a sparse 15x10 matrix without passing any test data.

     The call only has to complete; nothing is asserted on its result.
     """
     train = scipy.sparse.rand(15, 10, 0.2)
     options = dict(priors=['normal', 'normal'],
                    num_latent=10,
                    burnin=10,
                    nsamples=15,
                    verbose=False)
     smurff.smurff(train, **options)
Пример #4
0
 def test_bpmf_emptytest_probit(self):
     """Run smurff on binarised sparse data without passing any test data.

     The stored values of a random 15x10 sparse matrix are replaced by the
     booleans ``value > 0.5`` before the run.  Nothing is asserted on the
     result.
     """
     binary = scipy.sparse.rand(15, 10, 0.2)
     binary.data = binary.data > 0.5

     # NOTE(review): `verbose` is not defined inside this method; it is
     # presumably a module-level setting - confirm it exists in the file.
     options = dict(priors=['normal', 'normal'],
                    num_latent=10,
                    burnin=10,
                    nsamples=15,
                    verbose=verbose)
     smurff.smurff(binary, **options)
Пример #5
0
 def test_threads(self):
     """Run the same small factorization with 1 up to 64 threads.

     The thread count doubles on every iteration; each run only has to
     complete, nothing is asserted on the results.
     """
     Y = scipy.sparse.rand(10, 20, 0.2)
     thread_counts = (1, 2, 4, 8, 16, 32, 64)

     for num_threads in thread_counts:
         smurff.smurff(Y,
                       priors=['normal', 'normal'],
                       num_latent=4,
                       num_threads=num_threads,
                       verbose=False,
                       burnin=5,
                       nsamples=5)
Пример #6
0
    def test_bpmf_dense_matrix_sparse_2d_tensor(self):
        """Check that a dense matrix and a 2-D sparse tensor predict the same.

        The same fully populated 5x5 training data is given to smurff once
        as a dense matrix and once as a ``smurff.SparseTensor``; the test
        data is given as a sparse matrix and as a sparse tensor.  Both runs
        use the same seed, so the predicted coordinates and the
        ``pred_1sample`` values are expected to agree.
        """
        np.random.seed(1234)

        # Generate train dense matrix (density 1.0: every cell is populated)
        train_shape = (5, 5)
        train_sparse_matrix = scipy.sparse.random(*train_shape, density=1.0)
        train_dense_matrix = train_sparse_matrix.todense()

        # Generate test sparse matrix (columns are drawn from 0..3)
        test_shape = (5, 5)
        test_rows = np.random.randint(0, 5, 5)
        test_cols = np.random.randint(0, 4, 5)
        test_vals = np.random.randn(5)
        test_sparse_matrix = scipy.sparse.coo_matrix(
            (test_vals, (test_rows, test_cols)), test_shape)

        # Create train and test sparse tensors from the same coordinates
        train_sparse_tensor = smurff.SparseTensor(pd.DataFrame({
            '0': train_sparse_matrix.row,
            '1': train_sparse_matrix.col,
            'v': train_sparse_matrix.data
        }), train_shape)
        # Use the test shape here (the original passed train_shape)
        test_sparse_tensor = smurff.SparseTensor(pd.DataFrame({
            '0': test_sparse_matrix.row,
            '1': test_sparse_matrix.col,
            'v': test_sparse_matrix.data
        }), test_shape)

        # Run SMURFF with identical settings and seed for both inputs
        sparse_matrix_predictions = smurff.smurff(train_dense_matrix,
                                              Ytest=test_sparse_matrix,
                                              priors=['normal', 'normal'],
                                              num_latent=4,
                                              verbose=False,
                                              burnin=50,
                                              nsamples=50,
                                              seed=1234)

        sparse_tensor_predictions = smurff.smurff(train_sparse_tensor,
                                              Ytest=test_sparse_tensor,
                                              priors=['normal', 'normal'],
                                              num_latent=4,
                                              verbose=False,
                                              burnin=50,
                                              nsamples=50,
                                              seed=1234)

        # Transform SMURFF results to dictionaries of coords -> predicted value
        sparse_matrix_results_dict = collections.OrderedDict(
            (p.coords, p.pred_1sample) for p in sparse_matrix_predictions)
        sparse_tensor_results_dict = collections.OrderedDict(
            (p.coords, p.pred_1sample) for p in sparse_tensor_predictions)

        self.assertEqual(len(sparse_matrix_results_dict),
                         len(sparse_tensor_results_dict))
        # Compare the matrix keys against the tensor keys; the original
        # compared the tensor keys with themselves, which always passed.
        self.assertEqual(sparse_matrix_results_dict.keys(),
                         sparse_tensor_results_dict.keys())
        for coords, matrix_pred_1sample in sparse_matrix_results_dict.items():
            tensor_pred_1sample = sparse_tensor_results_dict[coords]
            self.assertAlmostEqual(matrix_pred_1sample, tensor_pred_1sample)
Пример #7
0
 def test_bpmf_numerictest(self):
     """Run smurff on a train/test split made from a numeric second argument.

     ``smurff.make_train_test`` is called with 0.3 as its second argument
     and its two return values are used as train and test data.  Nothing
     is asserted on the result.
     """
     train, test = smurff.make_train_test(
         scipy.sparse.rand(15, 10, 0.2), 0.3)

     options = dict(priors=['normal', 'normal'],
                    num_latent=10,
                    burnin=10,
                    nsamples=15,
                    verbose=False)
     smurff.smurff(train, Ytest=test, **options)
Пример #8
0
 def test_macau_dense(self):
     """Run the 'macau' prior with a dense side-info array on the first mode.

     Train and test data are independent random sparse 15x10 matrices; the
     side information is a dense 15x2 array.  Nothing is asserted on the
     result.
     """
     train = scipy.sparse.rand(15, 10, 0.2)
     test = scipy.sparse.rand(15, 10, 0.1)
     features = np.random.randn(15, 2)

     options = dict(priors=['macau', 'normal'],
                    side_info=[features, None],
                    num_latent=5,
                    burnin=10,
                    nsamples=5,
                    verbose=False)
     smurff.smurff(train, Ytest=test, **options)
Пример #9
0
 def test_macau_side_bin(self):
     """Run the 'macau' prior with sparse side info whose entries are all 1.

     Train and test data are independent random sparse 15x10 matrices; the
     15x2 sparse side information has every stored value set to 1.
     Nothing is asserted on the result.
     """
     train = scipy.sparse.rand(15, 10, 0.2)
     test = scipy.sparse.rand(15, 10, 0.1)

     features = scipy.sparse.rand(15, 2, 0.5)
     features.data.fill(1)  # overwrite every stored value with 1

     options = dict(priors=['macau', 'normal'],
                    side_info=[features, None],
                    num_latent=5,
                    burnin=10,
                    nsamples=5,
                    verbose=False)
     smurff.smurff(train, Ytest=test, **options)
Пример #10
0
    def test_macau_tensor(self):
        """Factorize a synthetic 3-way tensor with 'macau' on the first mode.

        The tensor is the trilinear product of random factors A (15x2),
        B (3x2) and C (2x2); A, converted to a sparse COO matrix, is the
        side information for the first mode.  The test RMSE must be
        below 0.5.
        """
        A = np.random.randn(15, 2)
        B = np.random.randn(3, 2)
        C = np.random.randn(2, 2)

        # Enumerate every (a, b, c) index and compute its exact value.
        mode_ranges = [np.arange(factor.shape[0]) for factor in (A, B, C)]
        idx = list(itertools.product(*mode_ranges))
        values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]

        df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
        df["value"] = np.array(values)
        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        Acoo = scipy.sparse.coo_matrix(A)

        options = dict(priors=['macau', 'normal', 'normal'],
                       side_info=[Acoo, None, None],
                       num_latent=4,
                       verbose=0,
                       burnin=20,
                       nsamples=20)
        predictions = smurff.smurff(Ytrain=Ytrain, Ytest=Ytest, **options)

        rmse = smurff.calc_rmse(predictions)
        failure_msg = "Tensor factorization gave RMSE above 0.5 (%f)." % rmse
        self.assertTrue(rmse < 0.5, msg=failure_msg)
Пример #11
0
    def test_bpmf_tensor(self):
        """Run smurff on small random sparse tensors of shape 5x4x3.

        The train tensor has 7 random entries and the test tensor 5, all
        drawn after seeding NumPy with 1234.  Nothing is asserted on the
        returned predictions.
        """
        np.random.seed(1234)
        shape = [5, 4, 3]

        def random_tensor(num_entries):
            # Draw order (A, B, C, value) is kept so the seeded data is stable.
            frame = pd.DataFrame({
                "A": np.random.randint(0, 5, num_entries),
                "B": np.random.randint(0, 4, num_entries),
                "C": np.random.randint(0, 3, num_entries),
                "value": np.random.randn(num_entries),
            })
            return smurff.SparseTensor(frame, shape)

        Y = random_tensor(7)
        Ytest = random_tensor(5)

        options = dict(priors=['normal', 'normal', 'normal'],
                       num_latent=4,
                       verbose=False,
                       burnin=50,
                       nsamples=50)
        predictions = smurff.smurff(Y, Ytest=Ytest, **options)
Пример #12
0
    def test_macau_dense_probit(self):
        """Run 'macau' with dense side info on binary data and check the score.

        Targets are 1.0 where the product of random factors A (25x2) and
        B (3x2) is positive and 0.0 otherwise.  A is also passed as the
        side information for the first mode.
        """
        A = np.random.randn(25, 2)
        B = np.random.randn(3, 2)

        idx = list(
            itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])))
        df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])
        # Binarise: positive products become 1.0, everything else 0.0
        df["value"] = (np.array([np.sum(A[i[0], :] * B[i[1], :])
                                 for i in idx]) > 0.0).astype(np.float64)
        Ytrain, Ytest = smurff.make_train_test_df(df, 0.2)

        predictions = smurff.smurff(
            Ytrain,
            Ytest=Ytest,
            priors=['macau', 'normal'],
            #prior_noises=[('probit', None, None, None, 0.5), ('fixed', 1.0, None, None, None)],
            side_info=[A, None],
            num_latent=4,
            burnin=20,
            nsamples=20,
            verbose=False)

        # `rmse` was referenced below without ever being assigned, which
        # raised NameError; compute it from the predictions first.
        rmse = smurff.calc_rmse(predictions)

        # NOTE(review): the message talks about AUC while the checked value
        # is an RMSE, and the probit noise setting above is commented out -
        # confirm the intended metric and threshold.
        self.assertTrue(
            rmse > 0.55,
            msg=
            "Probit factorization (with dense side) gave AUC below 0.55 (%f)."
            % rmse)
Пример #13
0
 def test_bpmf(self):
     """Check that smurff returns one prediction per stored test entry.

     A random sparse 10x20 matrix is split with
     ``smurff.make_train_test(Y, 0.5)``; the number of predictions must
     equal ``Ytest.nnz``.
     """
     Y, Ytest = smurff.make_train_test(scipy.sparse.rand(10, 20, 0.2), 0.5)

     options = dict(priors=['normal', 'normal'],
                    num_latent=4,
                    verbose=False,
                    burnin=50,
                    nsamples=50)
     predictions = smurff.smurff(Y, Ytest=Ytest, **options)

     self.assertEqual(Ytest.nnz, len(predictions))
Пример #14
0
    def test_macau_univariate(self):
        """Run the 'macauone' prior on both modes and count the predictions.

        Side information is a sparse 10x2 matrix for the rows and a sparse
        20x3 matrix for the columns.  The number of predictions must equal
        ``Ytest.nnz``.
        """
        Y, Ytest = smurff.make_train_test(scipy.sparse.rand(10, 20, 0.2), 0.5)

        # Side info for the rows first, then for the columns.
        row_side = scipy.sparse.coo_matrix(np.random.rand(10, 2))
        col_side = scipy.sparse.coo_matrix(np.random.rand(20, 3))

        options = dict(priors=['macauone', 'macauone'],
                       side_info=[row_side, col_side],
                       num_latent=4,
                       verbose=False,
                       burnin=50,
                       nsamples=50)
        predictions = smurff.smurff(Y, Ytest=Ytest, **options)

        self.assertEqual(Ytest.nnz, len(predictions))
Пример #15
0
    def test_macau(self):
        """Run the 'macau' prior on both modes with dense side information.

        Forty randomly chosen cells of a dense 10x20 matrix form the sparse
        data.  Column 1 of the dense matrix is the row side info and row 1,
        transposed, is the column side info.  Nothing is asserted on the
        returned predictions.
        """
        Ydense = np.random.rand(10, 20)
        # 40 random samples from the 10*20 matrix
        picked = np.random.permutation(10 * 20)[:40]

        side1 = Ydense[:, 1:2]
        side2 = Ydense[1:2, :].T

        # Convert to sparse, then keep only the picked entries.
        full = scipy.sparse.coo_matrix(Ydense)
        subset = scipy.sparse.coo_matrix(
            (full.data[picked], (full.row[picked], full.col[picked])),
            shape=full.shape)
        Y, Ytest = smurff.make_train_test(subset, 0.5)

        options = dict(
            priors=['macau', 'macau'],
            side_info=[side1, side2],
            direct=True,
            # side_info_noises=[[('fixed', 1.0, None, None, None)], [('adaptive', None, 0.5, 1.0, None)]],
            num_latent=4,
            verbose=False,
            burnin=50,
            nsamples=50)
        predictions = smurff.smurff(Y, Ytest=Ytest, **options)
Пример #16
0
    def test_macau_tensor_empty(self):
        """Expect a falsy result when a tensor is factorized without test data.

        The 30x4x2 tensor is the trilinear product of random factors A, B
        and C and is passed as a ``smurff.SparseTensor``; no ``Ytest`` is
        given.
        """
        A = np.random.randn(30, 2)
        B = np.random.randn(4, 2)
        C = np.random.randn(2, 2)

        # Enumerate every (a, b, c) index and compute its exact value.
        mode_ranges = [np.arange(factor.shape[0]) for factor in (A, B, C)]
        idx = list(itertools.product(*mode_ranges))
        values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]

        df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
        df["value"] = np.array(values)

        # Built but not used by the call below.
        Acoo = scipy.sparse.coo_matrix(A)

        options = dict(priors=['normal', 'normal', 'normal'],
                       num_latent=2,
                       burnin=5,
                       nsamples=5,
                       verbose=False)
        predictions = smurff.smurff(smurff.SparseTensor(df), **options)

        self.assertFalse(predictions)
Пример #17
0
from smurff import smurff
import scipy.io

# Matrix Market files holding the training and the test matrix.
train_matrix_path = "chembl-IC50-346targets.mm"
test_matrix_path = "chembl-IC50-test.mm"

# Read both files, training matrix first.
train, test = map(scipy.io.mmread, (train_matrix_path, test_matrix_path))

# Short run: one latent dimension, one burn-in step, five samples,
# 'normal' priors on both modes and no side info or auxiliary data.
result = smurff(train,
                Ynoise=('fixed', 5.0, None, None, None),
                Ytest=test,
                priors=["normal", "normal"],
                side_info=[None, None],
                aux_data=[[], []],
                num_latent=1,
                burnin=1,
                nsamples=5)

# Show the first prediction of the run.
print(result.predictions[0])