def test_too_many_sides(self): Y = scipy.sparse.rand(10, 20, 0.2) with self.assertRaises(AssertionError): smurff.smurff(Y, priors=['normal', 'normal', 'normal'], side_info=[None, None, None], verbose=False)
def test_bpmf_tensor3(self): A = np.random.randn(15, 2) B = np.random.randn(20, 2) C = np.random.randn(1, 2) idx = list( itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0]), np.arange(C.shape[0])) ) df = pd.DataFrame( np.asarray(idx), columns=["A", "B", "C"]) df["value"] = np.array([ np.sum(A[i[0], :] * B[i[1], :] * C[i[2], :]) for i in idx ]) Ytrain, Ytest = smurff.make_train_test_df(df, 0.2) predictions = smurff.smurff(Ytrain, Ytest=Ytest, priors=['normal', 'normal', 'normal'], num_latent=4, verbose=False, burnin=20, nsamples=20) rmse = smurff.calc_rmse(predictions) self.assertTrue(rmse < 0.5, msg="Tensor factorization gave RMSE above 0.5 (%f)." % rmse) Ytrain_sp = scipy.sparse.coo_matrix( (Ytrain.data.value, (Ytrain.data.A, Ytrain.data.B) ) ) Ytest_sp = scipy.sparse.coo_matrix( (Ytest.data.value, (Ytest.data.A, Ytest.data.B) ) ) results_mat = smurff.smurff(Ytrain_sp, Ytest=Ytest_sp, priors=['normal', 'normal'], num_latent=4, verbose=False, burnin=20, nsamples=20)
def test_bpmf_emptytest(self): X = scipy.sparse.rand(15, 10, 0.2) smurff.smurff(X, priors=['normal', 'normal'], num_latent=10, burnin=10, nsamples=15, verbose=False)
def test_bpmf_emptytest_probit(self): X = scipy.sparse.rand(15, 10, 0.2) X.data = X.data > 0.5 smurff.smurff(X, priors=['normal', 'normal'], num_latent=10, burnin=10, nsamples=15, verbose=verbose)
def test_threads(self): Y = scipy.sparse.rand(10, 20, 0.2) for t in range(7): # 1, 2, 4, 8, 16, 32, 64 smurff.smurff(Y, priors=['normal', 'normal'], num_latent=4, num_threads=2**t, verbose=False, burnin=5, nsamples=5)
def test_bpmf_dense_matrix_sparse_2d_tensor(self): np.random.seed(1234) # Generate train dense matrix train_shape = (5 ,5) train_sparse_matrix = scipy.sparse.random(5, 5, density=1.0) train_dense_matrix = train_sparse_matrix.todense() # Generate test sparse matrix test_shape = (5, 5) test_rows = np.random.randint(0, 5, 5) test_cols = np.random.randint(0, 4, 5) test_vals = np.random.randn(5) test_sparse_matrix = scipy.sparse.coo_matrix((test_vals, (test_rows, test_cols)), test_shape) # Create train and test sparse tensors train_sparse_tensor = smurff.SparseTensor(pd.DataFrame({ '0': train_sparse_matrix.row, '1': train_sparse_matrix.col, 'v': train_sparse_matrix.data }), train_shape) test_sparse_tensor = smurff.SparseTensor(pd.DataFrame({ '0': test_sparse_matrix.row, '1': test_sparse_matrix.col, 'v': test_sparse_matrix.data }), train_shape) # Run SMURFF sparse_matrix_predictions = smurff.smurff(train_dense_matrix, Ytest=test_sparse_matrix, priors=['normal', 'normal'], num_latent=4, verbose=False, burnin=50, nsamples=50, seed=1234) sparse_tensor_predictions = smurff.smurff(train_sparse_tensor, Ytest=test_sparse_tensor, priors=['normal', 'normal'], num_latent=4, verbose=False, burnin=50, nsamples=50, seed=1234) # Transfrom SMURFF results to dictionary of coords and predicted values sparse_matrix_results_dict = collections.OrderedDict((p.coords, p.pred_1sample) for p in sparse_matrix_predictions) sparse_tensor_results_dict = collections.OrderedDict((p.coords, p.pred_1sample) for p in sparse_tensor_predictions) self.assertEqual(len(sparse_matrix_results_dict), len(sparse_tensor_results_dict)) self.assertEqual(sparse_tensor_results_dict.keys(), sparse_tensor_results_dict.keys()) for coords, matrix_pred_1sample in sparse_matrix_results_dict.items(): tensor_pred_1sample = sparse_tensor_results_dict[coords] self.assertAlmostEqual(matrix_pred_1sample, tensor_pred_1sample)
def test_bpmf_numerictest(self): X = scipy.sparse.rand(15, 10, 0.2) Xt = 0.3 X, Xt = smurff.make_train_test(X, Xt) smurff.smurff(X, Ytest=Xt, priors=['normal', 'normal'], num_latent=10, burnin=10, nsamples=15, verbose=False)
def test_macau_dense(self): Y = scipy.sparse.rand(15, 10, 0.2) Yt = scipy.sparse.rand(15, 10, 0.1) F = np.random.randn(15, 2) smurff.smurff(Y, Ytest=Yt, priors=['macau', 'normal'], side_info=[F, None], num_latent=5, burnin=10, nsamples=5, verbose=False)
def test_macau_side_bin(self): X = scipy.sparse.rand(15, 10, 0.2) Xt = scipy.sparse.rand(15, 10, 0.1) F = scipy.sparse.rand(15, 2, 0.5) F.data[:] = 1 smurff.smurff(X, Ytest=Xt, priors=['macau', 'normal'], side_info=[F, None], num_latent=5, burnin=10, nsamples=5, verbose=False)
def test_macau_tensor(self): A = np.random.randn(15, 2) B = np.random.randn(3, 2) C = np.random.randn(2, 2) idx = list( itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0]), np.arange(C.shape[0]))) df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"]) df["value"] = np.array( [np.sum(A[i[0], :] * B[i[1], :] * C[i[2], :]) for i in idx]) Ytrain, Ytest = smurff.make_train_test_df(df, 0.2) Acoo = scipy.sparse.coo_matrix(A) predictions = smurff.smurff(Ytrain=Ytrain, Ytest=Ytest, priors=['macau', 'normal', 'normal'], side_info=[Acoo, None, None], num_latent=4, verbose=0, burnin=20, nsamples=20) rmse = smurff.calc_rmse(predictions) self.assertTrue(rmse < 0.5, msg="Tensor factorization gave RMSE above 0.5 (%f)." % rmse)
def test_bpmf_tensor(self): np.random.seed(1234) shape = [5, 4, 3] Y = smurff.SparseTensor( pd.DataFrame({ "A": np.random.randint(0, 5, 7), "B": np.random.randint(0, 4, 7), "C": np.random.randint(0, 3, 7), "value": np.random.randn(7) }), shape) Ytest = smurff.SparseTensor( pd.DataFrame({ "A": np.random.randint(0, 5, 5), "B": np.random.randint(0, 4, 5), "C": np.random.randint(0, 3, 5), "value": np.random.randn(5) }), shape) predictions = smurff.smurff(Y, Ytest=Ytest, priors=['normal', 'normal', 'normal'], num_latent=4, verbose=False, burnin=50, nsamples=50)
def test_macau_dense_probit(self): A = np.random.randn(25, 2) B = np.random.randn(3, 2) idx = list( itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0]))) df = pd.DataFrame(np.asarray(idx), columns=["A", "B"]) df["value"] = (np.array([np.sum(A[i[0], :] * B[i[1], :]) for i in idx]) > 0.0).astype(np.float64) Ytrain, Ytest = smurff.make_train_test_df(df, 0.2) predictions = smurff.smurff( Ytrain, Ytest=Ytest, priors=['macau', 'normal'], #prior_noises=[('probit', None, None, None, 0.5), ('fixed', 1.0, None, None, None)], side_info=[A, None], num_latent=4, burnin=20, nsamples=20, verbose=False) self.assertTrue( rmse > 0.55, msg= "Probit factorization (with dense side) gave AUC below 0.55 (%f)." % rmse)
def test_bpmf(self): Y = scipy.sparse.rand(10, 20, 0.2) Y, Ytest = smurff.make_train_test(Y, 0.5) predictions = smurff.smurff(Y, Ytest=Ytest, priors=['normal', 'normal'], num_latent=4, verbose=False, burnin=50, nsamples=50) self.assertEqual(Ytest.nnz, len(predictions))
def test_macau_univariate(self): Y = scipy.sparse.rand(10, 20, 0.2) Y, Ytest = smurff.make_train_test(Y, 0.5) side1 = scipy.sparse.coo_matrix(np.random.rand(10, 2)) side2 = scipy.sparse.coo_matrix(np.random.rand(20, 3)) predictions = smurff.smurff(Y, Ytest=Ytest, priors=['macauone', 'macauone'], side_info=[side1, side2], num_latent=4, verbose=False, burnin=50, nsamples=50) self.assertEqual(Ytest.nnz, len(predictions))
def test_macau(self): Ydense = np.random.rand(10, 20) r = np.random.permutation(10*20)[:40] # 40 random samples from 10*20 matrix side1 = Ydense[:,1:2] side2 = Ydense[1:2,:].transpose() Y = scipy.sparse.coo_matrix(Ydense) # convert to sparse Y = scipy.sparse.coo_matrix( (Y.data[r], (Y.row[r], Y.col[r])), shape=Y.shape ) Y, Ytest = smurff.make_train_test(Y, 0.5) predictions = smurff.smurff(Y, Ytest=Ytest, priors=['macau', 'macau'], side_info=[side1, side2], direct=True, # side_info_noises=[[('fixed', 1.0, None, None, None)], [('adaptive', None, 0.5, 1.0, None)]], num_latent=4, verbose=False, burnin=50, nsamples=50)
def test_macau_tensor_empty(self): A = np.random.randn(30, 2) B = np.random.randn(4, 2) C = np.random.randn(2, 2) idx = list( itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0]), np.arange(C.shape[0])) ) df = pd.DataFrame( np.asarray(idx), columns=["A", "B", "C"]) df["value"] = np.array([ np.sum(A[i[0], :] * B[i[1], :] * C[i[2], :]) for i in idx ]) Acoo = scipy.sparse.coo_matrix(A) predictions = smurff.smurff(smurff.SparseTensor(df), priors=['normal', 'normal', 'normal'], num_latent=2, burnin=5, nsamples=5, verbose=False) self.assertFalse(predictions)
from smurff import smurff import scipy.io train_matrix_path = "chembl-IC50-346targets.mm" test_matrix_path = "chembl-IC50-test.mm" train = scipy.io.mmread(train_matrix_path) test = scipy.io.mmread(test_matrix_path) result = smurff( train, Ynoise = ('fixed', 5., None, None, None), Ytest = test, priors = ["normal", "normal"], side_info = [ None, None ], aux_data = [ [], [] ], num_latent = 1, burnin = 1, nsamples = 5) print(result.predictions[0])