def test_macau_dense_probit(self):
    # Probit Macau with a dense side-information matrix on the first mode.
    # Labels are binary, so the quality metric checked is the test AUC
    # (results.auc_test), not the RMSE.
    A = np.random.randn(25, 2)
    B = np.random.randn(3, 2)
    idx = list(
        itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])))
    df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])
    # Label is 1.0 where the inner product of the two factor rows is
    # positive, 0.0 otherwise.
    df["value"] = (
        np.array([np.sum(A[i[0], :] * B[i[1], :]) for i in idx]) > 0.0
    ).astype(np.float64)
    Ytrain, Ytest = macau.make_train_test_df(df, 0.2)
    results = macau.macau(Y=Ytrain,
                          Ytest=Ytest,
                          side=[A, None],
                          num_latent=4,
                          verbose=False,
                          burnin=20,
                          nsamples=20,
                          univariate=False,
                          precision="probit")
    # The prediction frame must keep the index columns of the input frame.
    self.assertTrue((results.prediction.columns[0:2] == ["A", "B"]).all())
    # Assert on the AUC and report the AUC in the failure message.
    self.assertTrue(
        results.auc_test > 0.55,
        msg="Probit factorization (with dense side) gave AUC below 0.55 (%f)."
        % results.auc_test)
def test_macau_tensor_univariate(self):
    # Univariate-sampler Macau on a dense 30 x 4 x 2 synthetic tensor, with
    # a sparse (COO) side matrix on the first mode only.
    A = np.random.randn(30, 2)
    B = np.random.randn(4, 2)
    C = np.random.randn(2, 2)

    # Every (a, b, c) cell of the tensor, in itertools.product order.
    axes = [np.arange(M.shape[0]) for M in (A, B, C)]
    idx = list(itertools.product(*axes))

    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    # Cell value: sum over latent dimensions of the three-way product.
    cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
    df["value"] = np.array(cell_values)

    Ytrain, Ytest = macau.make_train_test_df(df, 0.2)
    Acoo = scipy.sparse.coo_matrix(A)
    results = macau.macau(
        Y=Ytrain,
        Ytest=Ytest,
        side=[Acoo, None, None],
        num_latent=4,
        verbose=False,
        burnin=20,
        nsamples=20,
        univariate=True,
        precision=50,
    )

    rmse_ok = results.rmse_test < 0.5
    self.assertTrue(
        rmse_ok,
        msg="Tensor factorization gave RMSE above 0.5 (%f)." % results.rmse_test,
    )
def test_bpmf_tensor2(self):
    # BPMF (no side information) on a dense 15 x 20 x 3 synthetic tensor.
    A = np.random.randn(15, 2)
    B = np.random.randn(20, 2)
    C = np.random.randn(3, 2)

    # Every (a, b, c) cell of the tensor, in itertools.product order.
    axes = [np.arange(M.shape[0]) for M in (A, B, C)]
    idx = list(itertools.product(*axes))

    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    # Cell value: sum over latent dimensions of the three-way product.
    cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
    df["value"] = np.array(cell_values)

    Ytrain, Ytest = macau.make_train_test_df(df, 0.2)
    results = macau.bpmf(
        Y=Ytrain,
        Ytest=Ytest,
        num_latent=4,
        verbose=False,
        burnin=20,
        nsamples=20,
        univariate=False,
        precision=50,
    )

    rmse_ok = results.rmse_test < 0.5
    self.assertTrue(
        rmse_ok,
        msg="Tensor factorization gave RMSE above 0.5 (%f)." % results.rmse_test,
    )
def test_make_train_test_df(self):
    # make_train_test_df(df, 0.4) must put 40% of the rows in the test part,
    # 60% in the train part, and together the parts must reproduce every
    # cell of the original 10 x 8 x 3 tensor.
    dims = (10, 8, 3)
    cells = list(itertools.product(*[np.arange(d) for d in dims]))
    full = pd.DataFrame(np.asarray(cells), columns=["A", "B", "C"])
    full["value"] = np.arange(10.0 * 8.0 * 3.0)

    train_part, test_part = macau.make_train_test_df(full, 0.4)

    n_rows = full.shape[0]
    self.assertEqual(train_part.shape[0], n_rows * 0.6)
    self.assertEqual(test_part.shape[0], n_rows * 0.4)

    # Scatter the original frame and the two parts into dense arrays and
    # compare them cell by cell.
    expected = np.zeros(dims)
    expected[full.A, full.B, full.C] = full.value

    rebuilt = np.zeros(dims)
    for part in (train_part, test_part):
        rebuilt[part.A, part.B, part.C] = part.value

    self.assertTrue(np.allclose(expected, rebuilt))
def test_macau_dense_probit(self):
    # Probit Macau with a dense side-information matrix on the first mode.
    # Labels are binary, so the quality metric checked is the test AUC.
    A = np.random.randn(25, 2)
    B = np.random.randn(3, 2)
    idx = list(
        itertools.product(np.arange(A.shape[0]), np.arange(B.shape[0])))
    df = pd.DataFrame(np.asarray(idx), columns=["A", "B"])
    # Label is 1.0 where the inner product of the two factor rows is
    # positive, 0.0 otherwise.
    df["value"] = (
        np.array([np.sum(A[i[0], :] * B[i[1], :]) for i in idx]) > 0.0
    ).astype(np.float64)
    Ytrain, Ytest = macau.make_train_test_df(df, 0.2)
    results = macau.macau(Y=Ytrain,
                          Ytest=Ytest,
                          side=[A, None],
                          num_latent=4,
                          verbose=False,
                          burnin=20,
                          nsamples=20,
                          univariate=False,
                          precision="probit")
    # The prediction frame must keep the index columns of the input frame.
    self.assertTrue((results.prediction.columns[0:2] == ["A", "B"]).all())
    # Report the AUC in the failure message: it is the value being asserted
    # on, and the message text talks about AUC.
    self.assertTrue(
        results.auc_test > 0.55,
        msg="Probit factorization (with dense side) gave AUC below 0.55 (%f)."
        % results.auc_test)
def test_macau_tensor(self):
    # Macau on a dense 15 x 3 x 2 synthetic tensor, with a sparse (COO)
    # side matrix on the first mode only.
    A = np.random.randn(15, 2)
    B = np.random.randn(3, 2)
    C = np.random.randn(2, 2)

    # Every (a, b, c) cell of the tensor, in itertools.product order.
    axes = [np.arange(M.shape[0]) for M in (A, B, C)]
    idx = list(itertools.product(*axes))

    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    # Cell value: sum over latent dimensions of the three-way product.
    cell_values = [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
    df["value"] = np.array(cell_values)

    Ytrain, Ytest = macau.make_train_test_df(df, 0.2)
    Acoo = scipy.sparse.coo_matrix(A)
    results = macau.macau(
        Y=Ytrain,
        Ytest=Ytest,
        side=[Acoo, None, None],
        num_latent=4,
        verbose=False,
        burnin=20,
        nsamples=20,
        univariate=False,
        precision=50,
    )

    # The prediction frame must keep the three index columns of the input.
    index_columns = results.prediction.columns[0:3]
    self.assertTrue((index_columns == ["A", "B", "C"]).all())

    rmse_ok = results.rmse_test < 0.5
    self.assertTrue(
        rmse_ok,
        msg="Tensor factorization gave RMSE above 0.5 (%f)." % results.rmse_test,
    )
import itertools
import unittest

import numpy as np
import pandas as pd
import scipy.sparse

import macau

# Script: run BPMF twice on the same synthetic data, first with the
# train/test split given as data frames, then as scipy sparse COO matrices.

# Random rank-2 factors. The third mode has a single entry, so the
# 15 x 20 x 1 tensor can also be represented as a 15 x 20 matrix.
A = np.random.randn(15, 2)
B = np.random.randn(20, 2)
C = np.random.randn(1, 2)

# Every (a, b, c) cell, in itertools.product order.
idx = list(
    itertools.product(
        np.arange(A.shape[0]),
        np.arange(B.shape[0]),
        np.arange(C.shape[0]),
    )
)
df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
# Cell value: sum over latent dimensions of the three-way product.
df["value"] = np.array(
    [np.sum(A[a, :] * B[b, :] * C[c, :]) for a, b, c in idx]
)

Ytrain, Ytest = macau.make_train_test_df(df, 0.2)

# Run 1: data-frame input.
results = macau.bpmf(
    Y=Ytrain,
    Ytest=Ytest,
    num_latent=4,
    verbose=True,
    burnin=20,
    nsamples=2,
    univariate=False,
    precision=50,
)

# Run 2: the same split as sparse matrices indexed by the (A, B) columns.
Ytrain_sp = scipy.sparse.coo_matrix((Ytrain.value, (Ytrain.A, Ytrain.B)))
Ytest_sp = scipy.sparse.coo_matrix((Ytest.value, (Ytest.A, Ytest.B)))
results = macau.bpmf(
    Y=Ytrain_sp,
    Ytest=Ytest_sp,
    num_latent=4,
    verbose=True,
    burnin=20,
    nsamples=2,
    univariate=False,
    precision=50,
)
def macau_test():
    """Fit Macau on a synthetic 3-mode tensor and compare mean latents.

    Steps:

    1. Draw random factor matrices ``A`` (15 rows), ``B`` (3 rows) and
       ``C`` (5 rows) and build the full tensor of their three-way products.
    2. Run ``macau.macau`` without side information, with adaptive precision
       and ``save_prefix="macau_unit_test"``.
    3. Move every file in the current directory whose name starts with the
       save prefix into a freshly emptied ``results_unittests/`` directory.
    4. Average the per-sample latent CSV files
       (``<prefix>-sample<n>-U<k>-latents.csv`` for ``k`` in 1..3), save one
       ``.npy`` file per mode and print the means next to the true factors.

    NOTE(review): the sample file naming is taken from the paths this
    function reads; it presumably matches what ``macau.macau`` writes for a
    given ``save_prefix`` -- confirm against the macau documentation.

    Side effects: deletes and recreates ``results_unittests/`` and moves
    files out of the current working directory.
    """
    num_latents = 2
    n_samples = 800
    save_prefix = "macau_unit_test"
    str_dir = "results_unittests/"

    ## generating toy data
    A = np.random.randn(15, num_latents)
    B = np.random.randn(3, num_latents)
    C = np.random.randn(5, num_latents)

    idx = list(
        itertools.product(np.arange(A.shape[0]),
                          np.arange(B.shape[0]),
                          np.arange(C.shape[0])))
    df = pd.DataFrame(np.asarray(idx), columns=["A", "B", "C"])
    df["value"] = np.array(
        [np.sum(A[i[0], :] * B[i[1], :] * C[i[2], :]) for i in idx])

    ## no side information is used for any of the three modes
    df_train, df_val = macau.make_train_test_df(df, 0.05)
    macau.macau(Y=df_train,
                Ytest=df_val,
                side=[None, None, None],
                num_latent=num_latents,
                verbose=True,
                burnin=400,
                nsamples=n_samples,
                precision="adaptive",
                save_prefix=save_prefix)

    # Start from an empty output directory: results of a previous run are
    # discarded without asking.
    if os.path.exists(str_dir):
        shutil.rmtree(str_dir)
    os.makedirs(str_dir)

    # Collect everything written under the save prefix into the output
    # directory.
    for f in os.listdir("./"):
        if f.startswith(save_prefix):
            shutil.move(f, str_dir)

    def mean_latents(mode):
        # Average the saved latent matrices of one mode over all samples.
        total = 0
        for n in range(1, n_samples + 1):
            total += np.loadtxt(
                str_dir + save_prefix + "-sample%d-U%d-latents.csv" % (n, mode),
                delimiter=",")
        return total / n_samples

    mean_lat_pat = mean_latents(1)
    mean_lat_meas = mean_latents(2)
    mean_lat_time = mean_latents(3)

    # One file per mode. Previously all three arrays were written to
    # "mean_pat_latent.npy", so only the last one survived on disk.
    np.save(str_dir + "mean_pat_latent.npy", mean_lat_pat)
    np.save(str_dir + "mean_meas_latent.npy", mean_lat_meas)
    np.save(str_dir + "mean_time_latent.npy", mean_lat_time)

    print("Loaded")
    print("Patients Latents")
    print(mean_lat_pat.T)
    print("True Latents")
    print(A)
    print("Features Latents")
    print(mean_lat_meas.T)
    print("True Latents")
    print(B)
    print("Time Latents")
    print(mean_lat_time.T)
    print("True Latents")
    print(C)