示例#1
0
文件: test_py.py 项目: timodw/smurff
    def setUpClass(cls):
        """Read every fixture matrix once and cache the results on the class.

        Afterwards ``cls.data`` maps each fixture file name to whatever
        ``mio.read_matrix`` returned for that file.
        """
        fixture_names = (
            "train.sdm",
            "test.sdm",
            "side_c2v.ddm",
            "side_ecfp6_counts_var005.sdm",
            "side_ecfp6_folded_dense.ddm",
        )

        # Fill a local dict first so ``cls.data`` is only bound after every
        # file has been read.
        loaded = {}
        for name in fixture_names:
            loaded[name] = mio.read_matrix(name)
        cls.data = loaded
示例#2
0
 def test_matrix_ddm(self):
     """Write a random dense matrix to a ``.ddm`` file and read it back.

     The matrix that comes back must be exactly equal to the one written.
     """
     path = "{}/{}".format(self.TEMP_DIR_NAME, "test_matrix_ddt.ddm")
     written = numpy.random.randn(10, 20)
     matrix_io.write_matrix(path, written)
     loaded = matrix_io.read_matrix(path)
     # array_equal is an exact comparison; no tolerance is applied here.
     identical = numpy.array_equal(loaded, written)
     self.assertTrue(identical)
示例#3
0
 def test_matrix_sdm(self):
     """Write a random sparse matrix to a ``.sdm`` file and read it back.

     The test passes when comparing the written and the loaded matrix with
     ``!=`` yields a result without any stored entries.
     """
     path = "{}/{}".format(self.TEMP_DIR_NAME, "test_matrix_sdm.sdm")
     written = scipy.sparse.rand(10, 20, 0.5)
     matrix_io.write_matrix(path, written)
     loaded = matrix_io.read_matrix(path)
     # nnz of the element-wise inequality is the number of differing entries.
     mismatch_count = (written != loaded).nnz
     self.assertTrue(mismatch_count == 0)
示例#4
0
 def test_dense_matrix_csv(self):
     """Write a random dense matrix to a ``.csv`` file and read it back.

     The comparison uses ``numpy.allclose``, so the loaded values only have
     to match the written ones within its default tolerances.
     """
     path = "{}/{}".format(self.TEMP_DIR_NAME, "test_dense_matrix_csv.csv")
     written = numpy.random.randn(10, 20)
     matrix_io.write_matrix(path, written)
     loaded = matrix_io.read_matrix(path)
     close_enough = numpy.allclose(loaded, written)
     self.assertTrue(close_enough)
示例#5
0
 def test_matrix_sbm(self):
     """Write a random 0/1 sparse matrix to a ``.sbm`` file and read it back.

     The test passes when comparing the written and the loaded matrix with
     ``!=`` yields a result without any stored entries.
     """
     path = "{}/{}".format(self.TEMP_DIR_NAME, "test_matrix_sbm.sbm")
     # randint(0, 2) draws from {0, 1}; the dense draw is then converted to COO.
     dense = numpy.random.randint(0, 2, size=(10, 20))
     written = scipy.sparse.coo_matrix(dense)
     matrix_io.write_matrix(path, written)
     loaded = matrix_io.read_matrix(path)
     mismatch_count = (written != loaded).nnz
     self.assertTrue(mismatch_count == 0)
示例#6
0
 def test_matrix_sparse_mtx(self):
     """Write a random sparse matrix to a ``.mtx`` file and read it back.

     Both matrices are densified and compared with ``numpy.allclose``.
     """
     path = "{}/{}".format(self.TEMP_DIR_NAME, "test_matrix_sparse_mtx.mtx")
     written = scipy.sparse.rand(10, 20, 0.5)
     matrix_io.write_matrix(path, written)
     loaded = matrix_io.read_matrix(path)
     loaded_dense = loaded.todense()
     written_dense = written.todense()
     self.assertTrue(numpy.allclose(loaded_dense, written_dense))
示例#7
0
文件: predict.py 项目: timodw/smurff
    def fromStepFile(cls, file_name, iter):
        """Build a sample from the step file ``file_name``.

        The config read from ``file_name`` supplies the number of modes
        (``models/num_models``), one latent matrix file per mode
        (``models/model_<i>``) and one link matrix file per mode
        (``priors/prior_<i>``). ``iter`` is passed through to the constructor
        unchanged.

        Returns the populated sample.
        """
        config = read_config_file(file_name)
        mode_count = int(config["models"]["num_models"])
        sample = cls(mode_count, iter)

        # Latent matrices, one per mode.
        for mode in range(sample.nmodes):
            latent_path = config["models"]["model_" + str(mode)]
            latent = mio.read_matrix(latent_path)
            sample.add_latent(latent)

        # Link matrices (beta), one per mode.
        for mode in range(sample.nmodes):
            beta_path = config["priors"]["prior_" + str(mode)]
            try:
                sample.add_beta(mio.read_matrix(beta_path))
            except FileNotFoundError:
                # No such file: register an empty 0x0 array instead.
                sample.add_beta(np.ndarray((0, 0)))

        return sample
示例#8
0
def calc_rmse(predfile, test):
    """Compute the RMSE of stored predictions at the entries found in ``test``.

    The prediction matrix is read from ``predfile``. For every
    (row, column, value) triple that ``sparse.find(test)`` reports, a
    ``smurff.Prediction`` is built from the test value and the prediction at
    the same coordinates. The resulting list is scored with
    ``smurff.calc_rmse``.
    """
    predictions = mio.read_matrix(predfile)

    # sparse.find yields three parallel sequences: rows, columns and values.
    rows, cols, values = sparse.find(test)

    selected_predictions = []
    for i, j, v in zip(rows, cols, values):
        selected_predictions.append(
            smurff.Prediction((i, j), v, pred_avg=predictions[i, j]))

    return smurff.calc_rmse(selected_predictions)
示例#9
0
    def fromStepFile(cls, file_name, dir_name):
        """Build a sample from the step file ``file_name`` inside ``dir_name``.

        The ``global`` section of the config supplies the number of modes and
        the step number. Matrix file names found in the config are joined
        onto ``dir_name`` before being read.

        Returns the populated sample.
        """
        config = read_config_file(file_name, dir_name)
        mode_count = int(config["global"]["num_modes"])
        step_number = int(config["global"]["number"])
        sample = cls(mode_count, step_number)

        # Latent matrices, one per mode.
        for mode in range(sample.nmodes):
            latent_path = os.path.join(
                dir_name, config["latents"]["latents_" + str(mode)])
            sample.add_latent(mio.read_matrix(latent_path))

        # Link matrices (beta), one per mode; 'none' means there is no file
        # and an empty 0x0 array is registered instead.
        for mode in range(sample.nmodes):
            link_name = config["link_matrices"]["link_matrix_" + str(mode)]
            if link_name == 'none':
                sample.add_beta(np.ndarray((0, 0)))
            else:
                link_path = os.path.join(dir_name, link_name)
                sample.add_beta(mio.read_matrix(link_path))

        return sample
示例#10
0
    def fromStepFile(cls, file_name, iter):
        """Build a sample from the section-less step file ``file_name``.

        The config supplies the number of modes (``num_models``), the path of
        a ``;``-separated predictions CSV (``pred``), one latent matrix file
        per mode (``model_<i>``) and one link matrix file per mode
        (``prior_<i>``). ``iter`` is passed through to the constructor
        unchanged.

        Returns the populated sample.
        """
        config = HeadlessConfigParser(file_name)
        mode_count = int(config["num_models"])
        sample = cls(mode_count, iter)

        predictions_path = config["pred"]
        sample.predictions = pd.read_csv(predictions_path, sep=";")

        # Latent matrices, one per mode.
        for mode in range(sample.nmodes):
            latent_path = config["model_" + str(mode)]
            latent = mio.read_matrix(latent_path)
            sample.add_latent(latent)

        # Link matrices (beta), one per mode.
        for mode in range(sample.nmodes):
            beta_path = config["prior_" + str(mode)]
            try:
                sample.add_beta(mio.read_matrix(beta_path))
            except FileNotFoundError:
                # No such file: register an empty 0x0 array instead.
                sample.add_beta(np.ndarray((0, 0)))

        return sample
示例#11
0
 def test_read_cpp_generated_sparse_matrix_mtx(self):
     """Read the checked-in sparse ``.mtx`` fixture and compare its contents.

     The expected 3x4 matrix has rows 0 and 2 filled and row 1 empty.
     """
     path = "test_data/cpp_generated_sparse_matrix.mtx"
     rows = numpy.array([0, 0, 0, 0, 2, 2, 2, 2])
     cols = numpy.array([0, 1, 2, 3, 0, 1, 2, 3])
     vals = numpy.array([1, 2, 3, 4, 9, 10, 11, 12])
     expected = scipy.sparse.coo_matrix((vals, (rows, cols)), shape=(3, 4))
     loaded = matrix_io.read_matrix(path)
     loaded_dense = loaded.todense()
     expected_dense = expected.todense()
     self.assertTrue(numpy.allclose(loaded_dense, expected_dense))
示例#12
0
    def test_macauoom(self):
        """Train without and with side information, then print several RMSEs.

        Two training sessions are run on the same train/test matrices: one
        without ``sideinfo`` and one with it. The second session's output
        directory is then used to predict the test rows that have no entries
        in the training matrix, through several prediction helpers. Nothing is
        asserted; every RMSE is printed.
        """
        train = mio.read_matrix("train.mm").tocsr()
        test = mio.read_matrix("test.mm").tocsr()
        sideinfo = mio.read_matrix("sideinfo.mm").tocsr()

        # Session without side information, in its own temporary directory.
        bpmf_rmse = train_session(mkdtemp(), train, test)

        # Session with side information; its directory is reused below.
        rootdir = mkdtemp()
        macau_rmse = train_session(rootdir, train, test, sideinfo)

        # Consecutive differences of the CSR row pointer give the number of
        # stored entries per row; keep the test rows that are empty in train.
        train_entries_per_row = np.diff(train.indptr)
        test_empty = test[train_entries_per_row == 0]

        rootfile = join(rootdir, "root.ini")
        predict_session = smurff.PredictSession(rootfile)
        rmse_im = im_prediction(predict_session, test_empty)
        rmse_oom_py = smurff_py_oom_prediction(
            predict_session, sideinfo, test_empty)
        rmse_oom_cmd = smurff_cmd_oom_prediction(
            rootfile, "sideinfo.mm", test_empty)
        rmse_oom_tf = tf_cmd_oom_prediction(rootdir, "sideinfo.mm", test_empty)
        rmse_oom_af = af_cmd_oom_prediction(rootdir, sideinfo, test_empty)

        # Report every score, one line each, in a fixed order.
        report = (
            ("bpmf full test : %.2f", bpmf_rmse),
            ("macau full test: %.2f", macau_rmse),
            ("in-matrix: %.2f", rmse_im),
            ("out-of-matrix smurff python: %.2f", rmse_oom_py),
            ("out-of-matrix smurff cmd: %.2f", rmse_oom_cmd),
            ("out-of-matrix tf (floats): %.2f", rmse_oom_tf),
            ("out-of-matrix af (floats): %.2f", rmse_oom_af),
        )
        for template, score in report:
            print(template % score)
示例#13
0
    def fromStepFile(cls, file_name, dir_name):
        """Build a sample from the step file ``file_name`` inside ``dir_name``.

        Besides the latent and link matrices, this loads the prediction
        statistics, the average and variance prediction matrices and, where
        present, the per-mode posterior mean/precision and hyper mu. A config
        value of ``'none'`` marks an optional file as absent.

        Returns the populated sample.
        """
        config = read_config_file(file_name, dir_name)
        mode_count = int(config["global"]["num_modes"])
        step_number = int(config["global"]["number"])
        sample = cls(mode_count, step_number)

        def read_in_dir(name):
            # Matrix file names in the config are relative to dir_name.
            return mio.read_matrix(os.path.join(dir_name, name))

        def read_unless_none(name):
            # Optional entry: 'none' means there is nothing to load.
            if name == 'none':
                return None
            return read_in_dir(name)

        # Predictions and their statistics.
        pred_state = read_config_file(config["predictions"]["pred_state"],
                                      dir_name)
        sample.pred_stats = dict(pred_state["global"].items())
        sample.pred_avg = read_in_dir(config["predictions"]["pred_avg"])
        sample.pred_var = read_in_dir(config["predictions"]["pred_var"])

        for mode in range(sample.nmodes):
            suffix = str(mode)

            # Latent matrix plus optional posterior mean and precision.
            U = read_in_dir(config["latents"]["latents_" + suffix])
            postMu = read_unless_none(config["latents"]["post_mu_" + suffix])
            postLambda = read_unless_none(
                config["latents"]["post_lambda_" + suffix])
            sample.add_latent(U, postMu, postLambda)

            # Link matrix (beta) and hyper mu; each falls back to an empty
            # 0x0 array when the config says 'none'.
            link_name = config["link_matrices"]["link_matrix_" + suffix]
            if link_name == 'none':
                beta = np.ndarray((0, 0))
            else:
                beta = read_in_dir(link_name)

            mu_name = config["link_matrices"]["mu_" + suffix]
            if mu_name == 'none':
                mu = np.ndarray((0, 0))
            else:
                # Only a mu that was actually read gets squeezed.
                mu = np.squeeze(read_in_dir(mu_name))

            sample.add_beta(beta, mu)

        return sample
示例#14
0
#!/usr/bin/env python3

import matrix_io as mio
import sys

# For every file named on the command line, print its shape and, when the
# loaded matrix has one, its ``nnz`` attribute.
for f in sys.argv[1:]:
    m = mio.read_matrix(f)
    try:
        summary = (m.shape, m.nnz)
    except AttributeError:
        # No ``nnz`` attribute on this matrix: report the shape only.
        summary = (m.shape,)
    print(f, ":", *summary)
示例#15
0
 def test_read_cpp_generated_dense_matrix_csv(self):
     """Read the checked-in dense ``.csv`` fixture and compare its contents.

     The expected matrix is 3x4 and holds the integers 1 through 12 in
     row-major order.
     """
     path = "test_data/cpp_generated_dense_matrix.csv"
     expected = numpy.array([
         [1, 2, 3, 4],
         [5, 6, 7, 8],
         [9, 10, 11, 12],
     ])
     loaded = matrix_io.read_matrix(path)
     close_enough = numpy.allclose(loaded, expected)
     self.assertTrue(close_enough)
示例#16
0
文件: center.py 项目: timodw/smurff
    else:
        if (args.mode == "cols"):
            m = m - np.broadcast_to(np.expand_dims(mean, 0), m.shape)
        elif (args.mode == "rows"):
            m = m - np.broadcast_to(np.expand_dims(mean, 1), m.shape)
        elif (args.mode == "global"):
            m = m - mean
        elif (args.mode == "none"):
            pass
        elif (args.mode != "none"):
            raise ValueError("Unknown centering mode: %s" % (args.mode))

    return m


# Load the train and test matrices; the script requires identical shapes.
train = mio.read_matrix(args.train)
test = mio.read_matrix(args.test)
assert train.shape == test.shape
# The mean is computed from the training matrix only and is then used to
# center both the training and the test matrix.
mean_train = mean(train, args.mode)
centered_train = center(train, args.mode, mean_train)
centered_test = center(test, args.mode, mean_train)
# Write both centered matrices into args.output, keeping the base names of
# the input files.
mio.write_matrix(os.path.join(args.output, os.path.basename(args.train)),
                 centered_train)
mio.write_matrix(os.path.join(args.output, os.path.basename(args.test)),
                 centered_test)

# Gather the optional feature files: column features first, then row features.
# An option that is unset or empty contributes nothing.
features = []
if args.col_features: features += args.col_features
if args.row_features: features += args.row_features

for fname in features:
示例#17
0
#!/usr/bin/python

import matrix_io as mio
import numpy as np
import math
from glob import glob

# Collect the latent matrix U of every stored sample. The glob pattern only
# matches files whose sample number is a single digit.
Us = [mio.read_matrix(m) for m in glob("output/U-[0123456789].ddm")]
print("samples:\n", Us)

# Stack the samples along a new leading axis and average over that axis, then
# compare with the mean stored in output/U-mu.ddm.
Ustacked = np.stack(Us)
mu1 = np.mean(Ustacked, axis=0)
print("python mu:\n", mu1)
mu2 = mio.read_matrix("output/U-mu.ddm")
print("bpmf mu:\n", mu2)
print("norm mu1 - mu2: %.4f" % np.linalg.norm(mu1 - mu2))

# Compute covariance and precision. Splitting along the last axis gives one
# slice per index of that axis; squeeze then drops the length-1 axis left by
# the split. (Presumably one slice per user, going by the label printed
# below; confirm against the writer of these files.)
Uunstacked = np.squeeze(np.split(Ustacked, Ustacked.shape[2], axis=2))
Ucov = [np.cov(u, rowvar=False) for u in Uunstacked]
# Invert the covariances computed above instead of calling np.cov a second
# time for every slice; the resulting precision matrices are the same.
Uprec = [np.linalg.inv(c) for c in Ucov]
# Restack so that the slice index is the last axis again.
Ucovstacked = np.stack(Ucov, axis=2)
Lambda1 = np.stack(Uprec, axis=2)
# reshape correctly

print("python: precision user 0\n", Lambda1[:, :, 0])

Lambda2_flat = mio.read_matrix("output/U-Lambda.ddm")
示例#18
0
#!/usr/bin/python

import matrix_io as mio
import numpy as np
import math
from glob import glob

# Collect the latent matrix U of every stored sample. The glob pattern only
# matches files whose sample number is a single digit.
Us = [mio.read_matrix(m) for m in glob("output/U-[0123456789].ddm")]
print("samples:\n", Us)

# Stack the samples along a new leading axis and average over that axis, then
# compare with the mean stored in output/U-mu.ddm.
Ustacked = np.stack(Us)
mu1 = np.mean(Ustacked, axis=0)
print("python mu:\n", mu1)
mu2 = mio.read_matrix("output/U-mu.ddm")
print("bpmf mu:\n", mu2)
print("norm mu1 - mu2: %.4f" % np.linalg.norm(mu1 - mu2))

# Compute covariance and precision. Splitting along the last axis gives one
# slice per index of that axis; squeeze then drops the length-1 axis left by
# the split. (Presumably one slice per user, going by the label printed
# below; confirm against the writer of these files.)
Uunstacked = np.squeeze(np.split(Ustacked, Ustacked.shape[2], axis=2))
Ucov = [np.cov(u, rowvar=False) for u in Uunstacked]
# Invert the covariances computed above instead of calling np.cov a second
# time for every slice; the resulting precision matrices are the same.
Uprec = [np.linalg.inv(c) for c in Ucov]
# Restack so that the slice index is the last axis again.
Ucovstacked = np.stack(Ucov, axis=2)
Lambda1 = np.stack(Uprec, axis=2)
# reshape correctly

print("python: precision user 0\n", Lambda1[:, :, 0])

Lambda2_flat = mio.read_matrix("output/U-Lambda.ddm")
示例#19
0
#!/usr/bin/env python

import smurff
import matrix_io as mio

# Load the IC50 matrix and split it into a train and a test part.
ic50 = mio.read_matrix("chembl-IC50-346targets.mm")
ic50_train, ic50_test = smurff.make_train_test(ic50, 0.2)
ic50_threshold = 6.

# Session settings. The threshold of 6. is also given to the session so that
# AUC can be calculated on the test data.
session_options = dict(
    priors=['normal', 'normal'],
    num_latent=32,
    burnin=10,
    nsamples=10,
    threshold=ic50_threshold,
)
session = smurff.TrainSession(**session_options)

# Probit noise with the same activity threshold (pIC50 > 6.) binarizes the
# train data.
probit_noise = smurff.ProbitNoise(ic50_threshold)
session.addTrainAndTest(ic50_train, ic50_test, probit_noise)
predictions = session.run()

rmse = smurff.calc_rmse(predictions)
print("RMSE = %.2f" % rmse)
auc = smurff.calc_auc(predictions, ic50_threshold)
print("AUC = %.2f" % auc)