Пример #1
0
    def test_pipeline(self):
        """Fuse three relations with Dfmf, then transform unseen ``t1`` rows.

        Verifies the shape of every factor and backbone matrix exposed by
        the fitted fuser, and the shape of the ``t1`` factor that
        DfmfTransform produces for a graph with 15 new rows.
        """
        rng = np.random.RandomState(0)
        # The draws below happen in a fixed order because the same generator
        # is later handed to the models.
        data_12, data_13, data_23 = (
            rng.rand(50, 30), rng.rand(50, 40), rng.rand(30, 40))

        t1 = ObjectType('type1', 30)
        t2 = ObjectType('type2', 40)
        t3 = ObjectType('type3', 40)
        relations = [
            Relation(data_12, t1, t2),
            Relation(data_13, t1, t3),
            Relation(data_23, t2, t3),
        ]
        fusion_graph = FusionGraph()
        fusion_graph.add_relations_from(relations)

        fuser = Dfmf(random_state=rng).fuse(fusion_graph)

        # Factor matrices are checked as (n_objects, rank).
        expected_factors = ((t1, (50, 30)), (t2, (30, 40)), (t3, (40, 40)))
        for object_type, shape in expected_factors:
            self.assertEqual(fuser.factor(object_type).shape, shape)

        # Backbone matrices are checked as (row rank, column rank).
        expected_backbones = ((30, 40), (30, 40), (40, 40))
        for relation, shape in zip(relations, expected_backbones):
            self.assertEqual(fuser.backbone(relation).shape, shape)

        unseen_12 = rng.rand(15, 30)
        unseen_13 = rng.rand(15, 40)
        new_graph = FusionGraph([
            Relation(unseen_12, t1, t2),
            Relation(unseen_13, t1, t3),
        ])

        transformer = DfmfTransform(random_state=rng)
        transformer = transformer.transform(t1, new_graph, fuser)
        self.assertEqual(transformer.factor(t1).shape, (15, 30))
Пример #2
0
    def test_dfmf(self):
        """Run Dfmf three times and inspect the per-run factors.

        With ``n_run=3`` the test expects ``factor`` and ``backbone`` to
        yield three matrices each, every factor to have 30 rows and
        ``rank`` columns, and ``complete`` for run 1 to equal the product
        of that run's factors and backbone.
        """
        rng = np.random.RandomState(0)
        data_12 = rng.rand(30, 30)
        data_13 = rng.rand(30, 30)

        t1 = ObjectType('type1', 50)
        t2 = ObjectType('type2', 30)
        t3 = ObjectType('type3', 10)
        object_types = [t1, t2, t3]
        fusion_graph = FusionGraph()
        relations = [Relation(data_12, t1, t2), Relation(data_13, t1, t3)]
        fusion_graph.add_relations_from(relations)

        model = Dfmf(init_type='random', random_state=rng, n_run=3)
        fuser = model.fuse(fusion_graph)

        # One matrix per run is expected for every type and relation.
        for object_type in object_types:
            self.assertEqual(len(list(fuser.factor(object_type))), 3)
        for relation in relations:
            self.assertEqual(len(list(fuser.backbone(relation))), 3)

        for object_type in object_types:
            expected_shape = (30, object_type.rank)
            for factor in fuser.factor(object_type):
                self.assertEqual(factor.shape, expected_shape)

        # Rebuild the second relation by hand from the factors of run 1.
        target = relations[1]
        G1 = fuser.factor(t1, run=1)
        S13 = fuser.backbone(target, run=1)
        G3 = fuser.factor(t3, run=1)
        right_part = np.dot(S13, G3.T)
        reconstructed = np.dot(G1, right_part)
        np.testing.assert_almost_equal(
            fuser.complete(target, run=1), reconstructed)
Пример #3
0
    def setUp(self):
        """Create five object types and two relation lists for the tests.

        Every relation shares the same random 30x30 matrix. The result is
        stored in ``self.t1`` .. ``self.t5``, ``self.relations1`` and
        ``self.relations2``.
        """
        rng = np.random.RandomState(0)
        shared = rng.rand(30, 30)

        t1 = self.t1 = ObjectType('Type 1', 10)
        t2 = self.t2 = ObjectType('Type 2', 10)
        t3 = self.t3 = ObjectType('Type 3', 10)
        t4 = self.t4 = ObjectType('Type 4', 10)
        t5 = self.t5 = ObjectType('Type 5', 10)

        def build(specs):
            # Each spec is (row type, column type, extra keyword arguments).
            return [Relation(shared, row, col, **extra)
                    for row, col, extra in specs]

        self.relations1 = build([
            (t1, t2, {'name': 'Test1'}),
            (t2, t3, {}),
            (t3, t4, {}),
            (t4, t5, {}),
            (t3, t5, {}),
            (t5, t1, {}),
        ])

        # Second list adds a duplicate t1-t2 relation and self-relations.
        self.relations2 = build([
            (t1, t2, {'name': 'Test2'}),
            (t1, t2, {}),
            (t2, t3, {}),
            (t3, t4, {}),
            (t4, t5, {}),
            (t3, t5, {}),
            (t5, t1, {}),
            (t4, t4, {}),
            (t4, t4, {'name': 'Test3'}),
            (t5, t5, {}),
        ])
Пример #4
0
def load_dicty():
    """Construct fusion graph from molecular biology of Dictyostelium.

    Three relations are read with ``load_source`` from the ``dicty``
    directory: gene annotations (gene x GO term), gene expression
    (gene x experimental condition) and protein-protein interactions
    (gene x gene). Expression values are log-transformed after loading.

    Returns
    -------
    FusionGraph
        Graph built from the ``ann``, ``expr`` and ``ppi`` relations.
    """
    gene = ObjectType('Gene', 50)
    go_term = ObjectType('GO term', 15)
    exprc = ObjectType('Experimental condition', 5)

    data, rn, cn = load_source(join('dicty', 'dicty.gene_annnotations.csv.gz'))
    ann = Relation(data=data,
                   row_type=gene,
                   col_type=go_term,
                   name='ann',
                   row_names=rn,
                   col_names=cn)
    data, rn, cn = load_source(join('dicty', 'dicty.gene_expression.csv.gz'))
    expr = Relation(data=data,
                    row_type=gene,
                    col_type=exprc,
                    name='expr',
                    row_names=rn,
                    col_names=cn)
    # Clamp to machine epsilon before the log so that values below it
    # (including zeros) do not become -inf or NaN. The builtin ``float`` is
    # used because the ``np.float`` alias no longer exists in current NumPy.
    expr.data = np.log(np.maximum(expr.data, np.finfo(float).eps))
    data, rn, cn = load_source(join('dicty', 'dicty.ppi.csv.gz'))
    ppi = Relation(data=data,
                   row_type=gene,
                   col_type=gene,
                   name='ppi',
                   row_names=rn,
                   col_names=cn)
    return FusionGraph([ann, expr, ppi])
    def test_dfmf(self):
        """Fuse two parallel t1-t2 relations plus one t1-t3 relation.

        Checks the graph bookkeeping (three relations, three object types),
        the backbone shapes, and that ``complete`` for each t1-t2 relation
        equals the product of the shared factors with that relation's own
        backbone.
        """
        rnds = np.random.RandomState(0)
        # Draw the data from the seeded generator (not the global one) so
        # that the test inputs are reproducible from run to run.
        R12_1 = rnds.rand(30, 30)
        R12_2 = rnds.rand(30, 30)
        R13 = rnds.rand(30, 20)

        t1 = ObjectType('type1', 30)
        t2 = ObjectType('type2', 30)
        t3 = ObjectType('type3', 20)
        relations = [
            Relation(R12_1, t1, t2),
            Relation(R12_2, t1, t2),
            Relation(R13, t1, t3)
        ]
        fusion_graph = FusionGraph()
        fusion_graph.add_relations_from(relations)
        self.assertEqual(len(fusion_graph.relations), 3)
        self.assertEqual(len(fusion_graph.object_types), 3)

        fuser = Dfmf(init_type='random', random_state=rnds).fuse(fusion_graph)
        self.assertEqual(fuser.backbone(relations[0]).shape, (30, 30))
        self.assertEqual(fuser.backbone(relations[1]).shape, (30, 30))
        self.assertEqual(fuser.backbone(relations[2]).shape, (30, 20))

        # Both t1-t2 relations share G1 and G2 but have separate backbones.
        G1 = fuser.factor(t1)
        G2 = fuser.factor(t2)
        S12_1 = fuser.backbone(relations[0])
        S12_2 = fuser.backbone(relations[1])
        R12_1_hat = np.dot(G1, np.dot(S12_1, G2.T))
        R12_2_hat = np.dot(G1, np.dot(S12_2, G2.T))
        np.testing.assert_almost_equal(fuser.complete(relations[0]), R12_1_hat)
        np.testing.assert_almost_equal(fuser.complete(relations[1]), R12_2_hat)
Пример #6
0
    def test_transformation(self):
        """Transforming already-seen rows should reproduce the fitted factor.

        A 5x3 relation is fused, then its first two rows are passed through
        DfmfTransform. The mean squared difference between the transformed
        and the fitted ``t1`` factor rows, and between the corresponding
        reconstructions, must stay below 1e-5.
        """
        data = np.random.rand(5, 3)

        t1 = ObjectType('type1', 2)
        t2 = ObjectType('type2', 2)
        relation = Relation(data, t1, t2)
        fusion_graph = FusionGraph()
        fusion_graph.add_relation(relation)

        fit_rng = np.random.RandomState(0)
        model = Dfmf(init_type='random', random_state=fit_rng, max_iter=100)
        fuser = model.fuse(fusion_graph)

        head = data[:2].copy()
        new_graph = FusionGraph([Relation(head, t1, t2)])

        # A second generator with the same seed is used for the transform.
        transform_rng = np.random.RandomState(0)
        transformer = DfmfTransform(random_state=transform_rng)
        transformer = transformer.transform(t1, new_graph, fuser)

        new_G1 = transformer.factor(t1)
        G1 = fuser.factor(t1)
        G2 = fuser.factor(t2)
        S12 = fuser.backbone(relation)
        # Both reconstructions share the backbone and the t2 factor.
        right_part = np.dot(S12, G2.T)
        new_hat = np.dot(new_G1, right_part)
        fitted_hat = np.dot(G1, right_part)

        def mean_squared(diff):
            return np.sum(diff**2) / diff.size

        self.assertLess(mean_squared(new_G1 - G1[:2]), 1e-5)
        self.assertLess(mean_squared(new_hat - fitted_hat[:2]), 1e-5)
Пример #7
0
    def test_dfmc(self):
        """Fuse a single 50x30 relation with Dfmc at ranks 50 and 30.

        Checks the backbone and factor shapes and that the completed
        relation matches the input data.
        """
        rng = np.random.RandomState(0)
        data = rng.rand(50, 30)

        row_type = ObjectType('type1', 50)
        col_type = ObjectType('type2', 30)
        relation = Relation(data, row_type, col_type)
        fusion_graph = FusionGraph()
        fusion_graph.add_relation(relation)

        model = Dfmc(init_type='random', random_state=rng)
        fuser = model.fuse(fusion_graph)

        self.assertEqual(fuser.backbone(relation).shape, (50, 30))
        self.assertEqual(fuser.factor(row_type).shape, (50, 50))
        self.assertEqual(fuser.factor(col_type).shape, (30, 30))
        completed = fuser.complete(relation)
        np.testing.assert_almost_equal(completed, relation.data)
Пример #8
0
    def test_preprocessors(self):
        """Fuse a relation whose preprocessor replaces the data with ones.

        Besides the usual shape checks, the completed relation is expected
        to be an all-ones matrix of the same shape as the input.
        """
        rng = np.random.RandomState(0)
        data = rng.rand(50, 30)

        row_type = ObjectType('type1', 50)
        col_type = ObjectType('type2', 30)

        def preprocessor(data):
            # Discard the values and keep only the shape and dtype.
            return np.ones_like(data)

        relation = Relation(
            data, row_type, col_type, preprocessor=preprocessor)
        fusion_graph = FusionGraph()
        fusion_graph.add_relation(relation)

        model = Dfmf(init_type='random', random_state=rng)
        fuser = model.fuse(fusion_graph)

        self.assertEqual(fuser.backbone(relation).shape, (50, 30))
        self.assertEqual(fuser.factor(row_type).shape, (50, 50))
        self.assertEqual(fuser.factor(col_type).shape, (30, 30))
        expected = np.ones_like(relation.data)
        np.testing.assert_almost_equal(fuser.complete(relation), expected)
Пример #9
0
    def test_postprocessors(self):
        """Fuse a masked relation whose postprocessor subtracts 10.

        The completed relation is compared with the relation data shifted
        by -10, and the relation stored in the graph under the name 'R' is
        compared with the original masked input.
        """
        rng = np.random.RandomState(0)
        # Entries above 0.7 are masked out.
        masked = np.ma.masked_greater(rng.rand(50, 30), 0.7)

        row_type = ObjectType('type1', 50)
        col_type = ObjectType('type2', 30)

        def postprocessor(data):
            return data - 10

        relation = Relation(
            masked, row_type, col_type, name='R', postprocessor=postprocessor)
        fusion_graph = FusionGraph()
        fusion_graph.add_relation(relation)

        model = Dfmc(init_type='random', random_state=rng)
        fuser = model.fuse(fusion_graph)

        self.assertEqual(fuser.backbone(relation).shape, (50, 30))
        self.assertEqual(fuser.factor(row_type).shape, (50, 50))
        self.assertEqual(fuser.factor(col_type).shape, (30, 30))
        expected = relation.data - 10
        np.testing.assert_almost_equal(fuser.complete(relation), expected)
        stored = fusion_graph.get_relation('R').data
        np.testing.assert_equal(stored, masked)
def run_parallel(n_jobs=1):
    """Time a Dfmf fit-and-complete cycle on one random relation.

    A 500x500 random matrix is generated, factorized with
    ``Dfmf(init_type='random_vcol', n_jobs=n_jobs)`` at ranks 10 and 10,
    and completed. The job count and the elapsed time are printed.

    Parameters
    ----------
    n_jobs : int, optional
        Value forwarded to ``Dfmf`` as ``n_jobs`` (default 1).
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_t = time.perf_counter()
    n1, n2 = 500, 500

    # Also tried with: n1, n2 = 10000, 10000
    R12 = np.random.rand(n1, n2)
    print(f"Number of jobs is {n_jobs}")

    r1, r2 = 10, 10
    # Also tried with: r1, r2 = 150, 150
    t1 = ObjectType('type1', r1)
    t2 = ObjectType('type2', r2)
    relations = [Relation(R12, t1, t2)]
    fusion_graph = FusionGraph(relations)

    fuser = Dfmf(init_type='random_vcol', n_jobs=n_jobs).fuse(fusion_graph)
    # The completed matrix is computed only so that it is part of the timed
    # work; compare it against R12 here if accuracy is of interest.
    fuser.complete(relations[0])

    print(f"Done in {time.perf_counter() - start_t} sec.")
Пример #11
0
    def test_infinite(self):
        """Fuse relations that contain masked, NaN and infinite entries.

        The first relation uses ``fill_value='row_mean'`` and the second
        ``fill_value='col_mean'``. After fusion, every entry of the
        completed first relation must be finite.
        """
        rng = np.random.RandomState(0)
        data_12 = rng.rand(50, 30)
        data_13 = rng.rand(50, 10)
        # Corrupt the inputs: mask large values and inject NaN / inf.
        data_12 = np.ma.masked_greater(data_12, 0.7)
        data_12[data_12 < 0.1] = np.nan
        data_13[data_13 < 0.5] = np.inf

        t1 = ObjectType('type1', 50)
        t2 = ObjectType('type2', 30)
        t3 = ObjectType('type3', 10)
        relations = [
            Relation(data_12, t1, t2, fill_value='row_mean'),
            Relation(data_13, t1, t3, fill_value='col_mean'),
        ]
        fusion_graph = FusionGraph(relations)

        model = Dfmf(init_type='random', random_state=rng)
        fuser = model.fuse(fusion_graph)

        expected_backbones = ((50, 30), (50, 10))
        for relation, shape in zip(relations, expected_backbones):
            self.assertEqual(fuser.backbone(relation).shape, shape)
        self.assertEqual(fuser.factor(t1).shape, (50, 50))
        self.assertEqual(fuser.factor(t2).shape, (30, 30))

        # The number of finite entries must equal the total entry count.
        completed = fuser.complete(relations[0])
        n_finite = np.sum(np.isfinite(completed))
        np.testing.assert_equal(n_finite, data_12.size)
Пример #12
0
def load_pharma():
    """Construct fusion graph from the pharmacology domain.

    Six relations are read with ``load_source`` from the ``pharma``
    directory, in the order listed in ``sources`` below, and wrapped in a
    single FusionGraph.
    """
    action = ObjectType('Action', 5)
    pmid = ObjectType('PMID', 5)
    depositor = ObjectType('Depositor', 5)
    fingerprint = ObjectType('Fingerprint', 20)
    depo_cat = ObjectType('Depositor category', 5)
    chemical = ObjectType('Chemical', 10)

    def read_relation(file_name, row_type, col_type):
        # Load one source file and wrap it as a relation between two types.
        values, row_names, col_names = load_source(join('pharma', file_name))
        return Relation(data=values,
                        row_type=row_type,
                        col_type=col_type,
                        row_names=row_names,
                        col_names=col_names)

    # (file name, row object type, column object type)
    sources = (
        ('pharma.actions.csv.gz', chemical, action),
        ('pharma.pubmed.csv.gz', chemical, pmid),
        ('pharma.depositors.csv.gz', chemical, depositor),
        ('pharma.fingerprints.csv.gz', chemical, fingerprint),
        ('pharma.depo_cats.csv.gz', depositor, depo_cat),
        ('pharma.tanimoto.csv.gz', chemical, chemical),
    )
    return FusionGraph([read_relation(*spec) for spec in sources])