Exemplo n.º 1
0
    def test_equivalent_different_descriptions(self):
        """Entities whose description fields differ are not equivalent."""

        plain = Entity("A", "X")
        described = Entity("A", "X")
        described.set_description("background", "Q")
        # the comparison must fail whichever side is asked
        for left, right in ((plain, described), (described, plain)):
            self.assertFalse(left.equivalent(right))
Exemplo n.º 2
0
    def test_init(self):
        """Constructor stores the id, the category, and keyword fields."""

        entity = Entity("abc", "genes",
                        marker_id="X:001", marker_symbol="x001")
        self.assertEqual(entity.id, "abc")
        self.assertEqual(entity.category, "genes")
        # keyword fields are read back through get()
        for key, expected in (("marker_id", "X:001"),
                              ("marker_symbol", "x001")):
            self.assertEqual(entity.get(key), expected)
Exemplo n.º 3
0
    def test_get_description_string(self):
        """The "description" field decodes from JSON into all descriptors."""

        entity = Entity("abc", "genes", background="mouse")
        entity.set_description("allele", "aaa")
        # get("description") yields a string; decode it before inspecting
        decoded = json.loads(entity.get("description"))
        self.assertEqual(len(decoded), 2, "should have background, allele")
        self.assertEqual(decoded["background"], "mouse")
        self.assertEqual(decoded["allele"], "aaa")
Exemplo n.º 4
0
    def test_filter_from_dict(self):
        """Filtering a dict of entities by category keeps matching entries."""

        categories = (("o1", "X"), ("o2", "Y"), ("o3", "X"), ("o4", "Z"))
        source = {key: Entity(key, cat) for key, cat in categories}
        kept = filter_entities_cat(source, {"X", "Z"})
        # three of the four entities are in category X or Z
        self.assertEqual(len(kept), 3)
        self.assertEqual(kept["o1"].id, "o1")
Exemplo n.º 5
0
    def test_filter_category(self):
        """Filtering a list of entities by category keeps matching items."""

        categories = (("o1", "X"), ("o2", "Y"), ("o3", "X"), ("o4", "Z"))
        source = [Entity(key, cat) for key, cat in categories]
        kept = filter_entities_cat(source, {"X", "Z"})
        # three of the four entities are in category X or Z
        self.assertEqual(len(kept), 3)
        self.assertEqual(kept[0].id, "o1")
Exemplo n.º 6
0
    def test_average(self):
        """Summarize phenotypes using an average (consistent values)."""

        # first add several pieces of evidence into an entity object
        m = Entity("abc", "genes")
        evidence = [
            PhenotypeDatum("MP:002", Experiment(1, 0.8, 0.05)),
            PhenotypeDatum("MP:007", Experiment(0, 0.4, 0.05)),
            PhenotypeDatum("MP:002", Experiment(1, 0.6, 0.05)),
            PhenotypeDatum("MP:007", Experiment(0, 0.4, 0.15)),
            PhenotypeDatum("MP:009", Experiment(1, 0.6, 0.05)),
        ]
        for datum in evidence:
            m.add(datum)
        self.assertEqual(len(m.data), 5)

        # averaging leaves three data; check each one against the
        # expected value for its phenotype
        m.average()
        self.assertEqual(len(m.data), 3)
        expected_tpr = {"MP:002": 0.7, "MP:007": 0.4, "MP:009": 0.6}
        expected_fpr = {"MP:002": 0.05, "MP:007": 0.1, "MP:009": 0.05}
        expected_val = {"MP:002": 1, "MP:007": 0, "MP:009": 1}
        for datum in m.data:
            iphen = datum.phenotype
            iexp = datum.experiment
            self.assertEqual(iexp.value, expected_val[iphen])
            # averaged rates are computed floats; compare with a tolerance
            # so the test does not depend on rounding of the last bit
            self.assertAlmostEqual(iexp.tpr, expected_tpr[iphen])
            self.assertAlmostEqual(iexp.fpr, expected_fpr[iphen])
Exemplo n.º 7
0
def tech_model(id, control_type="match"):
    """Build an Entity in category "control" with blank keyword fields.

    Arguments:
        id             identifier passed to the Entity constructor
        control_type   value stored under the control_type keyword

    Returns:
        the newly created Entity
    """

    # keyword order matches the order used when calling Entity directly
    fields = {
        "allele_id": "",
        "allele_symbol": "",
        "background": "",
        "imputed_phenotypes": 0,
        "control_type": control_type,
    }
    return Entity(id, "control", **fields)
Exemplo n.º 8
0
    def test_equivalent_different_core_fields(self):
        """An extra marker_symbol on one entity breaks equivalence."""

        without_symbol = Entity("A", "X", marker_id="m")
        with_symbol = Entity("A", "X", marker_id="m", marker_symbol="m")
        # the comparison must fail whichever side is asked
        for left, right in ((without_symbol, with_symbol),
                            (with_symbol, without_symbol)):
            self.assertFalse(left.equivalent(right))
Exemplo n.º 9
0
    def test_equivalent_phenotypes(self):
        """An entity with a phenotype differs from one without any."""

        annotated = Entity("A", "X")
        empty = Entity("A", "X")
        annotated.add(PhenotypeDatum("MP:002", Experiment(1, 0.8, 0.05)))
        # the comparison must fail whichever side is asked
        for left, right in ((annotated, empty), (empty, annotated)):
            self.assertFalse(left.equivalent(right))
Exemplo n.º 10
0
    def test_consensus_imputed(self):
        """Summarize multiple rows of phenotypes using a consensus, with imputed values."""

        # first add several pieces of evidence into an entity object
        m = Entity("abc", "genes")
        d1 = PhenotypeDatum("MP:002", Experiment(1, 0.8, 0.05))
        # these two carry values strictly between 0 and 1
        d2 = PhenotypeDatum("MP:007", Experiment(0.6, 0.4, 0.05))
        d3 = PhenotypeDatum("MP:007", Experiment(0.4, 0.6, 0.05))
        for datum in (d1, d2, d3):
            m.add(datum)
        self.assertEqual(len(m.data), 3)

        # the consensus leaves two data; check each one against the
        # expected value for its phenotype
        m.consensus()
        self.assertEqual(len(m.data), 2)
        expected_tpr = {"MP:002": 0.8, "MP:007": 0.5}
        expected_fpr = {"MP:002": 0.05, "MP:007": 0.05}
        expected_val = {"MP:002": 1, "MP:007": 0.5}
        for datum in m.data:
            iphen = datum.phenotype
            iexp = datum.experiment
            # all three quantities are computed floats here, so compare
            # with a tolerance rather than bit-for-bit
            self.assertAlmostEqual(iexp.value, expected_val[iphen])
            self.assertAlmostEqual(iexp.tpr, expected_tpr[iphen])
            self.assertAlmostEqual(iexp.fpr, expected_fpr[iphen])
Exemplo n.º 11
0
    def test_has(self):
        """has() reports which keys are stored on the object."""

        entity = Entity("abc", "genes", x=0)
        # "id" and "description" are reported alongside the keyword "x"
        for present in ("id", "description", "x"):
            self.assertTrue(entity.has(present))
        self.assertFalse(entity.has("y"))
Exemplo n.º 12
0
    def test_equivalent_simple(self):
        """Identically constructed entities are equivalent."""

        first = Entity("A", "X", marker_id="m")
        second = Entity("A", "X", marker_id="m")
        # an entity is equivalent to itself and to its twin, both ways round
        for left, right in ((first, first), (first, second), (second, first)):
            self.assertTrue(left.equivalent(right))
Exemplo n.º 13
0
def impc_model(id, category, row, zygosity):
    """Build an Entity from one row of IMPC data.

    Arguments:
        id         identifier passed to the Entity constructor
        category   category passed to the Entity constructor
        row        dict with keys marker_accession_id, marker_symbol,
                   allele_accession_id, allele_symbol, strain_name
        zygosity   value stored under the zygosity keyword

    Returns:
        the newly created Entity, with source set to "IMPC"
    """

    # keyword order matches the order used when calling Entity directly
    fields = {
        "marker_id": row["marker_accession_id"],
        "marker_symbol": row["marker_symbol"],
        "allele_id": row["allele_accession_id"],
        "allele_symbol": row["allele_symbol"],
        "background": row["strain_name"],
        "imputed_phenotypes": 0,
        "zygosity": zygosity,
        "source": "IMPC",
    }
    return Entity(id, category, **fields)
Exemplo n.º 14
0
def get_gxd(gxd_path, emp_map, tprfpr):
    """Read a file with marker-EMAPA associations.

    Arguments:
        gxd_path   tab-separated file; the columns read are
                   feature.primaryIdentifier, structure.identifier, strength
        emp_map    dict mapping EMAPA ids to iterables of phenotype ids
        tprfpr     2-tuple with (tpr, fpr)

    Returns:
        dict mapping marker ids to Entity objects, each summarized
        with consensus()
    """

    tpr, fpr = tprfpr[0], tprfpr[1]

    # collect one entity per marker from the raw file
    entities = dict()
    with open_file(gxd_path, "rt") as handle:
        for record in csv.DictReader(handle, delimiter="\t", quotechar="'"):
            marker = record["feature.primaryIdentifier"]
            structure = record["structure.identifier"]
            level = record["strength"]

            # every marker seen in the file gets an entity, even when
            # the current row is skipped below
            if marker not in entities:
                created = Entity("GXD_" + marker,
                                 "expression",
                                 marker_id=marker)
                entities[marker] = created
                created.set_description("expression", 1)
                created.set_description("source", "GXD")

            # rows with an unmapped structure or unknown strength add nothing
            if structure not in emp_map or level not in gxd_strength:
                continue

            # the tpr is scaled between fpr and tpr by the strength factor;
            # an "Absent" row becomes a negative phenotype (value 0)
            factor = gxd_strength[level]
            experiment = Experiment(1, fpr + (tpr - fpr) * factor, fpr)
            if level == "Absent":
                experiment.value = 0
            entity = entities[marker]
            for phenotype in emp_map[structure]:
                entity.add(PhenotypeDatum(phenotype, experiment))

    # collapse the collected data into a consensus for each entity
    for entity in entities.values():
        entity.consensus()

    return entities
Exemplo n.º 15
0
    def test_equivalent_same_phenotypes(self):
        """Entities with the same phenotypes, added in any order, are equivalent."""

        first, second = Entity("A", "X"), Entity("A", "X")
        phenotypes = ("MP:002", "MP:005")
        for phen in phenotypes:
            first.add(PhenotypeDatum(phen, Experiment(1, 0.8, 0.05)))
        # the second entity receives the same phenotypes in reverse order
        for phen in reversed(phenotypes):
            second.add(PhenotypeDatum(phen, Experiment(1, 0.8, 0.05)))
        for left, right in ((first, second), (second, first)):
            self.assertTrue(left.equivalent(right))
Exemplo n.º 16
0
    def test_trim_easy_keep(self):
        """Trimming leaves a node in place when asked to keep it."""

        entity = Entity("abc", "genes")
        for phen in ("DOID:4", "DOID:11044"):
            entity.add(PhenotypeDatum(phen, Experiment(1, 0.8, 0.05)))

        self.assertEqual(len(entity.data), 2)
        # DOID:4 is passed as the set of terms to keep
        entity.trim_ancestors(self.obo, {"DOID:4"})
        self.assertEqual(len(entity.data), 2)
Exemplo n.º 17
0
    def test_trim_nothing(self):
        """Trimming leaves the data unchanged when nothing can be removed."""

        entity = Entity("abc", "genes")
        for phen in ("DOID:3650", "DOID:11044"):
            entity.add(PhenotypeDatum(phen, Experiment(1, 0.8, 0.05)))

        # same number of data before and after trimming
        self.assertEqual(len(entity.data), 2)
        entity.trim_ancestors(self.obo)
        self.assertEqual(len(entity.data), 2)
Exemplo n.º 18
0
    def test_equivalent_different_phenotypes(self):
        """Entities with only partly overlapping phenotypes are not equivalent."""

        first, second = Entity("A", "X"), Entity("A", "X")
        # both get two phenotypes, but only MP:005 is shared
        for phen in ("MP:001", "MP:005"):
            first.add(PhenotypeDatum(phen, Experiment(1, 0.8, 0.05)))
        for phen in ("MP:005", "MP:002"):
            second.add(PhenotypeDatum(phen, Experiment(1, 0.8, 0.05)))
        for left, right in ((first, second), (second, first)):
            self.assertFalse(left.equivalent(right))
Exemplo n.º 19
0
    def test_trim_easy(self):
        """Trimming removes DOID:4 and keeps DOID:11044."""

        entity = Entity("abc", "genes")
        for phen in ("DOID:4", "DOID:11044"):
            entity.add(PhenotypeDatum(phen, Experiment(1, 0.8, 0.05)))

        self.assertEqual(len(entity.data), 2)
        entity.trim_ancestors(self.obo)
        # only one datum is left after trimming
        self.assertEqual(len(entity.data), 1)
        remaining = entity.data[0]
        self.assertEqual(remaining.phenotype, "DOID:11044")
Exemplo n.º 20
0
    def test_trim_medium(self):
        """Trimming four phenotypes leaves DOID:11044 and DOID:655."""

        entity = Entity("abc", "genes")
        for phen in ("DOID:4", "DOID:11044", "DOID:0080015", "DOID:655"):
            entity.add(PhenotypeDatum(phen, Experiment(1, 0.8, 0.05)))

        self.assertEqual(len(entity.data), 4)
        entity.trim_ancestors(self.obo)
        self.assertEqual(len(entity.data), 2)
        # compare as a set because the order of the survivors is not checked
        survivors = {datum.phenotype for datum in entity.data}
        self.assertEqual(survivors, {"DOID:11044", "DOID:655"})
Exemplo n.º 21
0
    def test_add_phenotype_data(self):
        """Added phenotype data are stored in order and keep their content."""

        m = Entity("abc", "genes", marker_id="X:001", marker_symbol="x001")
        self.assertEqual(len(m.data), 0, "initial model has no phenotypes")
        d1 = PhenotypeDatum("MP:002", Experiment(1, 0.8, 0.0555))
        m.add(d1)
        d2 = PhenotypeDatum("MP:007", Experiment(1, 0.456, 0.0234))
        m.add(d2)
        self.assertEqual(len(m.data), 2, "just added two phenotypes")
        # the string form of each datum should show digits of its phenotype
        # id and of its experiment values; assertIn prints the offending
        # string on failure, which assertTrue(... in ...) does not
        pheno_str_0 = str(m.data[0])
        pheno_str_1 = str(m.data[1])
        self.assertIn("002", pheno_str_0)
        self.assertIn("555", pheno_str_0)
        self.assertIn("234", pheno_str_1)
Exemplo n.º 22
0
    def test_average_2(self):
        """Summarize phenotypes using an average (discordant values)."""

        # three pieces of evidence for one phenotype, one of them negative
        entity = Entity("abc", "genes")
        evidence = (
            PhenotypeDatum("MP:002", Experiment(1, 0.8, 0.05)),
            PhenotypeDatum("MP:002", Experiment(0, 0.4, 0.05)),
            PhenotypeDatum("MP:002", Experiment(1, 0.6, 0.05)),
        )
        for datum in evidence:
            entity.add(datum)
        self.assertEqual(len(entity.data), 3)

        # check that the average collapses the inputs into one datum
        entity.average()
        self.assertEqual(len(entity.data), 1)
        summary = entity.data[0]
        self.assertEqual(summary.phenotype, "MP:002")
        self.assertGreater(summary.experiment.value, 0)
        # the negative datum is expected to contribute 0.0 to the tpr average
        self.assertAlmostEqual(summary.experiment.tpr, (0.8 + 0.0 + 0.6) / 3)
        self.assertAlmostEqual(summary.experiment.fpr, 0.05)
Exemplo n.º 23
0
    def test_consensus_2(self):
        """Summarize multiple rows of phenotypes using a consensus with some discordance."""

        # first add several pieces of evidence into an entity object
        m = Entity("abc", "genes")
        d1 = PhenotypeDatum("MP:002", Experiment(1, 0.8, 0.05))
        d2 = PhenotypeDatum("MP:002", Experiment(0, 0.4, 0.05))
        d3 = PhenotypeDatum("MP:002", Experiment(1, 0.6, 0.05))
        for datum in (d1, d2, d3):
            m.add(datum)
        self.assertEqual(len(m.data), 3)

        # check that the consensus matches the inputs
        m.consensus()
        self.assertEqual(len(m.data), 1)
        iexp = m.data[0].experiment
        self.assertEqual(iexp.value, 1)
        # the tpr will be lower than (0.6+0.8)/2
        # it should be (0.7*2/3); compare with a tolerance because the
        # value is a computed float and its last bit depends on how the
        # implementation orders the arithmetic
        self.assertAlmostEqual(iexp.tpr, 0.7 * (2 / 3))
        self.assertAlmostEqual(iexp.fpr, 0.05)
Exemplo n.º 24
0
    def test_equivalent_up_to_timestamp(self):
        """Entities that differ only in timestamp are still equivalent."""

        older = Entity("A", "X", marker_id="m", timestamp="2017")
        newer = Entity("A", "X", marker_id="m", timestamp="2018")
        is_equivalent = older.equivalent(newer)
        self.assertTrue(is_equivalent)
Exemplo n.º 25
0
    def test_keywords(self):
        """A keyword given at construction appears in the description."""

        m = Entity("abc", "genes", background="mouse")
        # assertIn reports the container contents when the check fails
        self.assertIn("background", m.description)
Exemplo n.º 26
0
    def test_add_wrong_type(self):
        """add() raises when given something other than phenotype data."""

        entity = Entity("abc", "genes")
        # a plain string is not an accepted datum
        self.assertRaises(Exception, entity.add, "bob")
Exemplo n.º 27
0
    def test_str(self):
        """The string summary of an entity mentions its id."""

        m = Entity("abc", "genes")
        mstr = str(m)
        # assertIn prints the summary string when the id is missing
        self.assertIn("abc", mstr)
Exemplo n.º 28
0
    def test_filter_none(self):
        """Filtering with None as the criterion keeps every entity."""

        source = [Entity(key, cat) for key, cat in (("o1", "X"), ("o2", "Y"))]
        kept = filter_entities(source, None)
        # both inputs survive, in their original order
        self.assertEqual(len(kept), 2)
        self.assertEqual(kept[0].id, "o1")