Пример #1
0
def test_stats_entities(collection_data: CollectionData):
    """Verify per-group entity statistics after running ``CalculateStatsTask``.

    Four relation entries are stored: one chemical-gene 'expression' entry,
    two chemical-gene 'transport' entries for the same pair (different
    pmids) and one gene-disease 'marker' entry. The test then checks the
    ``chemicals``, ``genes`` and ``diseases`` groups of
    ``collection_data.stats`` for their ``total``, ``relations`` and ``top``
    values.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)
    disease = BioEntity('d1', 'dn1', DISEASE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.99),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='2', prob=0.01),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=0.02),
        relation_entry_from_entities(gene_b, disease, 'marker', pmid='4', prob=0.09),
    ]
    collection_data.relations_db.insert_entries(entries)

    CalculateStatsTask(collection_data).execute()
    computed = collection_data.stats

    # c1 takes part in three of the four inserted entries.
    expected_chemicals = EntityGroupCounts(
        total=1, relations=3, top=[EntityIdCount('c1', 3)])
    assert computed.chemicals == expected_chemicals

    # g2 occurs in three entries (two 'transport', one 'marker'), g1 in one.
    expected_genes = EntityGroupCounts(
        total=2,
        relations=4,
        top=[EntityIdCount('g2', 3), EntityIdCount('g1', 1)])
    assert computed.genes == expected_genes

    # d1 only occurs in the single 'marker' entry.
    expected_diseases = EntityGroupCounts(
        total=1, relations=1, top=[EntityIdCount('d1', 1)])
    assert computed.diseases == expected_diseases
Пример #2
0
def test_get_relation_pmid_probs_all_pmids(relations_db: ExtractedRelationsDatabase):
    """Query pmid probabilities for one relation while passing every stored pmid.

    Two 'expression' entries for (c1, g1) and one 'transport' entry for
    (c1, g2) are inserted. Asking for the (c1, g1, 'expression') relation
    with pmids '1', '2' and '3' must yield only the two entries of that
    relation; the order of the results is not checked.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='2', prob=0.5),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=1.0),
    ]
    relations_db.insert_entries(entries)

    found = list(
        relations_db.get_relation_pmid_probs(
            id1='c1',
            id2='g1',
            label='expression',
            pmids=['1', '2', '3'],
        )
    )

    # pmid '3' belongs to the (c1, g2) 'transport' entry and must not show up.
    expected = {RelationPmidProb('1', 0.9), RelationPmidProb('2', 0.5)}
    assert set(found) == expected
Пример #3
0
def test_get_entity_pairs(relations_db: ExtractedRelationsDatabase):
    """Check that ``get_entity_pairs`` yields each entity pair once.

    Three entries are inserted, two of which share the pair (c1, g1). The
    result is expected to contain exactly two pairs, (c1, g1) followed by
    (c1, g2).
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='2', prob=0.5),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=0.1),
    ]
    relations_db.insert_entries(entries)

    found = list(relations_db.get_entity_pairs())

    # The two (c1, g1) entries must collapse into a single pair.
    assert len(found) == 2
    expected_pairs = [(chemical, gene_a), (chemical, gene_b)]
    for actual_pair, expected_pair in zip(found, expected_pairs):
        assert actual_pair == expected_pair
Пример #4
0
def test_stats_total(collection_data: CollectionData):
    """Verify the overall relation and entity totals computed by ``CalculateStatsTask``.

    Two entries involving three distinct entities (c1, g1, g2) are inserted;
    the stats must report two relations and three entities.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='2', prob=0.1),
    ]
    collection_data.relations_db.insert_entries(entries)

    CalculateStatsTask(collection_data).execute()
    computed = collection_data.stats

    assert computed.total_relations == 2
    assert computed.total_entities == 3
Пример #5
0
def test_merged_relations_pmid_filter(relations_db: ExtractedRelationsDatabase):
    """Check that ``get_merged_relations(pmid='1')`` only reflects pmid '1' entries.

    Inserted data: two (c1, g1) 'expression' entries with pmids '1' and '2',
    and one (g1, d1) 'marker' entry with pmid '1'. With the filter applied,
    the merged 'expression' relation must carry only pmid '1' and its
    probability (0.9), i.e. the pmid '2' entry must not contribute.
    """
    # Only the entities actually referenced by the inserted entries are built.
    c1 = BioEntity('c1', 'cn1', CHEMICAL)
    g1 = BioEntity('g1', 'gn1', GENE)
    d1 = BioEntity('d1', 'dn1', DISEASE)

    relations_db.insert_entries([
        relation_entry_from_entities(c1, g1, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(c1, g1, 'expression', pmid='2', prob=0.5),
        relation_entry_from_entities(g1, d1, 'marker', pmid='1', prob=0.1)
    ])

    merged = list(relations_db.get_merged_relations(pmid='1'))
    assert len(merged) == 2
    # pytest.approx keeps the probability comparison tolerant of float rounding.
    assert merged[0] == MergedRelation(c1, g1, 'expression', prob=pytest.approx(0.9), pmids=['1'])
    assert merged[1] == MergedRelation(g1, d1, 'marker', prob=pytest.approx(0.1), pmids=['1'])
Пример #6
0
def test_stats_r_type_counts(collection_data: CollectionData):
    """Verify the per-relation-type count lists computed by ``CalculateStatsTask``.

    One 'expression' entry (prob 0.99) and three 'transport' entries
    (probs 0.01, 0.02, 0.09) are inserted. Each expected ``RTypeCounts``
    carries a 20-element list of counts.

    NOTE(review): the lists look like a histogram over probability with
    20 equal-width bins -- confirm against ``CalculateStatsTask``.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.99),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='2', prob=0.01),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=0.02),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='4', prob=0.09),
    ]
    collection_data.relations_db.insert_entries(entries)

    CalculateStatsTask(collection_data).execute()
    computed = collection_data.stats

    # 'expression': a single count in the last of the 20 slots.
    expression_counts = [0] * 20
    expression_counts[-1] = 1

    # 'transport': two counts in the first slot and one in the second.
    transport_counts = [0] * 20
    transport_counts[0] = 2
    transport_counts[1] = 1

    expected = [
        RTypeCounts('expression', expression_counts),
        RTypeCounts('transport', transport_counts),
    ]
    assert computed.r_type_counts == expected
Пример #7
0
def test_get_extracted_relations(relations_db: ExtractedRelationsDatabase):
    """Check that ``get_extracted_relations`` returns every inserted entry.

    Three entries are inserted and three are expected back, in the order
    they were inserted. Each returned entry is compared through
    ``as_dict()`` with an entry rebuilt from the same arguments, using
    ``pytest.approx`` for the probability.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='2', prob=0.5),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=1.0),
    ]
    relations_db.insert_entries(entries)

    extracted = list(relations_db.get_extracted_relations())
    assert len(extracted) == 3

    # (second entity, label, pmid, probability) of each expected entry, in order.
    expected_specs = [
        (gene_a, 'expression', '1', 0.9),
        (gene_a, 'expression', '2', 0.5),
        (gene_b, 'transport', '3', 1.0),
    ]
    for row, (gene, label, pmid, prob) in zip(extracted, expected_specs):
        actual_fields = row.as_dict()
        expected_fields = relation_entry_from_entities(
            chemical, gene, label, pmid=pmid, prob=pytest.approx(prob)).as_dict()
        assert actual_fields == expected_fields