def test_stats_entities(collection_data: CollectionData):
    """Check the per-entity-group statistics produced by ``CalculateStatsTask``.

    Four relation entries touching one chemical, two genes and one disease
    are inserted; after the task runs, the chemical, gene and disease group
    counts stored on ``collection_data.stats`` are compared with the
    expected ``EntityGroupCounts`` values.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)
    disease = BioEntity('d1', 'dn1', DISEASE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.99),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='2', prob=0.01),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=0.02),
        relation_entry_from_entities(gene_b, disease, 'marker', pmid='4', prob=0.09),
    ]
    collection_data.relations_db.insert_entries(entries)

    CalculateStatsTask(collection_data).execute()
    computed = collection_data.stats

    # The chemical takes part in three of the four inserted entries.
    assert computed.chemicals == EntityGroupCounts(
        total=1,
        relations=3,
        top=[EntityIdCount('c1', 3)],
    )
    # g2 appears in three entries and g1 in one.
    assert computed.genes == EntityGroupCounts(
        total=2,
        relations=4,
        top=[EntityIdCount('g2', 3), EntityIdCount('g1', 1)],
    )
    assert computed.diseases == EntityGroupCounts(
        total=1,
        relations=1,
        top=[EntityIdCount('d1', 1)],
    )
def test_get_relation_pmid_probs_all_pmids(relations_db: ExtractedRelationsDatabase):
    """Check ``get_relation_pmid_probs`` when every stored pmid is passed in.

    Two entries are stored for the (c1, g1, 'expression') relation and one
    for a different relation; querying the first relation with pmids
    '1', '2' and '3' is expected to yield only the two matching
    ``RelationPmidProb`` values.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='2', prob=0.5),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=1.0),
    ]
    relations_db.insert_entries(entries)

    results = relations_db.get_relation_pmid_probs(
        id1='c1',
        id2='g1',
        label='expression',
        pmids=['1', '2', '3'],
    )

    # Compared as a set, so the order of the returned items does not matter.
    expected = {RelationPmidProb('1', 0.9), RelationPmidProb('2', 0.5)}
    assert set(results) == expected
def test_get_entity_pairs(relations_db: ExtractedRelationsDatabase):
    """Check that ``get_entity_pairs`` yields each entity pair once.

    Three entries are stored, two of which share the (c1, g1) pair; the
    result is expected to contain exactly (c1, g1) followed by (c1, g2).
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    relations_db.insert_entries([
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='2', prob=0.5),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=0.1),
    ])

    found = list(relations_db.get_entity_pairs())

    assert len(found) == 2
    first_pair, second_pair = found
    assert first_pair == (chemical, gene_a)
    assert second_pair == (chemical, gene_b)
def test_stats_total(collection_data: CollectionData):
    """Check the overall relation and entity totals from ``CalculateStatsTask``.

    Two relation entries over three distinct entities are inserted; after
    the task runs, ``total_relations`` is expected to be 2 and
    ``total_entities`` to be 3.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='2', prob=0.1),
    ]
    collection_data.relations_db.insert_entries(entries)

    CalculateStatsTask(collection_data).execute()

    computed = collection_data.stats
    assert computed.total_relations == 2
    assert computed.total_entities == 3
def test_merged_relations_pmid_filter(relations_db: ExtractedRelationsDatabase):
    """Check that ``get_merged_relations(pmid=...)`` restricts results to one pmid.

    Three entries are stored: the (c1, g1, 'expression') relation under
    pmids '1' and '2', and the (g1, d1, 'marker') relation under pmid '1'.
    Querying with ``pmid='1'`` is expected to return two merged relations
    whose ``pmids`` lists contain only '1'.
    """
    c1 = BioEntity('c1', 'cn1', CHEMICAL)
    g1 = BioEntity('g1', 'gn1', GENE)
    d1 = BioEntity('d1', 'dn1', DISEASE)

    relations_db.insert_entries([
        relation_entry_from_entities(c1, g1, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(c1, g1, 'expression', pmid='2', prob=0.5),
        relation_entry_from_entities(g1, d1, 'marker', pmid='1', prob=0.1),
    ])

    merged = list(relations_db.get_merged_relations(pmid='1'))

    assert len(merged) == 2
    # Probabilities are compared approximately rather than exactly.
    assert merged[0] == MergedRelation(
        c1, g1, 'expression', prob=pytest.approx(0.9), pmids=['1'])
    assert merged[1] == MergedRelation(
        g1, d1, 'marker', prob=pytest.approx(0.1), pmids=['1'])
def test_stats_r_type_counts(collection_data: CollectionData):
    """Check the per-relation-type count lists produced by ``CalculateStatsTask``.

    One 'expression' entry (prob 0.99) and three 'transport' entries
    (probs 0.01, 0.02, 0.09) are inserted; ``stats.r_type_counts`` is then
    compared with the expected 20-element count lists per relation type.
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    entries = [
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.99),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='2', prob=0.01),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=0.02),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='4', prob=0.09),
    ]
    collection_data.relations_db.insert_entries(entries)

    CalculateStatsTask(collection_data).execute()
    computed = collection_data.stats

    assert computed.r_type_counts == [
        # The single 0.99 entry is counted in the last of the 20 slots.
        RTypeCounts('expression', [0] * 19 + [1]),
        # 0.01 and 0.02 are counted in the first slot, 0.09 in the second.
        RTypeCounts('transport', [2, 1] + [0] * 18),
    ]
def test_get_extracted_relations(relations_db: ExtractedRelationsDatabase):
    """Check that ``get_extracted_relations`` returns every stored entry.

    Three entries are inserted and the three returned items are compared,
    position by position and via ``as_dict()``, with entries rebuilt from
    the same arguments (probabilities matched approximately).
    """
    chemical = BioEntity('c1', 'cn1', CHEMICAL)
    gene_a = BioEntity('g1', 'gn1', GENE)
    gene_b = BioEntity('g2', 'gn2', GENE)

    relations_db.insert_entries([
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='1', prob=0.9),
        relation_entry_from_entities(chemical, gene_a, 'expression', pmid='2', prob=0.5),
        relation_entry_from_entities(chemical, gene_b, 'transport', pmid='3', prob=1.0),
    ])

    fetched = list(relations_db.get_extracted_relations())
    assert len(fetched) == 3

    # (entity1, entity2, label, pmid, prob) for each expected row, in order.
    expected_rows = [
        (chemical, gene_a, 'expression', '1', 0.9),
        (chemical, gene_a, 'expression', '2', 0.5),
        (chemical, gene_b, 'transport', '3', 1.0),
    ]
    for actual, (first, second, label, pmid, prob) in zip(fetched, expected_rows):
        assert actual.as_dict() == \
            relation_entry_from_entities(
                first, second, label, pmid=pmid, prob=pytest.approx(prob)).as_dict()