def learn(resource, input, outdir, target_assocfile, target_ontology, target_root_class):
    """
    Learn association rules
    """
    # Local import so this function does not depend on the module's import block.
    import os

    logging.basicConfig(level=logging.INFO)

    # Build the ontology from `resource` and the association set from `input`.
    afa = AssociationSetFactory()
    ofa = OntologyFactory()
    ont = ofa.create(resource)
    aset = afa.create_from_file(file=input, ontology=ont, fmt=None)
    learner = ol.OntologyLearner(assocs=aset)

    # Only subClassOf edges are used when splitting associations by root class.
    isa_ont = ont.subontology(relations=['subClassOf'])
    if target_root_class:
        learner.split_assocs(target_root_class, ontology=isa_ont)

    if target_ontology:
        learner.target_ontology = ofa.create(target_ontology)

    if target_assocfile:
        # Target associations are read against the target ontology when one
        # is set on the learner, otherwise against the source ontology.
        tont = ont if learner.target_ontology is None else learner.target_ontology
        learner.target_assocs = afa.create_from_file(target_assocfile, ontology=tont, fmt=None)

    # os.path.join keeps the report inside `outdir`; plain concatenation with
    # '/index.md' pointed at the filesystem root when `outdir` was empty.
    with open(os.path.join(outdir, 'index.md'), 'w') as report:
        learner.fit_all(dir=outdir, reportfile=report)
def load_associations_from_file(self, associations_type: DataType, associations_url: str,
                                associations_cache_path: str, config: GenedescConfigParser) -> None:
    """load GO, DO or expression associations from a file and drop blacklisted terms

    The file is parsed with a GafParser, stored on the attribute that matches
    the association type and then filtered through
    remove_blacklisted_annotations. Any other association type is ignored.

    Args:
        associations_type (DataType): the type of associations to set
        associations_url (str): url to the association file
        associations_cache_path (str): path to cache file for the associations
        config (GenedescConfigParser): configuration object where to read properties
    """
    assoc_config = AssocParserConfig(remove_double_prefixes=True, paint=True)

    # Pick the log line, the attribute names and the config module per type.
    if associations_type == DataType.GO:
        log_message = "Loading GO associations from file"
        assoc_attr, ontology_attr, module = "go_associations", "go_ontology", Module.GO
    elif associations_type == DataType.DO:
        log_message = "Loading DO associations from file"
        assoc_attr, ontology_attr, module = "do_associations", "do_ontology", Module.DO_EXP_AND_BIO
    elif associations_type == DataType.EXPR:
        log_message = "Loading Expression associations from file"
        assoc_attr, ontology_attr, module = ("expression_associations", "expression_ontology",
                                             Module.EXPRESSION)
    else:
        return

    logger.info(log_message)
    cached_file = self._get_cached_file(cache_path=associations_cache_path,
                                        file_source_url=associations_url)
    parsed_assocs = GafParser(config=assoc_config).parse(file=cached_file, skipheader=True)

    # Store the unfiltered set first, then replace it with the filtered one.
    setattr(self, assoc_attr, AssociationSetFactory().create_from_assocs(
        assocs=parsed_assocs, ontology=getattr(self, ontology_attr)))
    setattr(self, assoc_attr, self.remove_blacklisted_annotations(
        association_set=getattr(self, assoc_attr),
        ontology=getattr(self, ontology_attr),
        terms_blacklist=config.get_module_property(module=module,
                                                   prop=ConfigModuleProperty.EXCLUDE_TERMS)))
def test_remap_associations(self):
    # A single GO:0018996 annotation for subject "1" is pushed through
    # set_associations; the stored annotation must point to GO:0042303.
    record = DataManager.create_annotation_record(
        "", "1", "a", "protein_coding", "001", "GO:0018996", "", "F", "EXP", None, "WB", "")
    association_set = AssociationSetFactory().create_from_assocs(
        assocs=[record], ontology=self.df.go_ontology)
    self.df.set_associations(associations_type=DataType.GO, associations=association_set,
                             config=self.conf_parser)
    first_for_subject = self.df.go_associations.associations_by_subj["1"][0]
    self.assertEqual(first_for_subject["object"]["id"], "GO:0042303")
def test_get_common_ancestors(self):
    # Part 1: the experimental process terms of WBGene00000912 must share at
    # least one common ancestor.
    self.load_go_ontology()
    generator = OntologySentenceGenerator(gene_id="WB:WBGene00000912", module=Module.GO,
                                          data_manager=self.df, config=self.conf_parser)
    node_ids = generator.terms_groups[('P', '')]["EXPERIMENTAL"]
    common_ancestors = get_all_common_ancestors(node_ids, generator.ontology)
    self.assertTrue(len(common_ancestors) > 0, "Common ancestors not found")

    # Part 2: add four process annotations for WBGene00003931, blacklist
    # GO:0040024 and check that it is not reported as a common ancestor.
    associations = []
    for subj_associations in self.df.go_associations.associations_by_subj.values():
        associations.extend(subj_associations)
    for term_id in ("GO:0043055", "GO:0061065", "GO:0043054", "GO:0043053"):
        associations.append(DataManager.create_annotation_record(
            source_line="", gene_id="WB:WBGene00003931", gene_symbol="", gene_type="gene",
            taxon_id="", object_id=term_id, qualifiers="", aspect="P", ecode="EXP",
            references="", prvdr="WB", date=""))
    self.df.go_associations = AssociationSetFactory().create_from_assocs(
        assocs=associations, ontology=self.df.go_ontology)
    self.conf_parser.config["go_sentences_options"]["exclude_terms"].append("GO:0040024")

    generator = OntologySentenceGenerator(gene_id="WB:WBGene00003931", module=Module.GO,
                                          data_manager=self.df, config=self.conf_parser)
    node_ids = generator.terms_groups[('P', '')]["EXPERIMENTAL"]
    common_ancestors = get_all_common_ancestors(node_ids, generator.ontology)
    self.assertTrue("GO:0040024" not in common_ancestors,
                    "Common ancestors contain blacklisted term")
def remove_blacklisted_annotations(association_set: AssociationSet, ontology: Ontology,
                                   terms_blacklist: List[str] = None) -> AssociationSet:
    """remove annotations linked to blacklisted ontology terms from an association set

    Args:
        association_set (AssociationSet): the original association set
        ontology (Ontology): the ontology linked to the annotations
        terms_blacklist (List[str]): the list of ontology terms related to the annotations to be removed.
            When empty or None the original association set is returned untouched
    Returns:
        AssociationSet: the filtered annotations
    """
    logger.info("Removing blacklisted terms and annotations")
    if not terms_blacklist:
        return association_set

    # Build the lookup once: testing membership against the list for every
    # annotation is linear in the size of the blacklist.
    blacklisted_ids = frozenset(terms_blacklist)
    kept_associations = [
        association
        for subj_associations in association_set.associations_by_subj.values()
        for association in subj_associations
        if association["object"]["id"] not in blacklisted_ids
    ]
    return AssociationSetFactory().create_from_assocs(assocs=kept_associations, ontology=ontology)
def test_set_associations(self):
    # Two molecular-function annotations for two subjects are stored through
    # set_associations; the resulting GO association set must be truthy.
    record_specs = (("1", "a", "GO:0019901"), ("2", "b", "GO:0005515"))
    records = [
        DataManager.create_annotation_record(
            "", subject_id, symbol, "protein_coding", "001", term_id, "", "F", "EXP", None, "WB", "")
        for subject_id, symbol, term_id in record_specs
    ]
    association_set = AssociationSetFactory().create_from_assocs(
        assocs=records, ontology=self.df.go_ontology)
    self.df.set_associations(associations_type=DataType.GO, associations=association_set,
                             config=self.conf_parser)
    self.assertTrue(self.df.go_associations)
def _load_expression_cluster_file(self, file_cache_path, file_url, load_into_data,
                                  add_to_expression_ontology_annotations: bool = False):
    """load an expression cluster file into a mapping and optionally add its terms as annotations

    The first line of the file is treated as a header and skipped. Every other
    line is split on tabs: the first column is the key under which the
    remaining columns are stored in ``load_into_data``.

    Args:
        file_cache_path: path to the cache file
        file_url: url of the source file
        load_into_data: mapping filled in place, keyed by the first column of each line
        add_to_expression_ontology_annotations (bool): when True, terms that resolve to an
            expression ontology id are appended to the current expression associations as
            "Enriched" annotations and the result is stored through set_associations
    """
    expr_clust_file = self._get_cached_file(cache_path=file_cache_path, file_source_url=file_url)
    associations = []
    terms_ids_map = {}  # term name -> resolved ontology id (None when unresolved)
    if add_to_expression_ontology_annotations:
        associations = [
            association
            for subj_associations in self.expression_associations.associations_by_subj.values()
            for association in subj_associations
        ]
    terms_replacement_regex = self.config.get_module_property(
        module=Module.EXPRESSION, prop=ConfigModuleProperty.RENAME_TERMS)

    # The context manager closes the handle even when a malformed line raises.
    with open(expr_clust_file) as cluster_file:
        next(cluster_file, None)  # skip the header line
        for line in cluster_file:
            linearr = line.strip().split("\t")
            gene_key = linearr[0]
            load_into_data[gene_key] = linearr[1:]
            entry = load_into_data[gene_key]
            # third stored column: comma separated terms, renamed through the configured regex
            entry[2] = WBDataManager.get_replaced_terms_arr(entry[2].split(","),
                                                            terms_replacement_regex)
            # fourth stored column: comma separated words with the " study" and
            # " analysis" suffixes dropped
            if entry[3]:
                entry[3] = [word.replace(" study", "").replace(" analysis", "")
                            for word in entry[3].split(",")]
            if add_to_expression_ontology_annotations:
                for term in entry[2]:
                    if term not in terms_ids_map:
                        term_ids = self.expression_ontology.resolve_names([term])
                        terms_ids_map[term] = term_ids[0] if term_ids else None
                    if terms_ids_map[term]:
                        associations.append(DataManager.create_annotation_record(
                            line, "WB:" + gene_key, "", "gene", "", terms_ids_map[term],
                            ["Enriched"], "A", "IDA", "", "", ""))

    if add_to_expression_ontology_annotations:
        self.set_associations(
            DataType.EXPR,
            associations=AssociationSetFactory().create_from_assocs(
                assocs=associations, ontology=self.expression_ontology),
            config=self.config)
def remap_associations(associations: AssociationSet, ontology: Ontology, associations_map: Dict[str, str]):
    """replace the object ids of annotations according to a mapping

    Args:
        associations (AssociationSet): the association set to remap
        ontology (Ontology): the ontology used to build the returned association set
        associations_map (Dict[str, str]): object id -> replacement object id
    Returns:
        the original association set when the mapping is empty or None, otherwise a new
        association set built from the annotation records, which are updated in place
    """
    if not associations_map:
        return associations

    remapped = []
    for subject_annotations in associations.associations_by_subj.values():
        for annotation in subject_annotations:
            term = annotation["object"]
            if term["id"] in associations_map:
                # the input record itself is modified, not a copy
                term["id"] = associations_map[term["id"]]
            remapped.append(annotation)
    factory = AssociationSetFactory()
    return factory.create_from_assocs(assocs=remapped, ontology=ontology)
def get_expression_annotations_from_db(data_provider, gd_data_manager, logger):
    """Get Expression Annotations From DB.

    Runs the expression annotation query for ``data_provider``, converts the
    result through ``GeneDescriptionsETL.add_annotations`` and wraps the
    collected annotations in an association set bound to the expression
    ontology of ``gd_data_manager``.
    """
    query_results = Neo4jHelper.run_single_parameter_query(
        GeneDescriptionsETL.get_expression_annotations_query, data_provider)

    collected = []
    GeneDescriptionsETL.add_annotations(collected,
                                        query_results,
                                        data_provider,
                                        DataType.EXPR,
                                        logger,
                                        gd_data_manager.expression_ontology)

    factory = AssociationSetFactory()
    return factory.create_from_assocs(assocs=list(collected),
                                      ontology=gd_data_manager.expression_ontology)
def get_disease_annotations_from_db(data_provider, gd_data_manager, logger):
    """Get Disease Annotations From DB

    Collects gene level disease annotations, feature level annotations that
    are attached to a single record per (allele, term) pair, and disease via
    orthology annotations, and returns them as one association set bound to
    the DO ontology of ``gd_data_manager``.
    """
    annotations = []

    # 1. annotations attached directly to genes
    gene_records = Neo4jHelper.run_single_parameter_query(
        GeneDescriptionsETL.get_gene_disease_annot_query, data_provider)
    GeneDescriptionsETL.add_annotations(annotations, gene_records, data_provider,
                                        DataType.DO, logger)

    # 2. annotations attached to features, grouped by (allele, term)
    feature_records = Neo4jHelper.run_single_parameter_query(
        GeneDescriptionsETL.get_feature_disease_annot_query, data_provider)
    by_allele_and_term = defaultdict(list)
    for record in feature_records:
        bucket = by_allele_and_term[(record["alleleId"], record["TermId"])]
        # NOTE(review): earlier[0] is compared with geneId, so the first field
        # of a record is presumably the gene id - confirm against the query
        if all(record["geneId"] != earlier[0] for earlier in bucket):
            bucket.append(record)

    # keep only disease annotations through simple entities
    # (e.g., alleles related to one gene only)
    single_gene_records = [records[0] for records in by_allele_and_term.values()
                           if len(records) == 1]
    GeneDescriptionsETL.add_annotations(annotations, single_gene_records, data_provider,
                                        DataType.DO, logger)

    # 3. disease via orthology
    orthology_records = Neo4jHelper.run_single_parameter_query(
        GeneDescriptionsETL.get_disease_via_orthology_query, data_provider)
    annotations.extend(
        GeneDescriptionsETL.create_annotation_record(
            gene_id=record["geneId"],
            gene_symbol=record["geneSymbol"],
            term_id=record["TermId"],
            aspect="D",
            ecode="DVO",
            prvdr=data_provider,
            qualifier="")
        for record in orthology_records)

    return AssociationSetFactory().create_from_assocs(assocs=list(annotations),
                                                      ontology=gd_data_manager.do_ontology)
def load_associations_from_file(self, associations_type: DataType, associations_url: str,
                                associations_cache_path: str, config: GenedescConfigParser) -> None:
    """load associations of the given type from a file and store them through set_associations

    Args:
        associations_type (DataType): the type of associations to set
        associations_url (str): url to the association file
        associations_cache_path (str): path to cache file for the associations
        config (GenedescConfigParser): configuration object where to read properties
    """
    logger.info("Loading associations from file")
    parser_config = AssocParserConfig(remove_double_prefixes=True, paint=True)
    cached_file = self._get_cached_file(cache_path=associations_cache_path,
                                        file_source_url=associations_url)
    parsed_assocs = GafParser(config=parser_config).parse(file=cached_file, skipheader=True)
    association_set = AssociationSetFactory().create_from_assocs(
        assocs=parsed_assocs, ontology=self.get_ontology(associations_type))
    self.set_associations(associations_type=associations_type, associations=association_set,
                          config=config)
def test_trimming_lca(self):
    # For three genes with hand-picked anatomy annotations, the description
    # built with the "lca" trimming algorithm must cover at least as much as
    # the one built with the "ic" algorithm.

    def use_trimming_algorithm(name):
        # switch both the GO and the expression modules to the given algorithm
        self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = name
        self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = name

    def set_expression_annotations(gene, term_ids):
        # replace the expression associations with annotations for this gene only
        associations = self.get_associations(gene.id, term_ids, ["Verified"], "A", "IDA")
        self.df.expression_associations = AssociationSetFactory().create_from_assocs(
            assocs=associations, ontology=self.df.expression_ontology)

    def build_description(gene, gene_name):
        # generate GO and expression modules under the current configuration
        gene_desc = GeneDescription(gene_id=gene.id, config=self.conf_parser,
                                    gene_name=gene_name, add_gene_name=False)
        set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser,
                                 gene_desc=gene_desc, gene=gene)
        set_expression_module(self.df, self.conf_parser, gene_desc, gene)
        gene_desc.stats.calculate_stats(data_manager=self.df)
        return gene_desc

    # --- case 1: abl-1, including the one-off ontology and config setup ---
    use_trimming_algorithm("ic")
    gene = Gene(id="WB:WBGene00000018", name="abl-1", dead=False, pseudo=False)
    self.df.load_ontology_from_file(
        ontology_type=DataType.EXPR,
        ontology_url="file://" + os.path.join(self.this_dir, "data", "anatomy_ontology.WS274.obo"),
        ontology_cache_path=os.path.join(self.this_dir, "cache", "anatomy_ontology.WS274.obo"),
        config=self.conf_parser)
    logger.info("Loading expression associations from file")
    expression_options = self.conf_parser.config["expression_sentences_options"]
    expression_options["max_num_terms"] = 5
    expression_options["trim_min_distance_from_root"]["A"] = 4
    expression_options["remove_children_if_parent_is_present"] = False
    set_expression_annotations(gene, [
        "WBbt:0006796", "WBbt:0006759", "WBbt:0005300", "WBbt:0008598",
        "WBbt:0003681", "WBbt:0005829", "WBbt:0003927", "WBbt:0006751"])
    use_trimming_algorithm("lca")
    gene_desc_lca = build_description(gene, "abl-1")
    use_trimming_algorithm("ic")
    # the ic structures are set once, before the first ic description is built
    set_ic_ontology_struct(ontology=self.df.go_ontology, relations=self.df.go_relations)
    set_ic_ontology_struct(ontology=self.df.expression_ontology, relations=self.df.expr_relations)
    gene_desc_ic = build_description(gene, "abl-1")
    self.assertTrue(
        gene_desc_lca.stats.coverage_percentage >= gene_desc_ic.stats.coverage_percentage, "1")

    # --- case 2: aat-1 ---
    use_trimming_algorithm("lca")
    gene = Gene(id="WB:WBGene00000022", name="aat-1", dead=False, pseudo=False)
    set_expression_annotations(gene, [
        "WBbt:0005828", "WBbt:0006751", "WBbt:0005439", "WBbt:0005788", "WBbt:0006749",
        "WBbt:0005300", "WBbt:0005735", "WBbt:0005747", "WBbt:0005772", "WBbt:0005776",
        "WBbt:0005812", "WBbt:0005741", "WBbt:0005799", "WBbt:0003681"])
    gene_desc_lca = build_description(gene, "aat-1")
    use_trimming_algorithm("ic")
    gene_desc_ic = build_description(gene, "aat-1")
    self.assertTrue(
        gene_desc_lca.stats.coverage_percentage >= gene_desc_ic.stats.coverage_percentage, "2")

    # --- case 3: acr-5 ---
    use_trimming_algorithm("lca")
    gene = Gene(id="WB:WBGene00000044", name="acr-5", dead=False, pseudo=False)
    set_expression_annotations(gene, [
        'WBbt:0003679', 'WBbt:0006759', 'WBbt:0005336', 'WBbt:0006751', 'WBbt:0005300',
        'WBbt:0005274', 'WBbt:0005741', 'WBbt:0006749', 'WBbt:0005735'])
    gene_desc_lca = build_description(gene, "acr-5")
    use_trimming_algorithm("ic")
    gene_desc_ic = build_description(gene, "acr-5")
    self.assertTrue(
        gene_desc_lca.stats.coverage_percentage >= gene_desc_ic.stats.coverage_percentage, "3")
def load_associations_from_file(self, associations_type: DataType, associations_url: str,
                                associations_cache_path: str, config: GenedescConfigParser,
                                association_additional_url: str = None,
                                association_additional_cache_path: str = None) -> None:
    """load associations of the given type from file

    GO associations are delegated to the parent class. Expression associations
    are read from a tab separated file, skipping lines that start with "!".
    DO associations are parsed with a GafParser and, when both additional
    arguments are given, rebuilt from the IEA annotations of the parsed set
    plus the non-IEA annotations of the additional file. Expression and DO
    associations are finally filtered through remove_blacklisted_annotations.

    Args:
        associations_type (DataType): the type of associations to set
        associations_url (str): url to the association file
        associations_cache_path (str): path to cache file for the associations
        config (GenedescConfigParser): configuration object where to read properties
        association_additional_url (str): url to an additional association file (DO only)
        association_additional_cache_path (str): path to cache file for the additional
            association file (DO only)
    """
    logger.info("Loading associations from file")
    if associations_type == DataType.GO:
        super().load_associations_from_file(associations_type=associations_type,
                                            associations_url=associations_url,
                                            associations_cache_path=associations_cache_path,
                                            config=config)
    elif associations_type == DataType.EXPR:
        associations = []
        file_path = self._get_cached_file(cache_path=associations_cache_path,
                                          file_source_url=associations_url)
        # The context manager closes the handle even when a malformed line raises.
        with open(file_path) as assoc_file:
            for line in assoc_file:
                if line.strip().startswith("!"):
                    continue
                linearr = line.strip().split("\t")
                # only keep annotations to terms known to the expression ontology
                if not self.expression_ontology.node(linearr[4]):
                    continue
                gene_id = linearr[0] + ":" + linearr[1]
                qualifiers = linearr[3].split("|")
                # NOTE(review): str.split never returns an empty list, so the
                # length check cannot fire; an empty qualifier column yields
                # [""] and is passed on as is - confirm this is intended
                if len(qualifiers) == 0 or "Partial" in qualifiers or "Certain" in qualifiers:
                    qualifiers = ["Verified"]
                associations.append(DataManager.create_annotation_record(
                    line, gene_id, linearr[2], linearr[11], linearr[12], linearr[4], qualifiers,
                    linearr[8], linearr[6], linearr[5].split("|"), linearr[14], linearr[13]))
        self.expression_associations = AssociationSetFactory().create_from_assocs(
            assocs=associations, ontology=self.expression_ontology)
        self.expression_associations = self.remove_blacklisted_annotations(
            association_set=self.expression_associations,
            ontology=self.expression_ontology,
            terms_blacklist=config.get_module_property(module=Module.EXPRESSION,
                                                       prop=ConfigModuleProperty.EXCLUDE_TERMS))
    elif associations_type == DataType.DO:
        self.do_associations = AssociationSetFactory().create_from_assocs(
            assocs=GafParser().parse(
                file=self._get_cached_file(cache_path=associations_cache_path,
                                           file_source_url=associations_url),
                skipheader=True),
            ontology=self.do_ontology)
        if association_additional_cache_path and association_additional_url:
            # from the parsed file keep only the IEA annotations ...
            associations = [
                association
                for subj_associations in self.do_associations.associations_by_subj.values()
                for association in subj_associations
                if association["evidence"]["type"] == "IEA"
            ]
            # ... and take every other annotation from the additional file
            file_path = self._get_cached_file(cache_path=association_additional_cache_path,
                                              file_source_url=association_additional_url)
            header = True
            with open(file_path) as additional_file:
                for line in additional_file:
                    if line.strip().startswith("!"):
                        continue
                    if header:
                        # the first line that is not a "!" comment is the column header
                        header = False
                        continue
                    linearr = line.strip().split("\t")
                    if self.do_ontology.node(linearr[10]) and linearr[16] != "IEA":
                        gene_ids = [linearr[2]]
                        if linearr[1] == "allele":
                            # allele rows carry a comma separated list of gene ids
                            gene_ids = linearr[4].split(",")
                        for gene_id in gene_ids:
                            associations.append(DataManager.create_annotation_record(
                                line, gene_id, linearr[3], linearr[1], linearr[0], linearr[10],
                                linearr[9].split("|"), "D", linearr[16], linearr[18].split("|"),
                                linearr[20], linearr[19]))
            self.do_associations = AssociationSetFactory().create_from_assocs(
                assocs=associations, ontology=self.do_ontology)
        self.do_associations = self.remove_blacklisted_annotations(
            association_set=self.do_associations,
            ontology=self.do_ontology,
            terms_blacklist=config.get_module_property(module=Module.DO_EXPERIMENTAL,
                                                       prop=ConfigModuleProperty.EXCLUDE_TERMS))
def test_set_covering_with_ontology(self):
    # Builds fifteen ISS disease annotations for MGI:88452, generates the
    # disease-via-orthology sentences with six high priority terms and prints
    # the resulting description (no assertion is made on the text).
    self.load_do_ontology()
    orthology_options = self.conf_parser.config["do_via_orth_sentences_options"]
    orthology_options["trimming_algorithm"] = "ic"
    orthology_options["max_num_terms"] = 5

    high_priority_terms = ["DOID:0080028", "DOID:0080056", "DOID:14789",
                           "DOID:0080026", "DOID:14415", "DOID:0080045"]
    other_terms = ["DOID:3371", "DOID:8886", "DOID:674", "DOID:5614", "DOID:11830",
                   "DOID:8398", "DOID:2256", "DOID:5327", "DOID:1123"]
    associations = [
        DataManager.create_annotation_record(
            source_line="", gene_id="MGI:88452", gene_symbol="", gene_type="gene", taxon_id="",
            object_id=term_id, qualifiers="", aspect="D", ecode="ISS", references="",
            prvdr="WB", date="")
        for term_id in high_priority_terms + other_terms
    ]
    self.df.do_associations = AssociationSetFactory().create_from_assocs(
        assocs=associations, ontology=self.df.do_ontology)

    generator = OntologySentenceGenerator(gene_id="MGI:88452", module=Module.DO_ORTHOLOGY,
                                          data_manager=self.df, config=self.conf_parser)
    sentences = generator.get_module_sentences(
        config=self.conf_parser, aspect='D', qualifier='', merge_groups_with_same_prefix=True,
        keep_only_best_group=True,
        high_priority_term_ids=["DOID:0080028", "DOID:0080056", "DOID:14789",
                                "DOID:0080026", "DOID:14415", "DOID:0080045"])
    print(sentences.get_description())