Пример #1
0
def learn(resource, input, outdir, target_assocfile, target_ontology,
          target_root_class):
    """
    Fit association rules and write the reports into ``outdir``.

    The ontology is created from ``resource`` and the associations are read
    from the ``input`` file. Optionally the associations are split on
    ``target_root_class`` and a separate target ontology / target association
    file is attached to the learner. The report index is written to
    ``<outdir>/index.md``.
    """
    logging.basicConfig(level=logging.INFO)

    assoc_factory = AssociationSetFactory()
    ontology_factory = OntologyFactory()

    ontology = ontology_factory.create(resource)
    source_assocs = assoc_factory.create_from_file(file=input,
                                                   ontology=ontology,
                                                   fmt=None)
    learner = ol.OntologyLearner(assocs=source_assocs)
    subclass_ontology = ontology.subontology(relations=['subClassOf'])

    if target_root_class:
        learner.split_assocs(target_root_class, ontology=subclass_ontology)

    if target_ontology:
        learner.target_ontology = ontology_factory.create(target_ontology)

    if target_assocfile:
        # Target associations are resolved against the learner's target
        # ontology when it has one, otherwise against the source ontology.
        if learner.target_ontology is None:
            target_ont = ontology
        else:
            target_ont = learner.target_ontology
        learner.target_assocs = assoc_factory.create_from_file(
            target_assocfile, ontology=target_ont, fmt=None)

    with open(outdir + '/index.md', 'w') as report:
        learner.fit_all(dir=outdir, reportfile=report)
Пример #2
0
    def load_associations_from_file(self, associations_type: DataType,
                                    associations_url: str,
                                    associations_cache_path: str,
                                    config: GenedescConfigParser) -> None:
        """Load GO, DO or expression associations from a GAF file.

        The file is fetched through the cache, parsed with ``GafParser`` and
        stored on the attribute matching ``associations_type``; annotations to
        the terms excluded in the configuration are then filtered out. Any
        other association type is ignored.

        Args:
            associations_type (DataType): the type of associations to set
            associations_url (str): url to the association file
            associations_cache_path (str): path to cache file for the associations
            config (GenedescConfigParser): configuration object where to read properties
        """
        assoc_config = AssocParserConfig(remove_double_prefixes=True,
                                         paint=True)
        if associations_type == DataType.GO:
            logger.info("Loading GO associations from file")
            gaf_path = self._get_cached_file(
                cache_path=associations_cache_path,
                file_source_url=associations_url)
            parsed = GafParser(config=assoc_config).parse(file=gaf_path,
                                                          skipheader=True)
            self.go_associations = AssociationSetFactory().create_from_assocs(
                assocs=parsed, ontology=self.go_ontology)
            excluded_terms = config.get_module_property(
                module=Module.GO, prop=ConfigModuleProperty.EXCLUDE_TERMS)
            self.go_associations = self.remove_blacklisted_annotations(
                association_set=self.go_associations,
                ontology=self.go_ontology,
                terms_blacklist=excluded_terms)
        elif associations_type == DataType.DO:
            logger.info("Loading DO associations from file")
            gaf_path = self._get_cached_file(
                cache_path=associations_cache_path,
                file_source_url=associations_url)
            parsed = GafParser(config=assoc_config).parse(file=gaf_path,
                                                          skipheader=True)
            self.do_associations = AssociationSetFactory().create_from_assocs(
                assocs=parsed, ontology=self.do_ontology)
            excluded_terms = config.get_module_property(
                module=Module.DO_EXP_AND_BIO,
                prop=ConfigModuleProperty.EXCLUDE_TERMS)
            self.do_associations = self.remove_blacklisted_annotations(
                association_set=self.do_associations,
                ontology=self.do_ontology,
                terms_blacklist=excluded_terms)
        elif associations_type == DataType.EXPR:
            logger.info("Loading Expression associations from file")
            gaf_path = self._get_cached_file(
                cache_path=associations_cache_path,
                file_source_url=associations_url)
            parsed = GafParser(config=assoc_config).parse(file=gaf_path,
                                                          skipheader=True)
            self.expression_associations = \
                AssociationSetFactory().create_from_assocs(
                    assocs=parsed, ontology=self.expression_ontology)
            excluded_terms = config.get_module_property(
                module=Module.EXPRESSION,
                prop=ConfigModuleProperty.EXCLUDE_TERMS)
            self.expression_associations = self.remove_blacklisted_annotations(
                association_set=self.expression_associations,
                ontology=self.expression_ontology,
                terms_blacklist=excluded_terms)
 def test_remap_associations(self):
     """An annotation to GO:0018996 must be stored as GO:0042303 after set_associations."""
     record = DataManager.create_annotation_record("", "1", "a", "protein_coding", "001", "GO:0018996",
                                                   "", "F", "EXP", None, "WB", "")
     go_assocs = AssociationSetFactory().create_from_assocs(assocs=[record], ontology=self.df.go_ontology)
     self.df.set_associations(associations_type=DataType.GO, associations=go_assocs, config=self.conf_parser)
     stored_for_subject = self.df.go_associations.associations_by_subj["1"]
     self.assertEqual(stored_for_subject[0]["object"]["id"], "GO:0042303")
 def test_get_common_ancestors(self):
     """Common ancestors exist for a known gene and never include a blacklisted term."""
     self.load_go_ontology()
     generator = OntologySentenceGenerator(gene_id="WB:WBGene00000912", module=Module.GO,
                                           data_manager=self.df, config=self.conf_parser)
     experimental_terms = generator.terms_groups[('P', '')]["EXPERIMENTAL"]
     ancestors = get_all_common_ancestors(experimental_terms, generator.ontology)
     self.assertTrue(len(ancestors) > 0, "Common ancestors not found")

     # Start from every association already loaded, then add four process
     # annotations for a second gene.
     all_associations = []
     for subj_associations in self.df.go_associations.associations_by_subj.values():
         all_associations.extend(subj_associations)
     for term_id in ("GO:0043055", "GO:0061065", "GO:0043054", "GO:0043053"):
         all_associations.append(DataManager.create_annotation_record(
             source_line="", gene_id="WB:WBGene00003931", gene_symbol="", gene_type="gene", taxon_id="",
             object_id=term_id, qualifiers="", aspect="P", ecode="EXP", references="", prvdr="WB", date=""))
     self.df.go_associations = AssociationSetFactory().create_from_assocs(assocs=all_associations,
                                                                          ontology=self.df.go_ontology)

     # Blacklist a term and check it is not reported as a common ancestor.
     self.conf_parser.config["go_sentences_options"]["exclude_terms"].append("GO:0040024")
     generator = OntologySentenceGenerator(gene_id="WB:WBGene00003931", module=Module.GO,
                                           data_manager=self.df, config=self.conf_parser)
     experimental_terms = generator.terms_groups[('P', '')]["EXPERIMENTAL"]
     ancestors = get_all_common_ancestors(experimental_terms, generator.ontology)
     self.assertTrue("GO:0040024" not in ancestors, "Common ancestors contain blacklisted term")
Пример #5
0
    def remove_blacklisted_annotations(
            association_set: AssociationSet,
            ontology: Ontology,
            terms_blacklist: List[str] = None) -> AssociationSet:
        """Remove annotations linked to blacklisted ontology terms from an association set.

        Args:
            association_set (AssociationSet): the original association set
            ontology (Ontology): the ontology linked to the annotations
            terms_blacklist (List[str]): the list of ontology terms related to the annotations to be removed
        Returns:
            AssociationSet: a new association set without the blacklisted annotations, or the
                original ``association_set`` object when the blacklist is empty or ``None``
        """
        logger.info("Removing blacklisted terms and annotations")
        if not terms_blacklist:
            return association_set
        # Build the lookup once: testing membership against the raw list would
        # rescan the whole blacklist for every single association.
        blacklisted_ids = set(terms_blacklist)
        kept_associations = [
            association
            for subj_associations in association_set.associations_by_subj.values()
            for association in subj_associations
            if association["object"]["id"] not in blacklisted_ids
        ]
        return AssociationSetFactory().create_from_assocs(
            assocs=kept_associations, ontology=ontology)
 def test_set_associations(self):
     """Setting a GO association set on the data manager leaves a truthy ``go_associations``."""
     record_args = (
         ("", "1", "a", "protein_coding", "001", "GO:0019901", "", "F", "EXP", None, "WB", ""),
         ("", "2", "b", "protein_coding", "001", "GO:0005515", "", "F", "EXP", None, "WB", ""),
     )
     records = [DataManager.create_annotation_record(*args) for args in record_args]
     go_assocs = AssociationSetFactory().create_from_assocs(assocs=records, ontology=self.df.go_ontology)
     self.df.set_associations(associations_type=DataType.GO, associations=go_assocs, config=self.conf_parser)
     self.assertTrue(self.df.go_associations)
 def _load_expression_cluster_file(
         self,
         file_cache_path,
         file_url,
         load_into_data,
         add_to_expression_ontology_annotations: bool = False):
     """Read a tab-separated expression cluster file into ``load_into_data``.

     The first line of the file is treated as a header and skipped. For every
     other line the first column is used as key and the remaining columns are
     stored as a list under that key. Within that list, element 2 is split on
     commas and passed through the configured term renaming, and element 3
     (when non-empty) is split on commas with " study" / " analysis" removed
     from each word.

     Args:
         file_cache_path: cache path passed to ``_get_cached_file``
         file_url: source url passed to ``_get_cached_file``
         load_into_data: mapping that is filled in place
         add_to_expression_ontology_annotations (bool): when True, every term
             whose name resolves in the expression ontology is added as an
             "Enriched" annotation to the existing expression associations,
             and the merged set is stored through ``set_associations``
     """
     expr_clust_file = self._get_cached_file(cache_path=file_cache_path,
                                             file_source_url=file_url)
     associations = []
     terms_ids_map = {}
     if add_to_expression_ontology_annotations:
         associations = [
             association for subj_associations in
             self.expression_associations.associations_by_subj.values()
             for association in subj_associations
         ]
     terms_replacement_regex = self.config.get_module_property(
         module=Module.EXPRESSION, prop=ConfigModuleProperty.RENAME_TERMS)
     # Use a context manager so the handle is closed even when a malformed
     # line raises while it is being processed.
     with open(expr_clust_file) as cluster_file:
         next(cluster_file, None)  # skip the header line
         for line in cluster_file:
             linearr = line.strip().split("\t")
             gene_key = linearr[0]
             load_into_data[gene_key] = linearr[1:]
             load_into_data[gene_key][2] = WBDataManager.get_replaced_terms_arr(
                 load_into_data[gene_key][2].split(","),
                 terms_replacement_regex)
             if load_into_data[gene_key] and load_into_data[gene_key][3]:
                 load_into_data[gene_key][3] = [
                     word.replace(" study", "").replace(" analysis", "")
                     for word in load_into_data[gene_key][3].split(",")
                 ]
             if not add_to_expression_ontology_annotations:
                 continue
             for term in load_into_data[gene_key][2]:
                 if term not in terms_ids_map:
                     # Remember failed lookups as None so each term name is
                     # resolved against the ontology only once.
                     term_ids = self.expression_ontology.resolve_names([term])
                     terms_ids_map[term] = term_ids[0] if term_ids else None
                 if terms_ids_map[term]:
                     associations.append(
                         DataManager.create_annotation_record(
                             line, "WB:" + gene_key, "", "gene", "",
                             terms_ids_map[term], ["Enriched"], "A",
                             "IDA", "", "", ""))
     if add_to_expression_ontology_annotations:
         self.set_associations(
             DataType.EXPR,
             associations=AssociationSetFactory().create_from_assocs(
                 assocs=associations, ontology=self.expression_ontology),
             config=self.config)
Пример #8
0
 def remap_associations(associations: AssociationSet, ontology: Ontology,
                        associations_map: Dict[str, str]):
     """Replace annotated term ids according to ``associations_map``.

     When the map is empty or ``None`` the input association set is returned
     as is. Otherwise every association whose object id is a key of the map
     gets that id replaced by the mapped value, and a new association set is
     built from all the associations.

     Note that the association records are updated in place, so the records
     held by the input ``associations`` are modified as well.
     """
     if not associations_map:
         return associations
     remapped = []
     for subj_associations in associations.associations_by_subj.values():
         for association in subj_associations:
             annotated_term = association["object"]
             if annotated_term["id"] in associations_map:
                 annotated_term["id"] = associations_map[annotated_term["id"]]
             remapped.append(association)
     return AssociationSetFactory().create_from_assocs(assocs=remapped,
                                                       ontology=ontology)
 def get_expression_annotations_from_db(data_provider, gd_data_manager,
                                        logger):
     """Build the expression association set of a data provider from the database.

     Runs the expression annotation query for ``data_provider``, converts the
     result through ``GeneDescriptionsETL.add_annotations`` and wraps the
     collected annotations in an association set bound to the expression
     ontology of ``gd_data_manager``.
     """
     collected = []
     query_result = Neo4jHelper.run_single_parameter_query(
         GeneDescriptionsETL.get_expression_annotations_query, data_provider)
     GeneDescriptionsETL.add_annotations(collected,
                                         query_result,
                                         data_provider,
                                         DataType.EXPR,
                                         logger,
                                         gd_data_manager.expression_ontology)
     factory = AssociationSetFactory()
     return factory.create_from_assocs(
         assocs=list(collected),
         ontology=gd_data_manager.expression_ontology)
    def get_disease_annotations_from_db(data_provider, gd_data_manager, logger):
        """Build the disease association set of a data provider from the database.

        Three sources are merged: gene-level disease annotations, feature-level
        disease annotations, and diseases inferred via orthology (added with
        aspect "D" and evidence code "DVO"). The result is an association set
        bound to the DO ontology of ``gd_data_manager``.
        """
        collected = []
        gene_records = Neo4jHelper.run_single_parameter_query(
            GeneDescriptionsETL.get_gene_disease_annot_query, data_provider)
        GeneDescriptionsETL.add_annotations(
            collected, gene_records, data_provider, DataType.DO, logger)

        feature_records = Neo4jHelper.run_single_parameter_query(
            GeneDescriptionsETL.get_feature_disease_annot_query, data_provider)
        records_by_allele_and_term = defaultdict(list)
        for record in feature_records:
            already_seen = records_by_allele_and_term[(record["alleleId"],
                                                       record["TermId"])]
            # NOTE(review): previous[0] reads the stored record positionally;
            # presumably its first field is the gene id - confirm against the query.
            if all(record["geneId"] != previous[0] for previous in already_seen):
                records_by_allele_and_term[(record["alleleId"],
                                            record["TermId"])].append(record)
        # keep only disease annotations through simple entities
        # (e.g., alleles related to one gene only)
        single_gene_records = [
            grouped[0] for grouped in records_by_allele_and_term.values()
            if len(grouped) == 1
        ]
        GeneDescriptionsETL.add_annotations(
            collected, single_gene_records, data_provider, DataType.DO, logger)

        orthology_records = Neo4jHelper.run_single_parameter_query(
            GeneDescriptionsETL.get_disease_via_orthology_query, data_provider)
        for orth_record in orthology_records:
            via_orthology = GeneDescriptionsETL.create_annotation_record(
                gene_id=orth_record["geneId"],
                gene_symbol=orth_record["geneSymbol"],
                term_id=orth_record["TermId"],
                aspect="D",
                ecode="DVO",
                prvdr=data_provider,
                qualifier="")
            collected.append(via_orthology)
        factory = AssociationSetFactory()
        return factory.create_from_assocs(assocs=list(collected),
                                          ontology=gd_data_manager.do_ontology)
Пример #11
0
    def load_associations_from_file(self, associations_type: DataType,
                                    associations_url: str,
                                    associations_cache_path: str,
                                    config: GenedescConfigParser) -> None:
        """Parse a GAF association file and register it for the given data type.

        The file is fetched through the cache, parsed with ``GafParser`` against
        the ontology returned by ``get_ontology`` for ``associations_type``, and
        the resulting association set is handed to ``set_associations``.

        Args:
            associations_type (DataType): the type of associations to set
            associations_url (str): url to the association file
            associations_cache_path (str): path to cache file for the associations
            config (GenedescConfigParser): configuration object where to read properties
        """
        logger.info("Loading associations from file")
        parser_config = AssocParserConfig(remove_double_prefixes=True,
                                          paint=True)
        gaf_path = self._get_cached_file(cache_path=associations_cache_path,
                                         file_source_url=associations_url)
        parsed = GafParser(config=parser_config).parse(file=gaf_path,
                                                       skipheader=True)
        association_set = AssociationSetFactory().create_from_assocs(
            assocs=parsed, ontology=self.get_ontology(associations_type))
        self.set_associations(associations_type=associations_type,
                              associations=association_set,
                              config=config)
Пример #12
0
    def test_trimming_lca(self):
        """Check, for three genes, that "lca" trimming covers at least as much as "ic" trimming.

        For each gene a fixed list of expression annotations is installed, a gene
        description is built once with the trimming algorithm set to "lca" and once
        with "ic", and the coverage percentage of the "lca" description is asserted
        to be greater than or equal to the "ic" one.
        """
        self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "ic"
        self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "ic"
        # --- gene 1: abl-1 ---
        gene = Gene(id="WB:WBGene00000018", name="abl-1", dead=False, pseudo=False)
        self.df.load_ontology_from_file(ontology_type=DataType.EXPR, ontology_url="file://" + os.path.join(
            self.this_dir, "data", "anatomy_ontology.WS274.obo"),
                                        ontology_cache_path=os.path.join(self.this_dir, "cache",
                                                                         "anatomy_ontology.WS274.obo"),
                                        config=self.conf_parser)
        logger.info("Loading expression associations from file")
        # Expression trimming options shared by all three genes below.
        self.conf_parser.config["expression_sentences_options"]["max_num_terms"] = 5
        self.conf_parser.config["expression_sentences_options"]["trim_min_distance_from_root"]["A"] = 4
        self.conf_parser.config["expression_sentences_options"]["remove_children_if_parent_is_present"] = False
        associations = self.get_associations(gene.id, ["WBbt:0006796", "WBbt:0006759", "WBbt:0005300", "WBbt:0008598",
                                                       "WBbt:0003681", "WBbt:0005829", "WBbt:0003927", "WBbt:0006751"],
                                             ["Verified"], "A", "IDA")
        self.df.expression_associations = AssociationSetFactory().create_from_assocs(assocs=associations,
                                                                                     ontology=self.df.expression_ontology)
        # Description built with "lca" trimming.
        self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "lca"
        self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "lca"
        gene_desc_lca = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="abl-1",
                                        add_gene_name=False)
        set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser, gene_desc=gene_desc_lca, gene=gene)
        set_expression_module(self.df, self.conf_parser, gene_desc_lca, gene)
        gene_desc_lca.stats.calculate_stats(data_manager=self.df)
        # Description built with "ic" trimming.
        self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "ic"
        self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "ic"
        # NOTE(review): presumably prepares the ontologies for "ic" trimming; it is
        # only called here, before the first "ic" description - confirm it persists.
        set_ic_ontology_struct(ontology=self.df.go_ontology, relations=self.df.go_relations)
        set_ic_ontology_struct(ontology=self.df.expression_ontology, relations=self.df.expr_relations)
        gene_desc_ic = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="abl-1",
                                       add_gene_name=False)
        set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser, gene_desc=gene_desc_ic, gene=gene)
        set_expression_module(self.df, self.conf_parser, gene_desc_ic, gene)
        gene_desc_ic.stats.calculate_stats(data_manager=self.df)
        self.assertTrue(gene_desc_lca.stats.coverage_percentage >= gene_desc_ic.stats.coverage_percentage, "1")

        # --- gene 2: aat-1 ---
        self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "lca"
        self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "lca"
        gene = Gene(id="WB:WBGene00000022", name="aat-1", dead=False, pseudo=False)
        associations = self.get_associations(gene.id, ["WBbt:0005828", "WBbt:0006751", "WBbt:0005439", "WBbt:0005788",
                                                       "WBbt:0006749", "WBbt:0005300", "WBbt:0005735", "WBbt:0005747",
                                                       "WBbt:0005772", "WBbt:0005776", "WBbt:0005812", "WBbt:0005741",
                                                       "WBbt:0005799", "WBbt:0003681"],
                                             ["Verified"], "A", "IDA")
        self.df.expression_associations = AssociationSetFactory().create_from_assocs(
            assocs=associations, ontology=self.df.expression_ontology)
        gene_desc_lca = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="aat-1",
                                        add_gene_name=False)
        set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser, gene_desc=gene_desc_lca, gene=gene)
        set_expression_module(self.df, self.conf_parser, gene_desc_lca, gene)
        gene_desc_lca.stats.calculate_stats(data_manager=self.df)
        self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "ic"
        self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "ic"
        gene_desc_ic = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="aat-1",
                                       add_gene_name=False)
        set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser, gene_desc=gene_desc_ic, gene=gene)
        set_expression_module(self.df, self.conf_parser, gene_desc_ic, gene)
        gene_desc_ic.stats.calculate_stats(data_manager=self.df)
        self.assertTrue(gene_desc_lca.stats.coverage_percentage >= gene_desc_ic.stats.coverage_percentage, "2")

        # --- gene 3: acr-5 ---
        self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "lca"
        self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "lca"
        gene = Gene(id="WB:WBGene00000044", name="acr-5", dead=False, pseudo=False)
        associations = self.get_associations(gene.id, ['WBbt:0003679', 'WBbt:0006759', 'WBbt:0005336', 'WBbt:0006751',
                                                       'WBbt:0005300', 'WBbt:0005274', 'WBbt:0005741', 'WBbt:0006749',
                                                       'WBbt:0005735'],
                                             ["Verified"], "A", "IDA")
        self.df.expression_associations = AssociationSetFactory().create_from_assocs(
            assocs=associations, ontology=self.df.expression_ontology)
        gene_desc_lca = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="acr-5",
                                        add_gene_name=False)
        set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser, gene_desc=gene_desc_lca, gene=gene)
        set_expression_module(self.df, self.conf_parser, gene_desc_lca, gene)
        gene_desc_lca.stats.calculate_stats(data_manager=self.df)
        self.conf_parser.config["go_sentences_options"]["trimming_algorithm"] = "ic"
        self.conf_parser.config["expression_sentences_options"]["trimming_algorithm"] = "ic"
        gene_desc_ic = GeneDescription(gene_id=gene.id, config=self.conf_parser, gene_name="acr-5",
                                       add_gene_name=False)
        set_gene_ontology_module(dm=self.df, conf_parser=self.conf_parser, gene_desc=gene_desc_ic, gene=gene)
        set_expression_module(self.df, self.conf_parser, gene_desc_ic, gene)
        gene_desc_ic.stats.calculate_stats(data_manager=self.df)
        self.assertTrue(gene_desc_lca.stats.coverage_percentage >= gene_desc_ic.stats.coverage_percentage, "3")
Пример #13
0
 def load_associations_from_file(
         self,
         associations_type: DataType,
         associations_url: str,
         associations_cache_path: str,
         config: GenedescConfigParser,
         association_additional_url: str = None,
         association_additional_cache_path: str = None) -> None:
     """Load GO, expression or disease associations from file.

     GO associations are delegated to the parent implementation. Expression
     associations are read from a tab-separated file whose lines starting
     with "!" are skipped. Disease associations are parsed as GAF; when both
     additional arguments are given, only the "IEA" annotations of the GAF
     file are kept and the non-"IEA" annotations are taken from the
     additional tab-separated file instead. Expression and disease
     associations are finally filtered with the configured excluded terms.

     Args:
         associations_type (DataType): the type of associations to set
         associations_url (str): url to the association file
         associations_cache_path (str): path to cache file for the associations
         config (GenedescConfigParser): configuration object where to read properties
         association_additional_url (str): url to the additional disease
             association file (used for DO only)
         association_additional_cache_path (str): path to cache file for the
             additional disease association file (used for DO only)
     """
     logger.info("Loading associations from file")
     if associations_type == DataType.GO:
         super().load_associations_from_file(
             associations_type=associations_type,
             associations_url=associations_url,
             associations_cache_path=associations_cache_path,
             config=config)
     elif associations_type == DataType.EXPR:
         associations = []
         file_path = self._get_cached_file(
             cache_path=associations_cache_path,
             file_source_url=associations_url)
         # Context manager so the handle is closed even if a line fails to parse.
         with open(file_path) as assoc_file:
             for line in assoc_file:
                 if line.strip().startswith("!"):
                     continue
                 linearr = line.strip().split("\t")
                 # Ignore annotations to terms missing from the ontology.
                 if not self.expression_ontology.node(linearr[4]):
                     continue
                 gene_id = linearr[0] + ":" + linearr[1]
                 qualifiers = linearr[3].split("|")
                 # NOTE(review): str.split() never returns an empty list, so
                 # the length check below cannot be true and an empty
                 # qualifier column yields [""] - confirm whether it should
                 # map to "Verified" as well.
                 if len(qualifiers) == 0 or "Partial" in qualifiers \
                         or "Certain" in qualifiers:
                     qualifiers = ["Verified"]
                 associations.append(
                     DataManager.create_annotation_record(
                         line, gene_id, linearr[2], linearr[11],
                         linearr[12], linearr[4], qualifiers,
                         linearr[8], linearr[6], linearr[5].split("|"),
                         linearr[14], linearr[13]))
         self.expression_associations = AssociationSetFactory(
         ).create_from_assocs(assocs=associations,
                              ontology=self.expression_ontology)
         self.expression_associations = self.remove_blacklisted_annotations(
             association_set=self.expression_associations,
             ontology=self.expression_ontology,
             terms_blacklist=config.get_module_property(
                 module=Module.EXPRESSION,
                 prop=ConfigModuleProperty.EXCLUDE_TERMS))
     elif associations_type == DataType.DO:
         self.do_associations = AssociationSetFactory().create_from_assocs(
             assocs=GafParser().parse(file=self._get_cached_file(
                 cache_path=associations_cache_path,
                 file_source_url=associations_url),
                                      skipheader=True),
             ontology=self.do_ontology)
         if association_additional_cache_path and association_additional_url:
             # Keep only the IEA annotations of the GAF file; everything
             # else comes from the additional file.
             associations = [
                 association for subj_associations in
                 self.do_associations.associations_by_subj.values()
                 for association in subj_associations
                 if association["evidence"]["type"] == "IEA"
             ]
             file_path = self._get_cached_file(
                 cache_path=association_additional_cache_path,
                 file_source_url=association_additional_url)
             with open(file_path) as additional_file:
                 header = True
                 for line in additional_file:
                     if line.strip().startswith("!"):
                         continue
                     if header:
                         # The first non-comment line is the column header.
                         header = False
                         continue
                     linearr = line.strip().split("\t")
                     if not (self.do_ontology.node(linearr[10])
                             and linearr[16] != "IEA"):
                         continue
                     gene_ids = [linearr[2]]
                     if linearr[1] == "allele":
                         # Allele rows carry a comma-separated gene list.
                         gene_ids = linearr[4].split(",")
                     for gene_id in gene_ids:
                         associations.append(
                             DataManager.create_annotation_record(
                                 line, gene_id, linearr[3],
                                 linearr[1], linearr[0],
                                 linearr[10], linearr[9].split("|"),
                                 "D", linearr[16],
                                 linearr[18].split("|"),
                                 linearr[20], linearr[19]))
             self.do_associations = AssociationSetFactory(
             ).create_from_assocs(assocs=associations,
                                  ontology=self.do_ontology)
         self.do_associations = self.remove_blacklisted_annotations(
             association_set=self.do_associations,
             ontology=self.do_ontology,
             terms_blacklist=config.get_module_property(
                 module=Module.DO_EXPERIMENTAL,
                 prop=ConfigModuleProperty.EXCLUDE_TERMS))
 def test_set_covering_with_ontology(self):
     self.load_do_ontology()
     self.conf_parser.config["do_via_orth_sentences_options"]["trimming_algorithm"] = "ic"
     self.conf_parser.config["do_via_orth_sentences_options"]["max_num_terms"] = 5
     associations = [DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:0080028", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:0080056", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:14789", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:0080026", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:14415", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:0080045", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:3371", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:8886", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:674", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:5614", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:11830", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:8398", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:2256", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:5327", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date=""),
                     DataManager.create_annotation_record(source_line="", gene_id="MGI:88452",
                                                          gene_symbol="", gene_type="gene", taxon_id="",
                                                          object_id="DOID:1123", qualifiers="", aspect="D",
                                                          ecode="ISS", references="", prvdr="WB", date="")]
     self.df.do_associations = AssociationSetFactory().create_from_assocs(assocs=associations,
                                                                          ontology=self.df.do_ontology)
     generator = OntologySentenceGenerator(gene_id="MGI:88452", module=Module.DO_ORTHOLOGY,
                                           data_manager=self.df, config=self.conf_parser)
     sentences = generator.get_module_sentences(
         config=self.conf_parser, aspect='D', qualifier='', merge_groups_with_same_prefix=True,
         keep_only_best_group=True, high_priority_term_ids=["DOID:0080028", "DOID:0080056", "DOID:14789",
                                                            "DOID:0080026", "DOID:14415", "DOID:0080045"])
     print(sentences.get_description())