def set_sister_species_sentence(dm: WBDataManager,
                                conf_parser: GenedescConfigParser,
                                sister_sp_fullname, sister_df: WBDataManager,
                                species, organism, gene_desc: GeneDescription,
                                gene: Gene):
    """Add a sister-species sentence built from the best ortholog's GO processes.

    The best ortholog of ``gene_desc.gene_id`` in ``sister_sp_fullname`` is
    looked up through ``dm``; experimental GO "involved_in" process sentences
    are then generated for that ortholog from ``sister_df``. When sentences
    exist, they are stored on ``gene_desc`` under ``Module.SISTER_SP``.

    Args:
        dm: data manager used to find the best orthologs of the gene.
        conf_parser: configuration passed to the sentence generator.
        sister_sp_fullname: full name of the sister species.
        sister_df: data manager holding the sister species data.
        species: mapping indexed as ``species[organism]["main_sister_species"]``
            and ``species[<that value>]["name"]``.
        organism: key of the current organism in ``species``.
        gene_desc: gene description to extend; does nothing when no ortholog
            or no sentence is available.
        gene: unused here; kept for interface compatibility.
    """
    best_orthologs = dm.get_best_orthologs_for_gene(
        gene_desc.gene_id,
        orth_species_full_name=[sister_sp_fullname],
        sister_species_data_fetcher=sister_df,
        ecode_priority_list=[
            "EXP", "IDA", "IPI", "IMP", "IGI", "IEP", "HTP", "HDA", "HMP",
            "HGI", "HEP"
        ])[0]
    # Without any ortholog there is nothing to describe; previously this
    # case raised IndexError.
    if not best_orthologs:
        return
    best_ortholog = best_orthologs[0]
    # NOTE: the ortholog record is updated in place, as before, so the
    # prefixed ID is also visible to whoever else holds this record.
    if not best_ortholog[0].startswith("WB:"):
        best_ortholog[0] = "WB:" + best_ortholog[0]
    sister_sentences_generator = OntologySentenceGenerator(
        gene_id=best_ortholog[0],
        module=Module.GO,
        data_manager=sister_df,
        config=conf_parser,
        # The literal had been mangled to "H**o sapiens", which can never
        # match a real species name.
        humans=sister_sp_fullname == "Homo sapiens",
        limit_to_group="EXPERIMENTAL")
    sister_sp_module_sentences = sister_sentences_generator.get_module_sentences(
        aspect='P',
        qualifier="involved_in",
        merge_groups_with_same_prefix=True,
        keep_only_best_group=True)
    if sister_sp_module_sentences.contains_sentences():
        sister_species_name = species[species[organism]["main_sister_species"]]["name"]
        # best_ortholog[1] is presumably the ortholog symbol -- TODO confirm.
        gene_desc.set_or_extend_module_description_and_final_stats(
            module=Module.SISTER_SP,
            description="in " + sister_species_name + ", " +
            best_ortholog[1] + " " +
            sister_sp_module_sentences.get_description())
 def test_get_common_ancestors(self):
     # Common ancestors must exist for a gene with experimental process terms,
     # and a term added to "exclude_terms" must not appear among them.
     self.load_go_ontology()
     baseline_generator = OntologySentenceGenerator(
         gene_id="WB:WBGene00000912", module=Module.GO, data_manager=self.df, config=self.conf_parser)
     baseline_ids = baseline_generator.terms_groups[('P', '')]["EXPERIMENTAL"]
     baseline_ancestors = get_all_common_ancestors(baseline_ids, baseline_generator.ontology)
     self.assertTrue(len(baseline_ancestors) > 0, "Common ancestors not found")

     # Flatten the existing associations, then add four experimental process
     # annotations for the gene under test.
     associations = []
     for subject_associations in self.df.go_associations.associations_by_subj.values():
         associations.extend(subject_associations)
     for go_term_id in ("GO:0043055", "GO:0061065", "GO:0043054", "GO:0043053"):
         associations.append(DataManager.create_annotation_record(
             source_line="", gene_id="WB:WBGene00003931", gene_symbol="", gene_type="gene", taxon_id="",
             object_id=go_term_id, qualifiers="", aspect="P", ecode="EXP", references="", prvdr="WB",
             date=""))
     self.df.go_associations = AssociationSetFactory().create_from_assocs(
         assocs=associations, ontology=self.df.go_ontology)

     self.conf_parser.config["go_sentences_options"]["exclude_terms"].append("GO:0040024")
     filtered_generator = OntologySentenceGenerator(
         gene_id="WB:WBGene00003931", module=Module.GO, data_manager=self.df, config=self.conf_parser)
     filtered_ids = filtered_generator.terms_groups[('P', '')]["EXPERIMENTAL"]
     filtered_ancestors = get_all_common_ancestors(filtered_ids, filtered_generator.ontology)
     self.assertTrue("GO:0040024" not in filtered_ancestors, "Common ancestors contain blacklisted term")
 def test_expression_the_cell_renaming_to_widely(self):
     # Loads the expression ontology and associations, then checks that the
     # generated description contains the phrase "is expressed widely".
     data_manager = self.df
     config = self.conf_parser
     data_manager.load_ontology_from_file(
         ontology_type=DataType.EXPR, ontology_url=data_manager.expression_ontology_url,
         ontology_cache_path=data_manager.expression_ontology_cache_path, config=config)
     data_manager.load_associations_from_file(
         associations_type=DataType.EXPR, associations_url=data_manager.expression_associations_url,
         associations_cache_path=data_manager.expression_associations_cache_path, config=config)

     gene_desc = GeneDescription(gene_id="WB:WBGene00007352", gene_name="cdc-48.1", add_gene_name=False)
     generator = OntologySentenceGenerator(
         gene_id=gene_desc.gene_id, module=Module.EXPRESSION, data_manager=data_manager, config=config)
     verified_sentences = generator.get_module_sentences(
         config=config, aspect='A', qualifier="Verified", merge_groups_with_same_prefix=True,
         keep_only_best_group=False)
     gene_desc.set_or_extend_module_description_and_final_stats(
         module_sentences=verified_sentences, module=Module.EXPRESSION)

     self.assertTrue("is expressed widely" in gene_desc.description)
Пример #4
0
def set_information_poor_sentence(orth_fullnames: List[str], selected_orthologs, ensembl_hgnc_ids_map,
                                  conf_parser: GenedescConfigParser, human_df_agr: DataManager,
                                  gene_desc: GeneDescription, dm: WBDataManager, gene: Gene):
    """Add fallback sentences for a gene that has little information of its own.

    Two independent sentences may be added to ``gene_desc``:

    * ``Module.INFO_POOR_HUMAN_FUNCTION`` -- when the only ortholog species is
      "Homo sapiens" and a best human ortholog is found, the experimental GO
      molecular-function sentence of that ortholog, prefixed with "human" and
      the ortholog label.
    * ``Module.PROTEIN_DOMAIN`` -- the list of protein domains stored in
      ``dm.protein_domains`` for the gene.

    Args:
        orth_fullnames: full names of the species the orthologs belong to.
        selected_orthologs: orthologs passed to the best-ortholog selection.
        ensembl_hgnc_ids_map: ID map passed to the best-ortholog selection.
        conf_parser: configuration for priorities and sentence generation.
        human_df_agr: data manager holding the human GO associations.
        gene_desc: gene description to extend.
        dm: data manager providing ``protein_domains``.
        gene: unused here; kept for interface compatibility.
    """
    # The literal had been mangled to "H**o sapiens", which made this branch
    # unreachable for real species names.
    if len(orth_fullnames) == 1 and orth_fullnames[0] == "Homo sapiens":
        best_orth = get_best_human_ortholog_for_info_poor(selected_orthologs, ensembl_hgnc_ids_map,
                                                          conf_parser.get_annotations_priority(module=Module.GO),
                                                          human_df_agr, config=conf_parser)
        if best_orth:
            # NOTE(review): human ortholog IDs get an "RGD:" prefix and the
            # generator runs with humans=False; presumably this matches how
            # human_df_agr keys its subjects -- confirm.
            if not best_orth.startswith("RGD:"):
                best_orth = "RGD:" + best_orth
            human_go_sent_generator = OntologySentenceGenerator(gene_id=best_orth, module=Module.GO,
                                                                data_manager=human_df_agr, config=conf_parser,
                                                                humans=False, limit_to_group="EXPERIMENTAL")
            human_func_module_sentences = human_go_sent_generator.get_module_sentences(
                config=conf_parser, aspect='F', merge_groups_with_same_prefix=True, keep_only_best_group=True)
            human_func_sent = human_func_module_sentences.get_description()
            if human_func_sent:
                ortholog_label = human_df_agr.go_associations.subject_label_map[best_orth]
                gene_desc.set_or_extend_module_description_and_final_stats(
                    module=Module.INFO_POOR_HUMAN_FUNCTION,
                    description="human " + ortholog_label + " " + human_func_sent)

    # The first three characters of the gene ID (its prefix) are dropped to
    # obtain the key used by protein_domains.
    protein_domains = dm.protein_domains[gene_desc.gene_id[3:]]
    if protein_domains:
        dom_word = "domains" if len(protein_domains) > 1 else "domain"
        # Each entry is indexed as [0] and [1]; element 1 is preferred and
        # element 0 is the fallback when element 1 is empty.
        domain_names = [ptdom[1] if ptdom[1] != "" else ptdom[0] for ptdom in protein_domains]
        gene_desc.set_or_extend_module_description_and_final_stats(
            module=Module.PROTEIN_DOMAIN,
            description="is predicted to encode a protein with the following " + dom_word + ": " +
                        concatenate_words_with_oxford_comma(domain_names))
Пример #5
0
def set_tissue_expression_sentence(dm, gene, conf_parser, gene_desc):
    """Generate the "Verified" anatomy expression sentences for ``gene``.

    The sentences are stored on ``gene_desc`` under ``Module.EXPRESSION`` and
    the initial stats for that module are then recorded from the generator.
    """
    generator = OntologySentenceGenerator(
        gene_id=gene.id,
        module=Module.EXPRESSION,
        data_manager=dm,
        config=conf_parser,
    )
    sentence_options = dict(
        config=conf_parser,
        aspect='A',
        qualifier="Verified",
        merge_groups_with_same_prefix=True,
        keep_only_best_group=False,
    )
    verified_sentences = generator.get_module_sentences(**sentence_options)
    gene_desc.set_or_extend_module_description_and_final_stats(
        module_sentences=verified_sentences, module=Module.EXPRESSION)
    gene_desc.set_initial_stats(module=Module.EXPRESSION, sentence_generator=generator)
Пример #6
0
def set_expression_module(df: DataManager, conf_parser: GenedescConfigParser, gene_desc: GeneDescription, gene: Gene):
    """Fill the expression module of ``gene_desc`` for ``gene``.

    "Verified" anatomy sentences are generated from ``df``, added to the
    description under ``Module.EXPRESSION``, and then used to set or update
    the initial stats of the same module.
    """
    generator = OntologySentenceGenerator(
        gene_id=gene.id,
        module=Module.EXPRESSION,
        data_manager=df,
        config=conf_parser,
    )
    verified_sentences = generator.get_module_sentences(
        aspect='A',
        qualifier="Verified",
        merge_groups_with_same_prefix=True,
        keep_only_best_group=False,
    )
    gene_desc.set_or_extend_module_description_and_final_stats(
        module_sentences=verified_sentences, module=Module.EXPRESSION)
    gene_desc.set_or_update_initial_stats(
        module=Module.EXPRESSION, sent_generator=generator, module_sentences=verified_sentences)
Пример #7
0
def set_disease_module(df: DataManager, conf_parser: GenedescConfigParser, gene_desc: GeneDescription, gene: Gene,
                       human: bool = False):
    """Fill the three disease modules of ``gene_desc`` for ``gene``.

    Disease (aspect 'D') sentences are generated for the experimental,
    biomarker and orthology modules, in that order. The first two are limited
    to their annotation group; the orthology generator is not given a group.
    All descriptions are extended first, then the initial stats of the three
    modules are set or updated in the same order.

    Args:
        df: data manager providing the disease data.
        conf_parser: configuration passed to the sentence generators.
        gene_desc: gene description to extend.
        gene: gene whose ``id`` is described.
        human: forwarded to the generators as ``humans``.
    """
    # (module, extra generator arguments); orthology takes no group limit.
    module_specs = (
        (Module.DO_EXPERIMENTAL, {"limit_to_group": "EXPERIMENTAL"}),
        (Module.DO_BIOMARKER, {"limit_to_group": "BIOMARKER"}),
        (Module.DO_ORTHOLOGY, {}),
    )
    generated = []
    for module, group_kwargs in module_specs:
        generator = OntologySentenceGenerator(
            gene_id=gene.id, module=module, data_manager=df, config=conf_parser, humans=human,
            **group_kwargs)
        sentences = generator.get_module_sentences(
            aspect='D', merge_groups_with_same_prefix=True, keep_only_best_group=False)
        gene_desc.set_or_extend_module_description_and_final_stats(
            module=module, module_sentences=sentences)
        generated.append((module, generator, sentences))

    # Stats are recorded only after every description has been extended.
    for module, generator, sentences in generated:
        gene_desc.set_or_update_initial_stats(
            module=module, sent_generator=generator, module_sentences=sentences)
 def test_set_or_extend_module_description_and_final_stats(self):
     # Builds GO process sentences once and applies them to two descriptions,
     # without and with the gene name prepended.
     # NOTE(review): assertTrue(value, msg) only checks that the description
     # is truthy; the second argument is just the failure message. If the
     # intent is an exact comparison, assertEqual is needed -- confirm the
     # expected strings against real output before switching.
     target_gene_id = "FB:FBgn0027655"
     expected_tail = ("involved in several processes, including axo-dendritic transport, "
                      "establishment of mitotic spindle orientation, and positive regulation "
                      "of extent of heterochromatin assembly")

     unnamed_desc = GeneDescription(gene_id=target_gene_id, gene_name="Test gene", add_gene_name=False)
     generator = OntologySentenceGenerator(
         gene_id=target_gene_id, module=Module.GO, data_manager=self.df, config=self.conf_parser)
     process_sentences = generator.get_module_sentences(
         config=self.conf_parser, aspect='P', qualifier='', merge_groups_with_same_prefix=True,
         keep_only_best_group=True)
     unnamed_desc.set_or_extend_module_description_and_final_stats(
         module=Module.GO_PROCESS, module_sentences=process_sentences)
     self.assertTrue(unnamed_desc.description, "Is " + expected_tail)

     named_desc = GeneDescription(gene_id=target_gene_id, gene_name="Test gene", add_gene_name=True)
     named_desc.set_or_extend_module_description_and_final_stats(
         module=Module.GO_PROCESS, module_sentences=process_sentences)
     self.assertTrue(named_desc.description, "Test gene is " + expected_tail)
def set_expression_cluster_sentence(dm: WBDataManager,
                                    conf_parser: GenedescConfigParser,
                                    gene_desc: GeneDescription, gene: Gene,
                                    api_manager: APIManager):
    """Add the expression-cluster sentences (anatomy, gene, molecule) to ``gene_desc``.

    * Anatomy: when ``dm.expression_ontology`` is loaded, "Enriched" anatomy
      sentences from the ontology generator are used; otherwise the raw
      anatomy cluster terms are listed, if there are any.
    * Gene regulation: lists the regulating terms; with more than three
      terms, only the three ranked highest by Textpresso popularity are kept.
    * Molecule regulation: lists at most the first three terms, prefixed with
      the total count spelled out when there are more than three.

    Every sentence is followed by the matching list of studies.

    Args:
        dm: data manager providing expression-cluster features.
        conf_parser: configuration for the generator and the terms delimiter.
        gene_desc: gene description to extend; its ``gene_id`` without the
            first three characters is the cluster lookup key.
        gene: gene whose ``id`` is passed to the sentence generator.
        api_manager: provides ``get_textpresso_popularity`` for ranking.
    """

    def add_cluster_sentence(module, description, studies):
        # All cluster sentences share the same "studies" postfix settings.
        gene_desc.set_or_extend_module_description_and_final_stats(
            module=module,
            description=description,
            additional_postfix_terms_list=studies,
            additional_postfix_final_word="studies",
            use_single_form=True)

    def cluster_feature(cluster_type, feature):
        return dm.get_expression_cluster_feature(
            gene_id=cluster_gene_id,
            expression_cluster_type=cluster_type,
            feature=feature)

    anatomy_generator = OntologySentenceGenerator(
        gene_id=gene.id, module=Module.EXPRESSION, data_manager=dm, config=conf_parser)
    cluster_gene_id = gene_desc.gene_id[3:]

    # --- anatomy ---
    anatomy_studies = cluster_feature(ExpressionClusterType.ANATOMY, ExpressionClusterFeature.STUDIES)
    anatomy_terms = cluster_feature(ExpressionClusterType.ANATOMY, ExpressionClusterFeature.TERMS)
    if dm.expression_ontology is not None:
        enriched_sentences = anatomy_generator.get_module_sentences(
            aspect='A', qualifier="Enriched", merge_groups_with_same_prefix=True,
            keep_only_best_group=False)
        add_cluster_sentence(Module.EXPRESSION_CLUSTER_ANATOMY,
                             enriched_sentences.get_description(),
                             anatomy_studies)
    elif anatomy_terms:
        anatomy_list = concatenate_words_with_oxford_comma(
            anatomy_terms, separator=conf_parser.get_terms_delimiter())
        add_cluster_sentence(Module.EXPRESSION_CLUSTER_ANATOMY,
                             "is enriched in " + anatomy_list + " based on",
                             anatomy_studies)

    molreg_terms = cluster_feature(ExpressionClusterType.MOLREG, ExpressionClusterFeature.TERMS)
    molreg_studies = cluster_feature(ExpressionClusterType.MOLREG, ExpressionClusterFeature.STUDIES)
    genereg_terms = cluster_feature(ExpressionClusterType.GENEREG, ExpressionClusterFeature.TERMS)
    genereg_studies = cluster_feature(ExpressionClusterType.GENEREG, ExpressionClusterFeature.STUDIES)

    # --- gene regulation ---
    if genereg_terms:
        gene_prefix = ""
        shown_genes = genereg_terms
        if len(genereg_terms) > 3:
            # Rank by popularity, ties broken on the second element of the
            # term (x[0][1]), highest first.
            ranked = sorted(
                ([term, api_manager.get_textpresso_popularity(term)] for term in genereg_terms),
                key=lambda x: (x[1], x[0][1]),
                reverse=True)
            shown_genes = [pair[0] for pair in ranked[:3]]
            gene_prefix = "several genes including "
        gene_list = concatenate_words_with_oxford_comma(
            shown_genes, separator=conf_parser.get_terms_delimiter())
        add_cluster_sentence(Module.EXPRESSION_CLUSTER_GENE,
                             "is affected by " + gene_prefix + gene_list + " based on",
                             genereg_studies)

    # --- molecule regulation ---
    if molreg_terms:
        if len(molreg_terms) > 3:
            molecule_prefix = num2words(len(molreg_terms)) + " chemicals including "
        else:
            molecule_prefix = ""
        molecule_list = concatenate_words_with_oxford_comma(
            molreg_terms[0:3], separator=conf_parser.get_terms_delimiter())
        add_cluster_sentence(Module.EXPRESSION_CLUSTER_MOLECULE,
                             "is affected by " + molecule_prefix + molecule_list + " based on",
                             molreg_studies)
 def test_set_covering_with_ontology(self):
     # Runs the "ic" trimming algorithm (max 5 terms) on fifteen ISS disease
     # annotations of one gene and prints the resulting description.
     # NOTE(review): there is no assertion; the test only fails if an
     # exception is raised.
     self.load_do_ontology()
     orthology_options = self.conf_parser.config["do_via_orth_sentences_options"]
     orthology_options["trimming_algorithm"] = "ic"
     orthology_options["max_num_terms"] = 5

     disease_ids = (
         "DOID:0080028", "DOID:0080056", "DOID:14789", "DOID:0080026", "DOID:14415",
         "DOID:0080045", "DOID:3371", "DOID:8886", "DOID:674", "DOID:5614",
         "DOID:11830", "DOID:8398", "DOID:2256", "DOID:5327", "DOID:1123",
     )
     associations = [
         DataManager.create_annotation_record(
             source_line="", gene_id="MGI:88452", gene_symbol="", gene_type="gene", taxon_id="",
             object_id=disease_id, qualifiers="", aspect="D", ecode="ISS", references="", prvdr="WB",
             date="")
         for disease_id in disease_ids
     ]
     self.df.do_associations = AssociationSetFactory().create_from_assocs(
         assocs=associations, ontology=self.df.do_ontology)

     generator = OntologySentenceGenerator(
         gene_id="MGI:88452", module=Module.DO_ORTHOLOGY, data_manager=self.df, config=self.conf_parser)
     priority_ids = ["DOID:0080028", "DOID:0080056", "DOID:14789",
                     "DOID:0080026", "DOID:14415", "DOID:0080045"]
     sentences = generator.get_module_sentences(
         config=self.conf_parser, aspect='D', qualifier='', merge_groups_with_same_prefix=True,
         keep_only_best_group=True, high_priority_term_ids=priority_ids)
     print(sentences.get_description())
Пример #11
0
def set_gene_ontology_module(dm: DataManager, conf_parser: GenedescConfigParser, gene_desc: GeneDescription,
                             gene: Gene):
    """Fill the GO function, process and component modules of ``gene_desc``.

    For each GO aspect ('F', 'P', 'C') one sentence set per qualifier is requested from a
    generator limited to the "EXPERIMENTAL" group. If none of those sets contains sentences,
    the same requests are repeated on a generator created without that limit. The resulting
    sets are added to the description and, once all three aspects are done, registered for
    the initial statistics (always with the unrestricted generator).

    Args:
        dm: data manager handed to both sentence generators.
        conf_parser: configuration handed to both sentence generators.
        gene_desc: description object that receives the sentences and the statistics.
        gene: gene being described; only its ``id`` is read here.
    """
    exp_only_generator = OntologySentenceGenerator(
        gene_id=gene.id, module=Module.GO, data_manager=dm, config=conf_parser, limit_to_group="EXPERIMENTAL")
    unrestricted_generator = OntologySentenceGenerator(
        gene_id=gene.id, module=Module.GO, data_manager=dm, config=conf_parser)

    def request(generator, aspect, qualifiers):
        # One get_module_sentences call per qualifier, issued in the order given;
        # the returned dict keeps that order.
        return {qualifier: generator.get_module_sentences(
                    aspect=aspect, qualifier=qualifier, merge_groups_with_same_prefix=True,
                    keep_only_best_group=True)
                for qualifier in qualifiers}

    def experimental_first(aspect, qualifiers):
        # Keep the experimental-only results as soon as one of them has content;
        # otherwise redo every request with all annotations.
        by_qualifier = request(exp_only_generator, aspect, qualifiers)
        if any(found.contains_sentences() for found in by_qualifier.values()):
            return by_qualifier
        return request(unrestricted_generator, aspect, qualifiers)

    # Molecular function
    function_sentences = experimental_first('F', ('contributes_to', '', 'enables'))
    # Description order differs from request order: 'contributes_to' goes last.
    for qualifier in ('', 'enables', 'contributes_to'):
        gene_desc.set_or_extend_module_description_and_final_stats(
            module_sentences=function_sentences[qualifier], module=Module.GO_FUNCTION)

    # Biological process: described in the same order as requested
    process_sentences = experimental_first('P', (
        '', 'involved_in', 'acts_upstream_of_positive_effect', 'acts_upstream_of_negative_effect',
        'acts_upstream_of_or_within_positive_effect', 'acts_upstream_of_or_within_negative_effect',
        'acts_upstream_of_or_within'))
    for found in process_sentences.values():
        gene_desc.set_or_extend_module_description_and_final_stats(
            module_sentences=found, module=Module.GO_PROCESS)

    # Cellular component
    component_sentences = experimental_first(
        'C', ('colocalizes_with', '', 'located_in', 'part_of', 'is_active_in'))
    # Description order differs from request order: 'colocalizes_with' goes last.
    for qualifier in ('', 'located_in', 'part_of', 'is_active_in', 'colocalizes_with'):
        gene_desc.set_or_extend_module_description_and_final_stats(
            module_sentences=component_sentences[qualifier], module=Module.GO_COMPONENT)

    # Initial statistics: every sentence set, per module, in request order.
    for go_module, by_qualifier in ((Module.GO_FUNCTION, function_sentences),
                                    (Module.GO_PROCESS, process_sentences),
                                    (Module.GO_COMPONENT, component_sentences)):
        for found in by_qualifier.values():
            gene_desc.set_or_update_initial_stats(module=go_module, sent_generator=unrestricted_generator,
                                                  module_sentences=found)
Пример #12
0
def set_gene_ontology_module(dm: DataManager,
                             conf_parser: GenedescConfigParser,
                             gene_desc: GeneDescription, gene: Gene):
    """Fill the GO function, process and component modules of ``gene_desc``.

    Two sentence generators are created for the gene: one limited to the
    "EXPERIMENTAL" group and one without that limit. For aspects 'F' and 'C'
    the qualified sentences ('contributes_to' / 'colocalizes_with') are taken
    from the unrestricted generator; when they contain sentences, the
    unqualified sentences for that aspect come from the experimental-only
    generator, otherwise from the unrestricted one. Aspect 'P' always uses the
    unrestricted generator. Initial statistics are set last for all three
    modules.

    Args:
        dm: data manager handed to both sentence generators.
        conf_parser: configuration handed to the generators and to every
            sentence request.
        gene_desc: description object that receives sentences and statistics.
        gene: gene being described; only its ``id`` is read here.
    """
    go_sent_generator_exp = OntologySentenceGenerator(
        gene_id=gene.id, module=Module.GO, data_manager=dm,
        config=conf_parser, limit_to_group="EXPERIMENTAL")
    go_sent_generator = OntologySentenceGenerator(
        gene_id=gene.id, module=Module.GO, data_manager=dm,
        config=conf_parser)
    # Keyword arguments common to every get_module_sentences call below.
    shared_options = {"config": conf_parser,
                      "merge_groups_with_same_prefix": True,
                      "keep_only_best_group": True}

    # Molecular function
    contributes_to = go_sent_generator.get_module_sentences(
        aspect='F', qualifier='contributes_to', **shared_options)
    function_source = (go_sent_generator_exp
                       if contributes_to.contains_sentences()
                       else go_sent_generator)
    function_sentences = function_source.get_module_sentences(
        aspect='F', **shared_options)
    gene_desc.set_or_extend_module_description_and_final_stats(
        module_sentences=function_sentences, module=Module.GO_FUNCTION)
    gene_desc.set_or_extend_module_description_and_final_stats(
        module_sentences=contributes_to, module=Module.GO_FUNCTION)

    # Biological process
    process_sentences = go_sent_generator.get_module_sentences(
        aspect='P', **shared_options)
    gene_desc.set_or_extend_module_description_and_final_stats(
        module_sentences=process_sentences, module=Module.GO_PROCESS)

    # Cellular component
    colocalizes_with = go_sent_generator.get_module_sentences(
        aspect='C', qualifier='colocalizes_with', **shared_options)
    component_source = (go_sent_generator_exp
                        if colocalizes_with.contains_sentences()
                        else go_sent_generator)
    component_sentences = component_source.get_module_sentences(
        aspect='C', **shared_options)
    gene_desc.set_or_extend_module_description_and_final_stats(
        module_sentences=component_sentences, module=Module.GO_COMPONENT)
    gene_desc.set_or_extend_module_description_and_final_stats(
        module_sentences=colocalizes_with, module=Module.GO_COMPONENT)

    # Initial statistics, one call per GO module, after all sentences are set.
    for go_module in (Module.GO_FUNCTION, Module.GO_PROCESS,
                      Module.GO_COMPONENT):
        gene_desc.set_initial_stats(
            module=go_module,
            sentence_generator=go_sent_generator,
            sentence_generator_exp_only=go_sent_generator_exp)