Exemplo n.º 1
0
    def __init__(self, gene_id: str, module: Module, data_manager: DataManager,
                 config: GenedescConfigParser, limit_to_group: str = None,
                 humans: bool = False):
        """Set up a sentence generator for one gene within one module.

        Args:
            gene_id (str): identifier of the gene whose annotations are loaded
            module (Module): module used to select the data type and the config section
            data_manager (DataManager): provider of the ontology, gene annotations, associations and slim set
            config (GenedescConfigParser): config object from which the module options are read
            limit_to_group (str): limit the evidence codes to the specified group (forwarded to
                set_terms_groups)
            humans (bool): forwarded to the prefix/postfix sentence map lookup and to set_terms_groups
        """
        self.ontology = data_manager.get_ontology(get_data_type_from_module(module))
        self.config = config
        self.module = module
        self.terms_already_covered = set()
        # Two-level mapping whose innermost values are sets; filled in by set_terms_groups below.
        self.terms_groups = defaultdict(lambda: defaultdict(set))
        self.evidence_groups_priority_list = config.get_evidence_groups_priority_list(module=module)
        self.prepostfix_sentences_map = config.get_prepostfix_sentence_map(module=module, humans=humans)

        gene_annot_type = get_data_type_from_module(module)
        annots_priority = config.get_annotations_priority(module=module)
        self.gene_annots = data_manager.get_annotations_for_gene(
            gene_id=gene_id, annot_type=gene_annot_type, priority_list=annots_priority)

        # The trimming class is chosen by the module's configured trimming algorithm.
        trimming_algorithm = config.get_module_property(
            module=module, prop=ConfigModuleProperty.TRIMMING_ALGORITHM)
        trimmer_cls = CONF_TO_TRIMMING_CLASS[trimming_algorithm]
        associations = data_manager.get_associations(get_data_type_from_module(module))
        excluded_terms = config.get_module_property(
            module=module, prop=ConfigModuleProperty.EXCLUDE_TERMS)
        slim_bonus_perc = config.get_module_property(
            module=module, prop=ConfigModuleProperty.SLIM_BONUS_PERC)
        slim_terms = data_manager.get_slim(module=module)
        self.trimmer = trimmer_cls(ontology=self.ontology,
                                   annotations=associations,
                                   nodeids_blacklist=excluded_terms,
                                   slim_terms_ic_bonus_perc=slim_bonus_perc,
                                   slim_set=slim_terms)

        self.set_terms_groups(module, config, limit_to_group, humans)
Exemplo n.º 2
0
    def __init__(self,
                 gene_id: str,
                 module: Module,
                 data_manager: DataManager,
                 config: GenedescConfigParser,
                 limit_to_group: str = None,
                 humans: bool = False):
        """Initialize the sentence generator and group the gene's annotated terms.

        Annotations of the gene are read from the data manager and their object ids are stored in
        ``self.terms_groups``, keyed first by ``(aspect, qualifier)`` and then by evidence group.

        Args:
            gene_id (str): identifier of the gene whose annotations are loaded
            module (Module): module selecting the ontology, the annotation type and the config section
            data_manager (DataManager): provider of the ontologies and of the gene annotations
            config (GenedescConfigParser): config object from which to read the options
            limit_to_group (str): limit the evidence codes to the specified group
            humans (bool): forwarded to the config's prefix/postfix sentence map lookups
        """
        annot_type = None
        # Compare against the Module enum class for every branch (the biomarker check used to go
        # through the ``module`` argument itself).
        # NOTE(review): for a module outside these branches self.ontology is never assigned and
        # annot_type stays None - confirm whether such modules can reach this constructor.
        if module in (Module.DO_ORTHOLOGY, Module.DO_EXPERIMENTAL, Module.DO_BIOMARKER):
            self.ontology = data_manager.do_ontology
            annot_type = DataType.DO
        elif module == Module.GO:
            self.ontology = data_manager.go_ontology
            annot_type = DataType.GO
        elif module == Module.EXPRESSION:
            self.ontology = data_manager.expression_ontology
            annot_type = DataType.EXPR
        # NOTE(review): this list is mutated in place below (insert); if the config hands out a
        # shared list the change is visible outside this object - confirm.
        self.evidence_groups_priority_list = config.get_evidence_groups_priority_list(
            module=module)
        self.prepostfix_sentences_map = config.get_prepostfix_sentence_map(
            module=module, humans=humans)
        self.terms_groups = defaultdict(lambda: defaultdict(set))
        ev_codes_groups_maps = config.get_evidence_codes_groups_map(
            module=module)
        annotations = data_manager.get_annotations_for_gene(
            gene_id=gene_id,
            annot_type=annot_type,
            priority_list=config.get_annotations_priority(module=module))
        self.annotations = annotations
        self.module = module
        self.data_manager = data_manager
        self.annot_type = annot_type
        # Keep only the evidence codes whose group contains limit_to_group (all codes when None).
        evidence_codes_groups_map = {
            evcode: group
            for evcode, group in ev_codes_groups_maps.items()
            if limit_to_group is None or limit_to_group in group
        }
        prepostfix_special_cases_sent_map = config.get_prepostfix_sentence_map(
            module=module, special_cases_only=True, humans=humans)
        for annotation in annotations:
            evidence_type = annotation["evidence"]["type"]
            if evidence_type not in evidence_codes_groups_map:
                continue
            aspect = annotation["aspect"]
            base_group = evidence_codes_groups_map[evidence_type]
            ev_group = base_group
            qualifier = "_".join(sorted(annotation["qualifiers"])) \
                if "qualifiers" in annotation else ""
            term_id = annotation["object"]["id"]
            special_case_key = (aspect, base_group, qualifier)
            if prepostfix_special_cases_sent_map and \
                    special_case_key in prepostfix_special_cases_sent_map:
                term_label = self.ontology.label(term_id, id_if_null=True)
                for special_case in prepostfix_special_cases_sent_map[special_case_key]:
                    # The escaped pattern is matched literally at the start of the term label.
                    if re.match(re.escape(special_case[1]), term_label):
                        # First matching special case wins: the term goes into a derived group
                        # named after the base group plus the special case id.
                        ev_group = base_group + str(special_case[0])
                        if ev_group not in self.evidence_groups_priority_list:
                            # Place the derived group right after its base group in the priority list.
                            self.evidence_groups_priority_list.insert(
                                self.evidence_groups_priority_list.index(base_group) + 1,
                                ev_group)
                        break
            self.terms_groups[(aspect, qualifier)][ev_group].add(term_id)