Exemplo n.º 1
0
 def _answer_query_using_bte(self, input_qnode_key: str, output_qnode_key: str, qg: QueryGraph,
                             answer_kg: QGOrganizedKnowledgeGraph, valid_bte_inputs_dict: Dict[str, Set[str]],
                             log: ARAXResponse) -> Tuple[QGOrganizedKnowledgeGraph, Set[str]]:
     """
     Send a single-edge query to BioThings Explorer (BTE), one input curie at a time.

     For every curie on the input qnode whose prefix is listed in
     valid_bte_inputs_dict['curie_prefixes'], one BTE query is issued per
     (input category, output category) combination, and each response is merged
     into answer_kg via self._add_answers_to_kg().

     :param input_qnode_key: Key (in qg.nodes) of the qnode whose curies are used as BTE inputs.
     :param output_qnode_key: Key (in qg.nodes) of the qnode on the other end of the edge.
     :param qg: Query graph; only the first qedge yielded by iterating qg.edges is used.
     :param answer_kg: Knowledge graph that BTE findings are added to.
     :param valid_bte_inputs_dict: Must contain a 'curie_prefixes' entry holding the accepted prefixes.
     :param log: Response object used for debug/error logging.
     :return: Tuple of (the updated answer KG, the set of input curies that were sent to BTE).
         If any BTE call raises, the error is logged and whatever was gathered so far is
         returned immediately.
     """
     accepted_curies = set()
     qedge_key = next(iter(qg.edges))
     qedge = qg.edges[qedge_key]
     input_qnode = qg.nodes[input_qnode_key]
     output_qnode = qg.nodes[output_qnode_key]
     # Send this single-edge query to BTE, input curie by input curie (adding findings to our answer KG as we go)
     # NOTE(review): assumes input_qnode.id and both .category attributes are iterables of strings - confirm
     for curie in input_qnode.id:
         # The prefix does not depend on the category combination, so work it out once per curie
         curie_prefix = eu.get_curie_prefix(curie)
         if curie_prefix not in valid_bte_inputs_dict['curie_prefixes']:
             continue
         # Consider all different combinations of qnode types (can be multiple if gene/protein)
         for input_qnode_category, output_qnode_category in itertools.product(input_qnode.category, output_qnode.category):
             # Recorded here (not above) so a curie only counts as accepted when a query is actually attempted
             accepted_curies.add(curie)
             loop = None
             try:
                 loop = asyncio.new_event_loop()
                 seqd = SingleEdgeQueryDispatcher(input_cls=input_qnode_category,
                                                  output_cls=output_qnode_category,
                                                  pred=qedge.predicate,
                                                  input_id=curie_prefix,
                                                  values=eu.get_curie_local_id(curie),
                                                  loop=loop)
                 log.debug(f"Sending query to BTE: {curie}-{qedge.predicate if qedge.predicate else ''}->{output_qnode_category}")
                 seqd.query()
                 reasoner_std_response = seqd.to_reasoner_std()
             except Exception as error:
                 trace_back = traceback.format_exc()
                 log.error(f"Encountered a problem while using BioThings Explorer. {trace_back}",
                           error_code=type(error).__name__)
                 return answer_kg, accepted_curies
             finally:
                 # A fresh event loop is created for every query; close it on both the success and
                 # the error path so its resources are not leaked
                 if loop is not None:
                     loop.close()
             answer_kg = self._add_answers_to_kg(answer_kg, reasoner_std_response, input_qnode_key, output_qnode_key, qedge_key, log)
     return answer_kg, accepted_curies
Exemplo n.º 2
0
    def _get_best_equivalent_bte_curie(equivalent_curies: List[str], node_category: str) -> str:
        """
        Pick, from a group of equivalent curies, the one to use for the given node category.

        The curies are considered in sorted order. The curie whose upper-cased prefix ranks
        earliest in the preference list for node_category wins (ties go to the curie that
        sorts first). If no curie uses a preferred prefix, the first curie whose prefix is
        not 'NAME' is returned, and failing that, simply the first curie.

        The caller's list is not modified.

        :param equivalent_curies: Curies to choose from; must not be empty.
        :param node_category: Node category; it is converted to snake case before being looked up.
        :return: The chosen curie.
        :raises IndexError: If equivalent_curies is empty.
        """
        # Curie prefixes in order of preference for different node types (not all-inclusive)
        preferred_node_prefixes_dict = {'chemical_substance': ['CHEMBL.COMPOUND', 'CHEBI'],
                                        'protein': ['UNIPROTKB', 'PR'],
                                        'gene': ['NCBIGENE', 'ENSEMBL', 'HGNC', 'GO'],
                                        'disease': ['DOID', 'MONDO', 'OMIM', 'MESH'],
                                        'phenotypic_feature': ['HP', 'OMIM'],
                                        'anatomical_entity': ['UBERON', 'FMA', 'CL'],
                                        'pathway': ['REACTOME'],
                                        'biological_process': ['GO'],
                                        'cellular_component': ['GO']}
        prefixes_in_order_of_preference = preferred_node_prefixes_dict.get(eu.convert_string_to_snake_case(node_category), [])
        # Map each preferred prefix to its rank (lower is better) for direct lookup
        prefix_rankings = {prefix: ranking for ranking, prefix in enumerate(prefixes_in_order_of_preference)}

        # Sort a copy (rather than sorting in place) so the caller's list is left untouched;
        # sorting keeps the choice deterministic regardless of input order
        sorted_curies = sorted(equivalent_curies)

        # Pick the curie that uses the (relatively) most preferred prefix
        best_curie = None
        lowest_ranking = None
        for curie in sorted_curies:
            ranking = prefix_rankings.get(eu.get_curie_prefix(curie).upper())
            if ranking is None:
                continue
            # Strict comparison: on a tie the curie that sorts first is kept
            if lowest_ranking is None or ranking < lowest_ranking:
                lowest_ranking = ranking
                best_curie = curie
        if best_curie:
            return best_curie

        # Otherwise, just try to pick one that isn't 'NAME:___'
        for curie in sorted_curies:
            if eu.get_curie_prefix(curie).upper() != 'NAME':
                return curie
        return sorted_curies[0]