def _answer_query_using_bte(self, input_qnode_key: str, output_qnode_key: str, qg: QueryGraph,
                            answer_kg: QGOrganizedKnowledgeGraph, valid_bte_inputs_dict: Dict[str, Set[str]],
                            log: ARAXResponse) -> Tuple[QGOrganizedKnowledgeGraph, Set[str]]:
    """
    Send a single-edge query to BioThings Explorer (BTE), one input curie at a time.

    The first edge key found in ``qg.edges`` is used as the query edge. For every curie
    listed on the input qnode whose prefix appears in
    ``valid_bte_inputs_dict['curie_prefixes']``, one BTE query is issued per
    (input category, output category) combination, and each response is merged into
    ``answer_kg`` via ``self._add_answers_to_kg``.

    :param input_qnode_key: key (in ``qg.nodes``) of the qnode whose curies are sent to BTE
    :param output_qnode_key: key (in ``qg.nodes``) of the qnode at the other end of the edge
    :param qg: the query graph; only its first edge is used
    :param answer_kg: knowledge graph that answers are accumulated into
    :param valid_bte_inputs_dict: must contain a 'curie_prefixes' entry listing accepted prefixes
    :param log: response object used for debug/error logging
    :return: tuple of (the answer KG, the set of input curies that were actually sent to BTE)

    If any BTE query raises, the error is logged and the method returns immediately with
    whatever has been gathered so far; the exception is not propagated.
    """
    accepted_curies = set()
    qedge_key = next(iter(qg.edges))
    qedge = qg.edges[qedge_key]
    input_qnode = qg.nodes[input_qnode_key]
    output_qnode = qg.nodes[output_qnode_key]
    valid_curie_prefixes = valid_bte_inputs_dict['curie_prefixes']

    # Send this single-edge query to BTE, input curie by input curie (adding findings to our answer KG as we go)
    for curie in input_qnode.id:
        # The prefix does not depend on the category combination, so compute/check it once per curie
        curie_prefix = eu.get_curie_prefix(curie)
        if curie_prefix not in valid_curie_prefixes:
            continue
        # Consider all different combinations of qnode types (can be multiple if gene/protein)
        for input_qnode_category, output_qnode_category in itertools.product(input_qnode.category,
                                                                             output_qnode.category):
            # Only recorded once a query is actually about to be sent for this curie
            accepted_curies.add(curie)
            loop = None
            try:
                loop = asyncio.new_event_loop()
                seqd = SingleEdgeQueryDispatcher(input_cls=input_qnode_category,
                                                 output_cls=output_qnode_category,
                                                 pred=qedge.predicate,
                                                 input_id=curie_prefix,
                                                 values=eu.get_curie_local_id(curie),
                                                 loop=loop)
                log.debug(f"Sending query to BTE: {curie}-{qedge.predicate if qedge.predicate else ''}->{output_qnode_category}")
                seqd.query()
                reasoner_std_response = seqd.to_reasoner_std()
            except Exception as error:
                trace_back = traceback.format_exc()
                log.error(f"Encountered a problem while using BioThings Explorer. {trace_back}",
                          error_code=type(error).__name__)
                return answer_kg, accepted_curies
            else:
                answer_kg = self._add_answers_to_kg(answer_kg, reasoner_std_response, input_qnode_key,
                                                    output_qnode_key, qedge_key, log)
            finally:
                # A fresh event loop is created per query; close it so its resources are not leaked
                if loop is not None:
                    loop.close()

    return answer_kg, accepted_curies
def _get_best_equivalent_bte_curie(equivalent_curies: List[str], node_category: str) -> str:
    """
    Pick, from a list of equivalent curies, the one whose prefix is most preferred for the node category.

    Candidates are considered in sorted order, so ties between curies sharing the same
    prefix are broken alphabetically. If no candidate uses a preferred prefix for the
    category, the first candidate whose prefix is not 'NAME' is chosen; failing that,
    the first candidate overall.

    :param equivalent_curies: curies to choose among (left unmodified)
    :param node_category: node category; converted to snake case before looking up preferences
    :return: the chosen curie
    :raises IndexError: if ``equivalent_curies`` is empty
    """
    # Curie prefixes in order of preference for different node types (not all-inclusive)
    preferred_node_prefixes_dict = {'chemical_substance': ['CHEMBL.COMPOUND', 'CHEBI'],
                                    'protein': ['UNIPROTKB', 'PR'],
                                    'gene': ['NCBIGENE', 'ENSEMBL', 'HGNC', 'GO'],
                                    'disease': ['DOID', 'MONDO', 'OMIM', 'MESH'],
                                    'phenotypic_feature': ['HP', 'OMIM'],
                                    'anatomical_entity': ['UBERON', 'FMA', 'CL'],
                                    'pathway': ['REACTOME'],
                                    'biological_process': ['GO'],
                                    'cellular_component': ['GO']}
    prefixes_in_order_of_preference = preferred_node_prefixes_dict.get(
        eu.convert_string_to_snake_case(node_category), [])
    # Lower rank means more preferred
    rank_by_prefix = {prefix: rank for rank, prefix in enumerate(prefixes_in_order_of_preference)}

    # Sort a copy rather than the argument so the caller's list is not reordered as a side effect
    sorted_curies = sorted(equivalent_curies)
    prefixed_curies = [(eu.get_curie_prefix(curie).upper(), curie) for curie in sorted_curies]

    # Pick the curie that uses the (relatively) most preferred prefix
    ranked_curies = [(rank_by_prefix[prefix], curie) for prefix, curie in prefixed_curies
                     if prefix in rank_by_prefix]
    if ranked_curies:
        # min() returns the first minimal item, so the alphabetically-first curie wins ties
        return min(ranked_curies, key=lambda ranked: ranked[0])[1]

    # Otherwise, just try to pick one that isn't 'NAME:___'
    for prefix, curie in prefixed_curies:
        if prefix != 'NAME':
            return curie
    return sorted_curies[0]