def _answer_query_for_kps_who_dont_like_lists(self, query_graph: QueryGraph) -> Tuple[QGOrganizedKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    Answer a one-hop query for a KP that cannot handle list-valued categories/predicates.

    TRAPI 1.0 says qnode.category and qedge.predicate can both be strings OR lists, but many KPs don't support
    them being lists. So this function pings such KPs one by one for each possible
    subj_category--predicate--obj_category combination and merges the per-combination answers.

    :param query_graph: A TRAPI query graph; only its first edge (and that edge's two nodes) is used. The
                        original is not modified (a copy is made via eu.copy_qg).
    :return: A tuple containing:
        1. The knowledge graph obtained by merging the answers for every combination.
        2. The combined edge-to-nodes map of every combination (later answers overwrite earlier ones that
           use the same edge key).
    """
    qg_copy = eu.copy_qg(query_graph)  # Use a copy of the QG so we don't modify the original
    qedge_key = next(qedge_key for qedge_key in qg_copy.edges)
    qedge = qg_copy.edges[qedge_key]
    subject_qnode = qg_copy.nodes[qedge.subject]
    object_qnode = qg_copy.nodes[qedge.object]

    # Normalize to lists first: a bare string value would otherwise be iterated character by character.
    # An absent/empty value becomes [None] so that the loops below still run once for that slot.
    subject_categories = eu.convert_to_list(subject_qnode.category) or [None]
    object_categories = eu.convert_to_list(object_qnode.category) or [None]
    predicates = eu.convert_to_list(qedge.predicate) or [None]
    possible_triples = [(subject_category, predicate, object_category)
                        for subject_category in subject_categories
                        for predicate in predicates
                        for object_category in object_categories]

    answer_kg = QGOrganizedKnowledgeGraph()
    edge_to_nodes_map = dict()
    for current_subject_category, current_predicate, current_object_category in possible_triples:
        # Modify the QG so it's asking only for the current category--predicate--category triple
        subject_qnode.category = current_subject_category
        object_qnode.category = current_object_category
        qedge.predicate = current_predicate

        self.log.debug(f"Current triple is: {current_subject_category}--{current_predicate}--{current_object_category}")
        sub_kg, sub_edge_to_nodes_map = self._answer_query_using_kp(qg_copy)

        # Merge the answers for this triple into our answers received thus far
        edge_to_nodes_map.update(sub_edge_to_nodes_map)
        answer_kg = eu.merge_two_kgs(sub_kg, answer_kg)

    return answer_kg, edge_to_nodes_map
def answer_single_node_query(self, single_node_qg: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Answer a single-node (edge-less) query using the specified KP.

    :param single_node_qg: A TRAPI query graph containing a single node (no edges).
    :return: An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as results
             for the query (organized by QG IDs). An empty knowledge graph is returned when validation or
             preprocessing leaves the log in a non-'OK' status.
    """
    response_log = self.log
    empty_kg = QGOrganizedKnowledgeGraph()

    # Step 1: make sure this really is a single-node query graph
    self._verify_is_single_node_query_graph(single_node_qg)
    if response_log.status != 'OK':
        return empty_kg

    # Step 2: reshape the query graph into the form the KP needs
    kp_ready_qg = self._preprocess_query_graph(single_node_qg)
    if response_log.status != 'OK':
        return empty_kg

    # TODO: reinstate the self._verify_qg_is_accepted_by_kp(...) check once validation has been smoothed out

    # Step 3: query the KP; the edge-to-nodes map it also returns is not needed for edge-less queries
    kp_answer_kg, _ = self._answer_query_using_kp(kp_ready_qg)
    return kp_answer_kg
def answer_one_hop_query(self, query_graph: QueryGraph) -> Tuple[QGOrganizedKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    Answer a one-hop (single-edge) query using the specified KP.

    :param query_graph: A TRAPI query graph.
    :return: A tuple containing:
        1. An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as
           results for the query. (Organized by QG IDs.)
        2. A map of which nodes fulfilled which qnode_keys for each edge. Example:
           {'KG1:111221': {'n00': 'DOID:111', 'n01': 'HP:124'}, 'KG1:111223': {'n00': 'DOID:111', 'n01': 'HP:126'}}
        Both are empty when validation or preprocessing leaves the log in a non-'OK' status.
    """
    response_log = self.log
    nothing_found = (QGOrganizedKnowledgeGraph(), dict())

    # Step 1: make sure this really is a one-hop query graph
    self._verify_is_one_hop_query_graph(query_graph)
    if response_log.status != 'OK':
        return nothing_found

    # Step 2: reshape the query graph into the form the KP needs
    kp_ready_qg = self._preprocess_query_graph(query_graph)
    if response_log.status != 'OK':
        return nothing_found

    # TODO: reinstate the self._verify_qg_is_accepted_by_kp(...) check once validation has been smoothed out

    # Step 3: query the KP and hand back its answers, loaded into our object model
    kp_answer_kg, kp_edge_to_nodes_map = self._answer_query_using_kp(kp_ready_qg)
    return kp_answer_kg, kp_edge_to_nodes_map
def answer_one_hop_query(self, query_graph: QueryGraph) -> Tuple[QGOrganizedKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    Answer a one-hop (single-edge) query using the specified KP.

    KPs whose name ends in "KG2" receive the whole query at once; every other KP is queried once per
    (subject curie, object curie) pair and the partial answers are merged.

    :param query_graph: A TRAPI query graph. It is copied, so the caller's object is left untouched.
    :return: A tuple containing:
        1. An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as
           results for the query. (Organized by QG IDs.)
        2. A map of which nodes fulfilled which qnode_keys for each edge. Example:
           {'KG1:111221': {'n00': 'DOID:111', 'n01': 'HP:124'}, 'KG1:111223': {'n00': 'DOID:111', 'n01': 'HP:126'}}
    """
    response_log = self.log
    merged_kg = QGOrganizedKnowledgeGraph()
    merged_edge_to_nodes_map = dict()
    working_qg = eu.copy_qg(query_graph)  # Work on a copy so the caller's QG is never modified

    # Validate the query graph and reshape it for this KP; bail out with empty answers on any problem
    self._verify_is_one_hop_query_graph(working_qg)
    if response_log.status != 'OK':
        return merged_kg, merged_edge_to_nodes_map
    working_qg = self._preprocess_query_graph(working_qg)
    if response_log.status != 'OK':
        return merged_kg, merged_edge_to_nodes_map
    if not self.kp_name.endswith("KG2"):
        # KG2 is exempt for now since predicates/ isn't symmetric yet
        self._verify_qg_is_accepted_by_kp(working_qg)
        if response_log.status != 'OK':
            return merged_kg, merged_edge_to_nodes_map

    if self.kp_name.endswith("KG2"):
        # Our KPs can handle batch queries (where qnode.id is a list of curies)
        merged_kg, merged_edge_to_nodes_map = self._answer_query_using_kp(working_qg)
        return merged_kg, merged_edge_to_nodes_map

    # Any other KP must be asked about curies one-by-one (until TRAPI includes a batch querying method)
    the_qedge = next(iter(working_qg.edges.values()))
    subject_qnode = working_qg.nodes[the_qedge.subject]
    object_qnode = working_qg.nodes[the_qedge.object]
    subject_qnode_curies = eu.convert_to_list(subject_qnode.id)
    if not subject_qnode_curies:
        subject_qnode_curies = [None]
    object_qnode_curies = eu.convert_to_list(object_qnode.id)
    if not object_qnode_curies:
        object_qnode_curies = [None]

    # Visit every subject/object curie pair (a pair looks like ("curie1", None) if one side has no curies)
    for subject_curie in subject_qnode_curies:
        for object_curie in object_qnode_curies:
            subject_qnode.id = subject_curie
            object_qnode.id = object_curie
            self.log.debug(f"Current curie pair is: subject: {subject_curie}, object: {object_curie}")
            kp_handles_lists = self.kp_supports_category_lists and self.kp_supports_predicate_lists
            if kp_handles_lists:
                pair_kg, pair_edge_to_nodes_map = self._answer_query_using_kp(working_qg)
            else:
                pair_kg, pair_edge_to_nodes_map = self._answer_query_for_kps_who_dont_like_lists(working_qg)
            merged_edge_to_nodes_map.update(pair_edge_to_nodes_map)
            merged_kg = eu.merge_two_kgs(pair_kg, merged_kg)

    return merged_kg, merged_edge_to_nodes_map
def answer_one_hop_query(self, query_graph: QueryGraph) -> Tuple[QGOrganizedKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    Answer a one-hop (single-edge) query using the Molecular Provider.

    :param query_graph: A Reasoner API standard query graph.
    :return: A tuple containing:
        1. an (almost) Reasoner API standard knowledge graph containing all of the nodes and edges returned as
           results for the query. (Dictionary version, organized by QG IDs.)
        2. a map of which nodes fulfilled which qnode_keys for each edge. Example:
           {'KG1:111221': {'n00': 'DOID:111', 'n01': 'HP:124'}, 'KG1:111223': {'n00': 'DOID:111', 'n01': 'HP:126'}}
        Both are empty when validation/preprocessing fails or the KP's response contains no knowledge graph.
    """
    response_log = self.response
    organized_kg = QGOrganizedKnowledgeGraph()
    nodes_by_edge_key = dict()

    # Check that this is a valid one-hop query graph, then adapt its contents for this KP
    self._verify_one_hop_query_graph_is_valid(query_graph, response_log)
    if response_log.status != 'OK':
        return organized_kg, nodes_by_edge_key
    kp_ready_qg = self._pre_process_query_graph(query_graph, response_log)
    if response_log.status != 'OK':
        return organized_kg, nodes_by_edge_key

    the_qedge = next(iter(kp_ready_qg.edges.values()))
    source_qnode_key, target_qnode_key = the_qedge.subject, the_qedge.object

    # Send the query off to the KP
    kp_json = self._send_query_to_kp(kp_ready_qg, response_log)
    kp_kg = kp_json.get('knowledge_graph')
    if not kp_kg:
        response_log.warning(f"No KG is present in the response from {self.kp_name}")
        return organized_kg, nodes_by_edge_key

    # Work out which qnode/qedge IDs each returned node/edge ID is bound to
    qg_bindings = self._get_qg_id_mappings_from_results(kp_json['results'])

    # Load the returned edges into our Swagger model
    for raw_edge_key, raw_edge in kp_kg['edges'].items():
        kp_edge_key, swagger_edge = self._create_swagger_edge_from_kp_edge(raw_edge_key, raw_edge)
        our_edge_key = self._create_unique_edge_key(swagger_edge)  # An ID that's unique for us
        for bound_qedge_key in qg_bindings['edges'][kp_edge_key]:
            organized_kg.add_edge(our_edge_key, swagger_edge, bound_qedge_key)
        nodes_by_edge_key[our_edge_key] = {source_qnode_key: swagger_edge.subject,
                                           target_qnode_key: swagger_edge.object}

    # Load the returned nodes into our Swagger model
    for raw_node_key, raw_node in kp_kg['nodes'].items():
        swagger_node_key, swagger_node = self._create_swagger_node_from_kp_node(raw_node_key, raw_node)
        for bound_qnode_key in qg_bindings['nodes'][swagger_node_key]:
            organized_kg.add_node(swagger_node_key, swagger_node, bound_qnode_key)

    return organized_kg, nodes_by_edge_key
def _answer_query_using_kp(self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Send the query graph to the KP (synchronously, via requests) and load its answer.

    Failures never raise; they are recorded on self.log and an empty knowledge graph is returned:
      * request timeout        -> self.log.timed_out is set to the timeout length
      * any other request error, a non-200 status, or a non-JSON body -> self.log.http_error is set
    On a completed request self.log.wait_time holds the elapsed seconds (rounded).

    :param query_graph: The TRAPI query graph to send.
    :return: The KP's answer as loaded by self._load_kp_json_response, or an empty
             QGOrganizedKnowledgeGraph on failure.
    """
    # TODO: Delete this method once we're ready to let go of the multiprocessing (vs. asyncio) option
    request_body = self._get_prepped_request_body(query_graph)
    query_timeout = self._get_query_timeout_length()

    # Avoid calling the KG2 TRAPI endpoint if the 'force_local' flag is set (used only for testing/dev work)
    if self.force_local and self.kp_name == 'infores:rtx-kg2':
        json_response = self._answer_query_force_local(request_body)
    # Otherwise send the query graph to the KP's TRAPI API
    else:
        self.log.debug(f"{self.kp_name}: Sending query to {self.kp_name} API")
        try:
            with requests_cache.disabled():
                start = time.time()
                kp_response = requests.post(f"{self.kp_endpoint}/query",
                                            json=request_body,
                                            headers={'accept': 'application/json'},
                                            timeout=query_timeout)
                self.log.wait_time = round(time.time() - start)
        except requests.exceptions.Timeout:
            timeout_message = f"Query timed out after {query_timeout} seconds"
            self.log.warning(f"{self.kp_name}: {timeout_message}")
            self.log.timed_out = query_timeout
            return QGOrganizedKnowledgeGraph()
        except Exception as ex:
            # Still a catch-all (this method must not raise), but no longer mislabelled as a timeout;
            # mirrors the separate timeout/error handling of the asyncio version of this method
            exception_message = f"Request threw exception: {type(ex)}"
            self.log.warning(f"{self.kp_name}: {exception_message}")
            self.log.http_error = exception_message
            return QGOrganizedKnowledgeGraph()

        if kp_response.status_code != 200:
            self.log.warning(f"{self.kp_name} API returned response of {kp_response.status_code}. "
                             f"Response from KP was: {kp_response.text}")
            self.log.http_error = f"HTTP {kp_response.status_code}"
            return QGOrganizedKnowledgeGraph()

        try:
            json_response = kp_response.json()
        except ValueError:
            # requests raises a ValueError subclass when the body cannot be decoded as JSON
            self.log.warning(f"{self.kp_name} API returned a response that is not valid JSON. "
                             f"Response from KP was: {kp_response.text}")
            self.log.http_error = f"HTTP {kp_response.status_code} with invalid JSON body"
            return QGOrganizedKnowledgeGraph()

    answer_kg = self._load_kp_json_response(json_response)
    return answer_kg
def answer_single_node_query(self, single_node_qg: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Answer a single-node (edge-less) query using the specified KP.

    :param single_node_qg: A TRAPI query graph containing a single node (no edges). It is deep-copied,
                           so the caller's object is left untouched.
    :return: An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as results
             for the query (organized by QG IDs), or an empty one if verification leaves the log in a
             non-'OK' status.
    """
    response_log = self.log
    empty_kg = QGOrganizedKnowledgeGraph()
    working_qg = copy.deepcopy(single_node_qg)

    # Refuse anything that is not a proper single-node query graph
    self._verify_is_single_node_query_graph(working_qg)
    if response_log.status != 'OK':
        return empty_kg

    # Ask the KP and hand back its answers, loaded into our object model
    return self._answer_query_using_kp(working_qg)
def answer_one_hop_query(self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Answer a one-hop (single-edge) query using the specified KP.

    For every KP other than 'infores:rtx-kg2', the query graph is first checked against what the KP accepts
    (only when the user picked the KP) and then converted to use curie prefixes the KP supports.

    :param query_graph: A TRAPI query graph. It is deep-copied, so the caller's object is left untouched.
    :return: An (almost) TRAPI knowledge graph containing all of the nodes and edges returned as
             results for the query (organized by QG IDs). An empty knowledge graph is returned when the
             query graph is invalid, is not accepted by the KP, or has no curies with supported prefixes.
    """
    # TODO: Delete this method once we're ready to let go of the multiprocessing (vs. asyncio) option
    log = self.log
    final_kg = QGOrganizedKnowledgeGraph()
    qg_copy = copy.deepcopy(query_graph)  # Create a copy so we don't modify the original

    self._verify_is_one_hop_query_graph(qg_copy)
    if log.status != 'OK':
        return final_kg

    # Only look up the edge key once the QG has passed verification: doing it earlier raised an
    # unhandled StopIteration for an edge-less QG before the verifier had a chance to report it
    qedge_key = next(iter(qg_copy.edges))

    # Verify that the KP accepts these predicates/categories/prefixes
    if self.kp_name != "infores:rtx-kg2":
        if self.user_specified_kp:  # This is already done if expand chose the KP itself
            if not self.kp_selector.kp_accepts_single_hop_qg(qg_copy, self.kp_name):
                log.error(f"{self.kp_name} cannot answer queries with the specified categories/predicates",
                          error_code="UnsupportedQG")
                return final_kg

        # Convert the QG so that it uses curies with prefixes the KP likes
        qg_copy = self.kp_selector.make_qg_use_supported_prefixes(qg_copy, self.kp_name, log)
        if not qg_copy:  # Means no equivalent curies with supported prefixes were found
            skipped_message = "No equivalent curies with supported prefixes found"
            log.update_query_plan(qedge_key, self.kp_name, "Skipped", skipped_message)
            return final_kg

    # Answer the query using the KP and load its answers into our object model
    final_kg = self._answer_query_using_kp(qg_copy)
    return final_kg
def _answer_query_using_kp(self, query_graph: QueryGraph) -> Tuple[QGOrganizedKnowledgeGraph, Dict[str, Dict[str, str]]]:
    """
    Send the query graph to the KP's /query endpoint and load the answer into our object model.

    Problems with the response (non-200 status, body that is not JSON, missing 'message', missing/empty
    'results') are logged as warnings and produce empty return values rather than an exception.

    :param query_graph: The TRAPI query graph to send; empty properties are stripped from its qnodes and
                        qedges before sending.
    :return: A tuple containing:
        1. The knowledge graph of returned nodes/edges, organized by the qnode/qedge keys they fulfill.
        2. A map indicating which of each edge's nodes fulfill which qnode (empty when the query graph
           has no edges).
    """
    answer_kg = QGOrganizedKnowledgeGraph()
    edge_to_nodes_map = dict()

    # Strip non-essential and 'empty' properties off of our qnodes and qedges
    stripped_qnodes = {qnode_key: self._strip_empty_properties(qnode)
                       for qnode_key, qnode in query_graph.nodes.items()}
    stripped_qedges = {qedge_key: self._strip_empty_properties(qedge)
                       for qedge_key, qedge in query_graph.edges.items()}

    # Send the query to the KP's API
    body = {'message': {'query_graph': {'nodes': stripped_qnodes, 'edges': stripped_qedges}}}
    self.log.debug(f"Sending query to {self.kp_name} API")
    kp_response = requests.post(f"{self.kp_endpoint}/query", json=body, headers={'accept': 'application/json'})

    # Check the status BEFORE decoding: error responses frequently carry HTML or an empty body, and
    # decoding those as JSON would raise instead of letting us log what the KP actually sent
    if kp_response.status_code != 200:
        self.log.warning(f"{self.kp_name} API returned response of {kp_response.status_code}. Response from KP was:"
                         f" {kp_response.text}")
        return answer_kg, edge_to_nodes_map
    try:
        json_response = kp_response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body cannot be decoded as JSON
        self.log.warning(f"{self.kp_name} API returned a response that is not valid JSON. Response from KP was:"
                         f" {kp_response.text}")
        return answer_kg, edge_to_nodes_map

    if not json_response.get("message"):
        self.log.warning(f"No 'message' was included in the response from {self.kp_name}. Response from KP was: "
                         f"{json.dumps(json_response, indent=4)}")
        return answer_kg, edge_to_nodes_map
    if not json_response["message"].get("results"):
        self.log.warning(f"No 'results' were returned from {self.kp_name}. Response from KP was: "
                         f"{json.dumps(json_response, indent=4)}")
        return answer_kg, edge_to_nodes_map

    kp_message = ARAXMessenger().from_dict(json_response["message"])

    # Build a map that indicates which qnodes/qedges a given node/edge fulfills
    kg_to_qg_mappings = self._get_kg_to_qg_mappings_from_results(kp_message.results)

    # Populate our final KG with the returned nodes and edges
    for returned_edge_key, returned_edge in kp_message.knowledge_graph.edges.items():
        arax_edge_key = self._get_arax_edge_key(returned_edge)  # Convert to an ID that's unique for us
        for qedge_key in kg_to_qg_mappings['edges'][returned_edge_key]:
            answer_kg.add_edge(arax_edge_key, returned_edge, qedge_key)
    for returned_node_key, returned_node in kp_message.knowledge_graph.nodes.items():
        for qnode_key in kg_to_qg_mappings['nodes'][returned_node_key]:
            answer_kg.add_node(returned_node_key, returned_node, qnode_key)

    # Build a map that indicates which of an edge's nodes fulfill which qnode
    if query_graph.edges:
        qedge = next(iter(query_graph.edges.values()))
        edge_to_nodes_map = self._create_edge_to_nodes_map(kg_to_qg_mappings, kp_message.knowledge_graph, qedge)

    return answer_kg, edge_to_nodes_map
def _load_kp_json_response(self, json_response: dict) -> QGOrganizedKnowledgeGraph:
    """
    Load a KP's TRAPI JSON response into a knowledge graph organized by QG keys.

    Along the way, every returned edge gets: placeholder text for any attribute lacking an
    attribute_type_id, a KP source attribute (if no attribute value already names this KP), and an ARAX
    source attribute. Returned nodes get the same attribute_type_id placeholder treatment. Nodes/edges that
    no result binds to the QG are left out of the answer and reported in a warning.

    :param json_response: The decoded JSON body of the KP's response. If it has a 'message' without
                          results, its 'results' entry is set to an empty list (in place).
    :return: The organized knowledge graph; empty when the response has no 'message' or no results.
    """
    # Load the results into the object model
    answer_kg = QGOrganizedKnowledgeGraph()
    if not json_response.get("message"):
        self.log.warning(f"{self.kp_name}: No 'message' was included in the response from {self.kp_name}. "
                         f"Response was: {json.dumps(json_response, indent=4)}")
        return answer_kg
    if not json_response["message"].get("results"):
        self.log.debug(f"{self.kp_name}: No 'results' were returned.")
        json_response["message"]["results"] = []  # Setting this to empty list helps downstream processing
        return answer_kg

    self.log.debug(f"{self.kp_name}: Got results from {self.kp_name}.")
    kp_message = ARAXMessenger().from_dict(json_response["message"])
    kp_name = self.kp_name
    missing_type_id_placeholder = f"not provided (this attribute came from {self.kp_name})"

    def names_this_kp(value) -> bool:
        # True if an attribute value is this KP's name, or is a list that contains it
        return (isinstance(value, list) and kp_name in value) or (value == kp_name)

    # Build a map that indicates which qnodes/qedges a given node/edge fulfills
    kg_to_qg_mappings = self._get_kg_to_qg_mappings_from_results(kp_message.results)

    # Populate our final KG with the returned nodes and edges
    returned_edge_keys_missing_qg_bindings = set()
    for returned_edge_key, returned_edge in kp_message.knowledge_graph.edges.items():
        arax_edge_key = self._get_arax_edge_key(returned_edge)  # Convert to an ID that's unique for us
        if not returned_edge.attributes:
            returned_edge.attributes = []
        # Put in a placeholder for missing required attribute fields to try to keep our answer TRAPI-compliant
        for attribute in returned_edge.attributes:
            if not attribute.attribute_type_id:
                attribute.attribute_type_id = missing_type_id_placeholder
        # Check if KPs are properly indicating that these edges came from them (indicate it ourselves if not)
        if not any(names_this_kp(attribute.value) for attribute in returned_edge.attributes):
            returned_edge.attributes.append(eu.get_kp_source_attribute(self.kp_name))
        # Add an attribute to indicate that this edge passed through ARAX
        returned_edge.attributes.append(eu.get_arax_source_attribute())

        if returned_edge_key in kg_to_qg_mappings['edges']:
            for qedge_key in kg_to_qg_mappings['edges'][returned_edge_key]:
                answer_kg.add_edge(arax_edge_key, returned_edge, qedge_key)
        else:
            returned_edge_keys_missing_qg_bindings.add(returned_edge_key)
    if returned_edge_keys_missing_qg_bindings:
        self.log.warning(f"{self.kp_name}: {len(returned_edge_keys_missing_qg_bindings)} edges in the KP's answer "
                         f"KG have no bindings to the QG: {returned_edge_keys_missing_qg_bindings}")

    returned_node_keys_missing_qg_bindings = set()
    for returned_node_key, returned_node in kp_message.knowledge_graph.nodes.items():
        if returned_node_key not in kg_to_qg_mappings['nodes']:
            returned_node_keys_missing_qg_bindings.add(returned_node_key)
        else:
            for qnode_key in kg_to_qg_mappings['nodes'][returned_node_key]:
                answer_kg.add_node(returned_node_key, returned_node, qnode_key)
        if returned_node.attributes:
            for attribute in returned_node.attributes:
                if not attribute.attribute_type_id:
                    attribute.attribute_type_id = missing_type_id_placeholder
    if returned_node_keys_missing_qg_bindings:
        self.log.warning(f"{self.kp_name}: {len(returned_node_keys_missing_qg_bindings)} nodes in the KP's answer "
                         f"KG have no bindings to the QG: {returned_node_keys_missing_qg_bindings}")

    return answer_kg
async def _answer_query_using_kp_async(self, query_graph: QueryGraph) -> QGOrganizedKnowledgeGraph:
    """
    Send the query graph to the KP asynchronously (via aiohttp) and load its answer.

    Progress is reported through self.log.update_query_plan for the query graph's first edge:
    "Waiting" when the query is sent, then "Done", "Timed out" or "Error". HTTP errors, timeouts and any
    other exception raised by the request are logged as warnings and yield an empty knowledge graph
    instead of propagating.

    :param query_graph: The TRAPI query graph to send; it must contain at least one edge and one node.
    :return: The KP's answer as loaded by self._load_kp_json_response, or an empty
             QGOrganizedKnowledgeGraph on failure.
    """
    # Imported here so this method does not depend on a module-level asyncio import
    import asyncio

    request_body = self._get_prepped_request_body(query_graph)
    query_sent = copy.deepcopy(request_body)
    query_timeout = self._get_query_timeout_length()
    qedge_key = next(iter(query_graph.edges))

    num_input_curies = max(len(eu.convert_to_list(qnode.ids)) for qnode in query_graph.nodes.values())
    waiting_message = f"Query with {num_input_curies} curies sent: waiting for response"
    self.log.update_query_plan(qedge_key, self.kp_name, "Waiting", waiting_message, query=query_sent)
    start = time.time()

    # Avoid calling the KG2 TRAPI endpoint if the 'force_local' flag is set (used only for testing/dev work)
    if self.force_local and self.kp_name == 'infores:rtx-kg2':
        json_response = self._answer_query_force_local(request_body)
    # Otherwise send the query graph to the KP's TRAPI API
    else:
        self.log.debug(f"{self.kp_name}: Sending query to {self.kp_name} API")
        async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False)) as session:
            try:
                async with session.post(f"{self.kp_endpoint}/query",
                                        json=request_body,
                                        headers={'accept': 'application/json'},
                                        timeout=query_timeout) as response:
                    if response.status == 200:
                        json_response = await response.json()
                    else:
                        wait_time = round(time.time() - start)
                        http_error_message = f"Returned HTTP error {response.status} after {wait_time} seconds"
                        self.log.warning(f"{self.kp_name}: {http_error_message}. Query sent to KP was: {request_body}")
                        self.log.update_query_plan(qedge_key, self.kp_name, "Error", http_error_message)
                        return QGOrganizedKnowledgeGraph()
            # asyncio.TimeoutError and concurrent.futures.TimeoutError are distinct classes on
            # Python 3.8-3.10, so catching only the latter let real timeouts fall through to the generic
            # handler below and be reported as "Error"
            except (asyncio.TimeoutError, concurrent.futures.TimeoutError):
                timeout_message = f"Query timed out after {query_timeout} seconds"
                self.log.warning(f"{self.kp_name}: {timeout_message}")
                self.log.update_query_plan(qedge_key, self.kp_name, "Timed out", timeout_message)
                return QGOrganizedKnowledgeGraph()
            except Exception as ex:
                wait_time = round(time.time() - start)
                exception_message = f"Request threw exception after {wait_time} seconds: {type(ex)}"
                self.log.warning(f"{self.kp_name}: {exception_message}")
                self.log.update_query_plan(qedge_key, self.kp_name, "Error", exception_message)
                return QGOrganizedKnowledgeGraph()

    wait_time = round(time.time() - start)
    answer_kg = self._load_kp_json_response(json_response)
    done_message = f"Returned {len(answer_kg.edges_by_qg_id.get(qedge_key, dict()))} edges in {wait_time} seconds"
    self.log.update_query_plan(qedge_key, self.kp_name, "Done", done_message)
    return answer_kg