def _get_edge_with_curie_node(query_graph):
    """Return the first qedge in the query graph that touches a qnode with a curie.

    Both endpoint qnodes of each edge are looked up via eu.get_query_node(); the
    first edge whose source or target qnode has a truthy `curie` is returned.
    Returns None when no edge qualifies.
    """
    for qedge in query_graph.edges:
        endpoints = [eu.get_query_node(query_graph, qnode_id)
                     for qnode_id in (qedge.source_id, qedge.target_id)]
        # Source is inspected before target; any() stops at the first curie found
        if any(endpoint.curie for endpoint in endpoints):
            return qedge
    return None
def _get_query_graph_for_edge(qedge, query_graph, dict_kg):
    """Build a one-edge query graph for `qedge`, seeding qnode curies from `dict_kg` where needed.

    :param qedge: the query edge to build a query graph for.
    :param query_graph: the query graph the qedge and its qnodes are taken from.
    :param dict_kg: dict with 'nodes' and 'edges' entries keyed by query graph IDs.
    :return: tuple of (the new single-edge query graph, the set of qnode IDs whose
             curies were filled in from dict_kg).
    """
    single_edge_qg = QueryGraph(nodes=[], edges=[])
    endpoint_qnodes = [eu.get_query_node(query_graph, endpoint_id)
                       for endpoint_id in (qedge.source_id, qedge.target_id)]

    # The edge itself goes in as a copy so the caller's query graph is left untouched
    single_edge_qg.edges.append(eu.copy_qedge(qedge))

    edge_was_expanded_before = qedge.id in dict_kg['edges']
    seeded_qnode_ids = set()
    for original_qnode in endpoint_qnodes:
        qnode_copy = eu.copy_qnode(original_qnode)
        has_prior_answers = qnode_copy.id in dict_kg['nodes']
        # Only curie-less qnodes of a not-yet-expanded edge inherit curies from a prior step
        if has_prior_answers and not (qnode_copy.curie or edge_was_expanded_before):
            qnode_copy.curie = list(dict_kg['nodes'][qnode_copy.id].keys())
            seeded_qnode_ids.add(qnode_copy.id)
        single_edge_qg.nodes.append(qnode_copy)

    return single_edge_qg, seeded_qnode_ids
def _get_query_graph_for_edge(self, qedge: QEdge, query_graph: QueryGraph, dict_kg: DictKnowledgeGraph,
                              use_synonyms: bool, kp_to_use: str, log: Response) -> QueryGraph:
    """Build a one-edge query graph for `qedge`, updating its qnodes' curies and types as needed.

    :param qedge: the query edge to build a query graph for.
    :param query_graph: the query graph the qedge and its qnodes are taken from.
    :param dict_kg: knowledge graph built so far; its nodes_by_qg_id / edges_by_qg_id are consulted.
    :param use_synonyms: when true, self._add_curie_synonyms_to_query_nodes() is run on the new qnodes.
    :param kp_to_use: knowledge provider name, forwarded to the synonym step.
    :param log: response/log object, forwarded to the synonym step.
    :return: the new single-edge query graph.
    """
    single_edge_qg = QueryGraph(nodes=[], edges=[])
    endpoint_qnodes = [eu.get_query_node(query_graph, endpoint_id)
                       for endpoint_id in (qedge.source_id, qedge.target_id)]

    # The edge itself goes in as a copy
    single_edge_qg.edges.append(eu.copy_qedge(qedge))

    edge_was_expanded_before = qedge.id in dict_kg.edges_by_qg_id
    for original_qnode in endpoint_qnodes:
        qnode_copy = eu.copy_qnode(original_qnode)
        has_prior_answers = qnode_copy.id in dict_kg.nodes_by_qg_id
        # Curie-less qnodes of a not-yet-expanded edge take their curies from a prior Expand() step
        if has_prior_answers and not (qnode_copy.curie or edge_was_expanded_before):
            qnode_copy.curie = list(dict_kg.nodes_by_qg_id[qnode_copy.id].keys())
        single_edge_qg.nodes.append(qnode_copy)

    if use_synonyms:
        self._add_curie_synonyms_to_query_nodes(qnodes=single_edge_qg.nodes, log=log, kp=kp_to_use)

    # Treat protein and gene as interchangeable (since KPs handle these differently)
    interchangeable_types = ('protein', 'gene')
    for qnode_copy in single_edge_qg.nodes:
        if qnode_copy.type in ['protein', 'gene']:
            # A fresh list per qnode, so the qnodes never share one list object
            qnode_copy.type = list(interchangeable_types)

    return single_edge_qg
def _convert_query_graph_to_cypher_query(self, enforce_directionality):
    """Translate the single-edge query graph in self.query_graph into a cypher query.

    On success the query text is stored in self.cypher_query. Problems are reported
    through self.response.error() rather than raised, and nothing is returned.

    :param enforce_directionality: forwarded to self._get_cypher_for_query_edge().
    """
    qedges = self.query_graph.edges
    # Exactly one edge is required. An empty edge list previously slipped past the
    # "> 1" check and raised an uncaught IndexError while formatting the debug message.
    if len(qedges) != 1:
        self.response.error("KGQuerier requires a single-edge query graph", error_code="InvalidQuery")
        return

    edge = qedges[0]
    self.response.debug(f"Generating cypher for edge {edge.id} query graph")
    try:
        # Build the match clause
        source_node = eu.get_query_node(self.query_graph, edge.source_id)
        target_node = eu.get_query_node(self.query_graph, edge.target_id)
        edge_cypher = self._get_cypher_for_query_edge(edge, enforce_directionality)
        source_node_cypher = self._get_cypher_for_query_node(source_node)
        target_node_cypher = self._get_cypher_for_query_node(target_node)
        match_clause = f"MATCH {source_node_cypher}{edge_cypher}{target_node_cypher}"

        # Build the where clause (one fragment per qnode that has a curie)
        # NOTE(review): curies are interpolated into the query text unescaped - confirm they
        # are validated upstream, since a curie containing a quote would break the query.
        where_fragments = []
        for node in (source_node, target_node):
            if node.curie:
                if isinstance(node.curie, str):
                    where_fragments.append(f"{node.id}.id='{node.curie}'")
                else:
                    # A non-string curie is rendered with its Python repr (e.g. a list literal)
                    where_fragments.append(f"{node.id}.id in {node.curie}")
        where_clause = f"WHERE {' AND '.join(where_fragments)}" if where_fragments else ""

        # Build the with clause
        source_node_col_name = f"nodes_{source_node.id}"
        target_node_col_name = f"nodes_{target_node.id}"
        edge_col_name = f"edges_{edge.id}"
        # Adds the edge's ID and a record of which node ID matched which qnode ID to each edge
        extra_edge_properties = "{.*, " + f"id:ID({edge.id}), {source_node.id}:{source_node.id}.id, {target_node.id}:{target_node.id}.id" + "}"
        with_clause = f"WITH collect(distinct {source_node.id}) as {source_node_col_name}, " \
                      f"collect(distinct {target_node.id}) as {target_node_col_name}, " \
                      f"collect(distinct {edge.id}{extra_edge_properties}) as {edge_col_name}"

        # Build the return clause
        return_clause = f"RETURN {source_node_col_name}, {target_node_col_name}, {edge_col_name}"

        self.cypher_query = f"{match_clause} {where_clause} {with_clause} {return_clause}"
    except Exception:
        # Any failure while assembling the query is reported with its traceback,
        # using the exception's class name as the error code
        tb = traceback.format_exc()
        error_type = sys.exc_info()[0]
        self.response.error(f"Problem generating cypher for query. {tb}", error_code=error_type.__name__)
def _convert_one_hop_query_graph_to_cypher_query(self, query_graph: QueryGraph, enforce_directionality: bool,
                                                 kp: str, log: Response) -> str:
    """Translate a one-hop query graph into a cypher query string.

    Only the first edge of `query_graph` is used. Any exception raised while building
    the query is logged through log.error() and an empty string is returned.

    :param query_graph: query graph whose first edge (and its two qnodes) is translated.
    :param enforce_directionality: forwarded to self._get_cypher_for_query_edge().
    :param kp: knowledge provider name; when it contains "KG2" the node type property
               used in the WHERE clause is "category_label", otherwise "category".
    :param log: response/log object used for debug and error messages.
    :return: the cypher query, or "" if it could not be generated.
    """
    try:
        # Indexing happens inside the try so that an empty query graph is logged and
        # answered with "" like every other failure, instead of raising IndexError
        qedge = query_graph.edges[0]
        log.debug(f"Generating cypher for edge {qedge.id} query graph")

        # Build the match clause
        source_qnode = eu.get_query_node(query_graph, qedge.source_id)
        target_qnode = eu.get_query_node(query_graph, qedge.target_id)
        qedge_cypher = self._get_cypher_for_query_edge(qedge, enforce_directionality)
        source_qnode_cypher = self._get_cypher_for_query_node(source_qnode)
        target_qnode_cypher = self._get_cypher_for_query_node(target_qnode)
        match_clause = f"MATCH {source_qnode_cypher}{qedge_cypher}{target_qnode_cypher}"

        # Build the where clause
        # NOTE(review): curies/types are interpolated into the query text unescaped - confirm
        # they are validated upstream, since a value containing a quote would break the query.
        node_type_property = "category_label" if "KG2" in kp else "category"
        where_fragments = []
        for qnode in (source_qnode, target_qnode):
            if qnode.curie:
                if isinstance(qnode.curie, str):
                    where_fragments.append(f"{qnode.id}.id='{qnode.curie}'")
                else:
                    # A non-string curie is rendered with its Python repr (e.g. a list literal)
                    where_fragments.append(f"{qnode.id}.id in {qnode.curie}")
            # Only list-valued types are constrained here
            if qnode.type and isinstance(qnode.type, list):
                where_fragments.append(f"{qnode.id}.{node_type_property} in {qnode.type}")
        where_clause = f"WHERE {' AND '.join(where_fragments)}" if where_fragments else ""

        # Build the with clause
        source_qnode_col_name = f"nodes_{source_qnode.id}"
        target_qnode_col_name = f"nodes_{target_qnode.id}"
        qedge_col_name = f"edges_{qedge.id}"
        # This line grabs the edge's ID and a record of which of its nodes correspond to which qnode ID
        extra_edge_properties = "{.*, " + f"id:ID({qedge.id}), {source_qnode.id}:{source_qnode.id}.id, {target_qnode.id}:{target_qnode.id}.id" + "}"
        with_clause = f"WITH collect(distinct {source_qnode.id}) as {source_qnode_col_name}, " \
                      f"collect(distinct {target_qnode.id}) as {target_qnode_col_name}, " \
                      f"collect(distinct {qedge.id}{extra_edge_properties}) as {qedge_col_name}"

        # Build the return clause
        return_clause = f"RETURN {source_qnode_col_name}, {target_qnode_col_name}, {qedge_col_name}"

        return f"{match_clause} {where_clause} {with_clause} {return_clause}"
    except Exception:
        # Report the failure with its traceback, using the exception's class name as the error code
        tb = traceback.format_exc()
        error_type = sys.exc_info()[0]
        log.error(f"Problem generating cypher for query. {tb}", error_code=error_type.__name__)
        return ""
def _expand_node(self, qnode_id, kp_to_use, continue_if_no_results, query_graph):
    """Expand a single query node using the specified knowledge provider.

    :param qnode_id: ID of the qnode to expand.
    :param kp_to_use: knowledge provider name; only 'ARAX/KG1' and 'ARAX/KG2' produce an answer.
    :param continue_if_no_results: when false, an answer without nodes for this qnode is logged as an error.
    :param query_graph: query graph the qnode is looked up in.
    :return: the answer KG from KGQuerier, or None when the response is already not 'OK',
             'BTE' was requested, or the knowledge provider is not recognised.
    """
    self.response.debug(f"Expanding node {qnode_id} using {kp_to_use}")
    qnode = eu.get_query_node(query_graph, qnode_id)

    if self.response.status != 'OK':
        return None

    if kp_to_use == 'BTE':
        self.response.error("Cannot use BTE to answer single node queries", error_code="InvalidQuery")
        return None

    if kp_to_use not in ('ARAX/KG2', 'ARAX/KG1'):
        self.response.error(f"Invalid knowledge provider: {kp_to_use}. Valid options are ARAX/KG1 or ARAX/KG2")
        return None

    from Expand.kg_querier import KGQuerier
    answer_kg = KGQuerier(self.response, kp_to_use).answer_single_node_query(qnode)

    # The qnode must have been fulfilled, unless the caller chose to continue without results
    results_are_required = self.response.status == 'OK' and not continue_if_no_results
    if results_are_required:
        answer_nodes = answer_kg['nodes']
        if qnode.id not in answer_nodes or not answer_nodes[qnode.id]:
            self.response.error(f"Returned answer KG does not contain any results for QNode {qnode.id}",
                                error_code="UnfulfilledQGID")
    return answer_kg
def _extract_query_subgraph(qedge_ids_to_expand: List[str], query_graph: QueryGraph, log: Response) -> QueryGraph:
    """Extract the sub-query graph made up of the given qedge IDs and their qnodes.

    Copies of each requested qedge and of its source/target qnodes are added to a new
    QueryGraph, each at most once.

    :param qedge_ids_to_expand: IDs of the qedges to include.
    :param query_graph: the larger query graph to extract from.
    :param log: response/log object used to report problems.
    :return: the new sub-query graph, or None (after logging an error) if a requested
             qedge ID is unknown or a qedge refers to a qnode missing from the query graph.
    """
    sub_query_graph = QueryGraph(nodes=[], edges=[])

    for requested_id in qedge_ids_to_expand:
        # The requested edge has to exist in the full query graph
        matching_qedge = next((candidate for candidate in query_graph.edges if candidate.id == requested_id), None)
        if matching_qedge is None:
            log.error(f"An edge with ID '{requested_id}' does not exist in Message.QueryGraph",
                      error_code="UnknownValue")
            return None

        # Both of its endpoints have to exist as well (source is checked before target)
        endpoint_qnodes = []
        for role, endpoint_id in (("source_id", matching_qedge.source_id), ("target_id", matching_qedge.target_id)):
            endpoint = eu.get_query_node(query_graph, endpoint_id)
            if not endpoint:
                log.error(f"Qedge {matching_qedge.id}'s {role} refers to a qnode that does not exist in the query graph: "
                          f"{endpoint_id}", error_code="InvalidQEdge")
                return None
            endpoint_qnodes.append(endpoint)

        # Add copies of the qedge and its qnodes, skipping IDs that are already present
        qedge_copy = eu.copy_qedge(matching_qedge)
        if all(existing.id != qedge_copy.id for existing in sub_query_graph.edges):
            sub_query_graph.edges.append(qedge_copy)
        for endpoint in endpoint_qnodes:
            qnode_copy = eu.copy_qnode(endpoint)
            if all(existing.id != qnode_copy.id for existing in sub_query_graph.nodes):
                sub_query_graph.nodes.append(qnode_copy)

    return sub_query_graph
def _expand_node(self, qnode_id: str, kp_to_use: str, continue_if_no_results: bool, query_graph: QueryGraph,
                 use_synonyms: bool, synonym_handling: str, log: Response) -> DictKnowledgeGraph:
    """Expand a single query node using the specified knowledge provider.

    :param qnode_id: ID of the qnode to expand; the qnode must have a curie.
    :param kp_to_use: knowledge provider name; 'ARAX/KG1' and 'ARAX/KG2' are accepted.
    :param continue_if_no_results: when false, an answer without nodes for this qnode is logged as an error.
    :param query_graph: query graph the qnode is looked up in.
    :param use_synonyms: when true, self._add_curie_synonyms_to_query_nodes() is run on a copy of the qnode.
    :param synonym_handling: unless equal to 'add_all', the answer is passed through self._deduplicate_nodes().
    :param log: response/log object used for messages and status checks.
    :return: the answer knowledge graph; an empty DictKnowledgeGraph when the log is already not 'OK',
             the qnode has no curie, or the knowledge provider is invalid.
    """
    log.debug(f"Expanding node {qnode_id} using {kp_to_use}")
    original_qnode = eu.get_query_node(query_graph, qnode_id)
    answer_kg = DictKnowledgeGraph()

    if log.status != 'OK':
        return answer_kg
    if not original_qnode.curie:
        log.error("Cannot expand a single query node if it doesn't have a curie", error_code="InvalidQuery")
        return answer_kg

    # Work on a copy so the caller's query graph is not modified
    qnode_to_query = eu.copy_qnode(original_qnode)
    if use_synonyms:
        self._add_curie_synonyms_to_query_nodes(qnodes=[qnode_to_query], log=log, kp=kp_to_use)
    if qnode_to_query.type in ["protein", "gene"]:
        qnode_to_query.type = ["protein", "gene"]
    log.debug(f"Modified query node is: {qnode_to_query.to_dict()}")

    # Answer the query using the proper KP
    valid_kps_for_single_node_queries = ["ARAX/KG1", "ARAX/KG2"]
    if kp_to_use not in valid_kps_for_single_node_queries:
        log.error(f"Invalid knowledge provider: {kp_to_use}. Valid options for single-node queries are "
                  f"{', '.join(valid_kps_for_single_node_queries)}", error_code="InvalidKP")
        return answer_kg

    from Expand.kg_querier import KGQuerier
    answer_kg = KGQuerier(log, kp_to_use).answer_single_node_query(qnode_to_query)
    log.info(f"Query for node {qnode_to_query.id} returned results ({eu.get_printable_counts_by_qg_id(answer_kg)})")

    # The qnode must have been fulfilled, unless the caller chose to continue without results
    results_are_required = log.status == 'OK' and not continue_if_no_results
    if results_are_required:
        answered_nodes = answer_kg.nodes_by_qg_id
        if qnode_to_query.id not in answered_nodes or not answered_nodes[qnode_to_query.id]:
            log.error(f"Returned answer KG does not contain any results for QNode {qnode_to_query.id}",
                      error_code="UnfulfilledQGID")
            return answer_kg

    if synonym_handling != 'add_all':
        # Only the de-duplicated KG is kept; the accompanying usage map is not needed here
        answer_kg, edge_node_usage_map = self._deduplicate_nodes(dict_kg=answer_kg,
                                                                 edge_to_nodes_map={},
                                                                 log=log)
    return answer_kg