def _answer_query_using_neo4j(
        self, cypher_query: str, qedge_key: str, kg_name: str,
        log: ARAXResponse) -> List[Dict[str, List[Dict[str, any]]]]:
    """Run a cypher query for one query edge and return the raw results.

    :param cypher_query: Cypher text handed to ``self._run_cypher_query``.
    :param qedge_key: Key of the query edge being answered; used only in
        the log message.
    :param kg_name: Name of the knowledge graph; passed through to
        ``self._run_cypher_query`` and used in the log message.
    :param log: Response object that receives the info message and is
        passed on to ``self._run_cypher_query``.
    :return: Whatever ``self._run_cypher_query`` returns, unmodified.
    """
    # NOTE(review): the return annotation uses the builtin ``any`` where
    # ``typing.Any`` was presumably intended; left untouched because this
    # block cannot guarantee that ``Any`` is imported at file level.
    log.info(f"Sending cypher query for edge {qedge_key} to {kg_name} neo4j")
    # An earlier revision tallied the length of every column of the first
    # result row into a dict that was never read. That dead code indexed
    # ``results[0]`` and so could raise (e.g. IndexError on an empty result
    # list) even though the query itself had succeeded; it has been removed.
    return self._run_cypher_query(cypher_query, kg_name, log)
def make_qg_use_supported_prefixes(
        self, qg: QueryGraph, kp_name: str,
        log: ARAXResponse) -> Optional[QueryGraph]:
    """Rewrite the curies on each query node so that ``kp_name`` can use them.

    Nodes without ``ids`` are left alone. For "infores:rtx-kg2" the ids are
    replaced by the output of ``eu.get_canonical_curies_list``. For any other
    KP the ids are only replaced when at least one of them uses a prefix that
    is not in the set returned by ``self._get_supported_prefixes``.

    :param qg: Query graph whose nodes are modified in place.
    :param kp_name: Identifier of the knowledge provider.
    :param log: Response object used for the conversion messages.
    :return: The same ``qg`` object, or ``None`` as soon as a node is found
        for which ``self.get_desirable_equivalent_curies`` yields nothing.
    """
    # NOTE(review): the two "conversion necessary?" debug messages below go to
    # ``self.log`` while every other message uses the ``log`` argument.
    is_kg2 = kp_name == "infores:rtx-kg2"

    for qnode_key, qnode in qg.nodes.items():
        if not qnode.ids:
            continue

        if is_kg2:
            # KG2 only needs the canonical form of each curie
            qnode.ids = eu.get_canonical_curies_list(qnode.ids, log)
            continue

        # Any other KP: compare the prefixes in use with the ones it supports
        category_list = eu.convert_to_list(qnode.categories)
        prefixes_supported = self._get_supported_prefixes(category_list, kp_name)
        prefixes_in_use = set()
        for curie in qnode.ids:
            prefixes_in_use.add(self._get_uppercase_prefix(curie))

        if prefixes_in_use.issubset(prefixes_supported):
            self.log.debug(
                f"{kp_name}: All {qnode_key} curies use prefix(es) {kp_name} supports; no "
                f"conversion necessary")
            continue

        self.log.debug(
            f"{kp_name}: One or more {qnode_key} curies use a prefix {kp_name} doesn't "
            f"support; will convert these")
        replacement_curies = self.get_desirable_equivalent_curies(
            qnode.ids, qnode.categories, kp_name)
        if not replacement_curies:
            # Nothing usable for this node, so the KP cannot answer at all
            log.info(
                f"{kp_name} cannot answer the query because no equivalent curies were found "
                f"with prefixes it supports for qnode {qnode_key}. Original curies were: "
                f"{qnode.ids}")
            return None

        log.debug(
            f"{kp_name}: Converted {qnode_key}'s {len(qnode.ids)} curies to a list of "
            f"{len(replacement_curies)} curies tailored for {kp_name}")
        qnode.ids = replacement_curies

    return qg
def _answer_single_node_query_using_neo4j(self, qnode_key: str,
                                          qg: QueryGraph, kg_name: str,
                                          log: ARAXResponse):
    """Look up the node(s) for a single query node in neo4j.

    :param qnode_key: Key of the query node in ``qg.nodes``.
    :param qg: Query graph containing the node.
    :param kg_name: Name of the knowledge graph; passed to the query runner
        and to ``self._convert_neo4j_node_to_trapi_node``.
    :param log: Response object that receives the info message and is passed
        to ``self._run_cypher_query``.
    :return: A ``QGOrganizedKnowledgeGraph`` holding one node per result row,
        each added under ``qnode_key``.
    """
    qnode = qg.nodes[qnode_key]
    answer_kg = QGOrganizedKnowledgeGraph()

    # Build and run a cypher query to get this node/nodes.
    # isinstance (not an exact type() comparison) so that str subclasses are
    # also treated as a single curie rather than as a collection.
    if isinstance(qnode.id, str):
        where_clause = f"{qnode_key}.id='{qnode.id}'"
    else:
        where_clause = f"{qnode_key}.id in {qnode.id}"
    # NOTE(review): qnode.id is interpolated directly into the cypher text;
    # if it can carry user-supplied characters such as quotes this should be
    # escaped or parameterized - confirm what _run_cypher_query supports.
    cypher_query = (
        f"MATCH {self._get_cypher_for_query_node(qnode_key, qg)} "
        f"WHERE {where_clause} RETURN {qnode_key}")
    log.info(f"Sending cypher query for node {qnode_key} to {kg_name} neo4j")
    results = self._run_cypher_query(cypher_query, kg_name, log)

    # Load the results into API object model and add to our answer knowledge graph
    for result in results:
        neo4j_node = result.get(qnode_key)
        node_key, node = self._convert_neo4j_node_to_trapi_node(
            neo4j_node, kg_name)
        answer_kg.add_node(node_key, node, qnode_key)

    return answer_kg
def assess(self, message):
    """Collect basic structural information about ``message.query_graph``.

    Stores the findings on ``self``: ``response``, ``message``, ``n_nodes``,
    ``n_edges``, ``node_category_map``, ``edge_predicate_map``, ``node_info``,
    ``edge_info``, ``start_node``, ``node_order``, ``edge_order`` and
    ``query_graph_templates``. ``is_bifurcated_graph`` is set to True when a
    node with more than two links is seen (it is not reset here).

    Side effects on the query graph itself: each non-None ``qnode.category``
    is replaced by ``self.remap_node_category(...)``, and ``is_set`` is forced
    to True on intermediate nodes of the computed node order.

    :param message: Object exposing ``query_graph`` with dict-like ``nodes``
        and ``edges`` (both are iterated with ``.items()``).
    :return: ARAXResponse object with execution information
    :rtype: ARAXResponse
    """

    #### Define a default response
    response = ARAXResponse()
    self.response = response
    self.message = message
    response.debug(f"Assessing the QueryGraph for basic information")

    #### Get shorter handles
    query_graph = message.query_graph
    nodes = query_graph.nodes
    edges = query_graph.edges

    #### Store number of nodes and edges
    self.n_nodes = len(nodes)
    self.n_edges = len(edges)
    response.debug(f"Found {self.n_nodes} nodes and {self.n_edges} edges")

    #### Handle impossible cases
    if self.n_nodes == 0:
        response.error("QueryGraph has 0 nodes. At least 1 node is required",
                       error_code="QueryGraphZeroNodes")
        return response
    if self.n_nodes == 1 and self.n_edges > 0:
        response.error(
            "QueryGraph may not have edges if there is only one node",
            error_code="QueryGraphTooManyEdges")
        return response

    #### Loop through nodes computing some stats
    node_info = {}
    self.node_category_map = {}
    # Created on first use and then shared by all nodes, instead of building
    # a new synonymizer for every node that needs a category lookup
    synonymizer = None
    for key, qnode in nodes.items():
        # NOTE(review): 'is_set' always starts as False here and nothing in
        # this method copies qnode.is_set into it.
        node_info[key] = {
            'key': key,
            'node_object': qnode,
            'has_id': False,
            'category': qnode.category,
            'has_category': False,
            'is_set': False,
            'n_edges': 0,
            'n_links': 0,
            'is_connected': False,
            'edges': [],
            'edge_dict': {}
        }
        if qnode.id is not None:
            node_info[key]['has_id'] = True

            #### If the user did not specify a category, but there is a curie, try to figure out the category
            if node_info[key]['category'] is None:
                curies_list = qnode.id if isinstance(qnode.id, list) else [qnode.id]
                # An empty id list has no first curie to look up, so skip it
                if curies_list:
                    curie = curies_list[0]
                    if synonymizer is None:
                        synonymizer = NodeSynonymizer()
                    canonical_curies = synonymizer.get_canonical_curies(
                        curies=curies_list, return_all_categories=True)
                    # Tolerate a None entry for a curie that is listed but unresolved
                    canonical_entry = canonical_curies[curie] if curie in canonical_curies else None
                    if canonical_entry is not None and 'preferred_type' in canonical_entry:
                        node_info[key]['has_category'] = True
                        node_info[key]['category'] = canonical_entry['preferred_type']

        if qnode.category is not None:
            node_info[key]['has_category'] = True

        if key is None:
            response.error(
                "QueryGraph has a node with null key. This is not permitted",
                error_code="QueryGraphNodeWithNoId")
            return response

        #### Remap the node categorys from unsupported to supported
        if qnode.category is not None:
            qnode.category = self.remap_node_category(qnode.category)

        #### Store lookup of categorys
        if qnode.category is None or (isinstance(qnode.category, list)
                                      and len(qnode.category) == 0):
            self.node_category_map['unknown'] = key
        else:
            category = qnode.category
            if isinstance(qnode.category, list):
                category = qnode.category[0]  # FIXME this is a hack prior to proper list handling
            self.node_category_map[category] = key

    #### Loop through edges computing some stats
    edge_info = {}
    self.edge_predicate_map = {}
    unique_links = {}

    #### Ignore special informationational edges for now.
    virtual_edge_predicates = {
        'has_normalized_google_distance_with': 1,
        'has_fisher_exact_test_p-value_with': 1,
        'has_jaccard_index_with': 1,
        'probably_treats': 1,
        'has_paired_concept_frequency_with': 1,
        'has_observed_expected_ratio_with': 1,
        'has_chi_square_with': 1
    }

    # Initialised once, outside the loop, so that the "no predicate" debug
    # message really is emitted only for the first such edge. It used to be
    # reset on every iteration, which made the once-only guard ineffective.
    warning_counter = 0
    for key, qedge in edges.items():

        predicate = qedge.predicate
        if isinstance(predicate, list):
            if len(predicate) == 0:
                predicate = None
            else:
                predicate = predicate[0]  # FIXME Hack before dealing with predicates as lists!

        if predicate is not None and predicate in virtual_edge_predicates:
            continue

        edge_info[key] = {
            'key': key,
            'has_predicate': False,
            'subject': qedge.subject,
            'object': qedge.object,
            'predicate': None
        }
        if predicate is not None:
            edge_info[key]['has_predicate'] = True
            edge_info[key]['predicate'] = predicate

        if key is None:
            response.error(
                "QueryGraph has a edge with null key. This is not permitted",
                error_code="QueryGraphEdgeWithNoKey")
            return response

        #### Create a unique node link string
        # Sorted so that A->B and B->A count as the same link between two nodes
        link_string = ','.join(sorted([qedge.subject, qedge.object]))
        if link_string not in unique_links:
            node_info[qedge.subject]['n_links'] += 1
            node_info[qedge.object]['n_links'] += 1
            unique_links[link_string] = 1

        node_info[qedge.subject]['n_edges'] += 1
        node_info[qedge.object]['n_edges'] += 1
        node_info[qedge.subject]['is_connected'] = True
        node_info[qedge.object]['is_connected'] = True
        node_info[qedge.subject]['edges'].append(edge_info[key])
        node_info[qedge.object]['edges'].append(edge_info[key])
        node_info[qedge.subject]['edge_dict'][key] = edge_info[key]
        node_info[qedge.object]['edge_dict'][key] = edge_info[key]

        #### Store lookup of predicates
        edge_predicate = 'any'
        if predicate is None:
            if warning_counter == 0:
                response.debug(
                    "QueryGraph has edges with no predicate. This may cause problems with results inference later"
                )
            warning_counter += 1
        else:
            edge_predicate = predicate

        #### It's not clear yet whether we need to store the whole sentence or just the predicate
        predicate_encoding = edge_predicate
        self.edge_predicate_map[predicate_encoding] = key

    #### Loop through the nodes again, trying to identify the start_node and the end_node
    singletons = []
    for node_data in node_info.values():
        if node_data['n_links'] < 2:
            singletons.append(node_data)
        elif node_data['n_links'] > 2:
            self.is_bifurcated_graph = True
            response.warning(
                "QueryGraph appears to have a fork in it. This might cause trouble"
            )

    #### If this doesn't produce any singletons, then try curie based selection
    if len(singletons) == 0:
        singletons = [node_data for node_data in node_info.values() if node_data['has_id']]

    #### If this doesn't produce any singletons, then we don't know how to continue
    if len(singletons) == 0:
        response.error("Unable to understand the query graph",
                       error_code="QueryGraphCircular")
        return response

    #### Try to identify the start_node and the end_node
    start_node = singletons[0]
    if len(nodes) == 1:
        # Just a single node, fine
        pass
    elif len(singletons) < 2:
        response.warning(
            "QueryGraph appears to be circular or has a strange geometry. This might cause trouble"
        )
    elif len(singletons) > 2:
        response.warning(
            "QueryGraph appears to have a fork in it. This might cause trouble"
        )
    elif singletons[0]['has_id'] is False and singletons[1]['has_id'] is True:
        # Exactly two candidates: prefer the one that carries an id. In every
        # other combination the first candidate is kept.
        start_node = singletons[1]

    #### Hmm, that's not very robust against odd graphs. This needs work. FIXME

    self.node_info = node_info
    self.edge_info = edge_info
    self.start_node = start_node

    #### Walk the graph from the start node, one edge at a time
    current_node = start_node
    node_order = [start_node]
    edge_order = []
    remaining_edges = current_node['edges']
    while remaining_edges:
        # NOTE(review): this tests the node's total link count, not the number
        # of edges still to follow, so a node with two links that is reached
        # mid-walk ends the assessment with this error - confirm intended.
        if current_node['n_links'] > 1:
            response.error(
                f"Help, two edges at A583. Don't know what to do: {current_node['n_links']}",
                error_code="InteralErrorA583")
            return response

        next_edge = remaining_edges[0]
        edge_order.append(next_edge)
        previous_node = current_node
        if next_edge['subject'] == current_node['key']:
            current_node = node_info[next_edge['object']]
        elif next_edge['object'] == current_node['key']:
            current_node = node_info[next_edge['subject']]
        else:
            response.error("Help, edge error A584. Don't know what to do",
                           error_code="InteralErrorA584")
            return response
        node_order.append(current_node)

        # Only follow edges that the node we just left does not also own
        remaining_edges = [
            edge for edge in current_node['edges']
            if edge['key'] not in previous_node['edge_dict']
        ]

    self.node_order = node_order
    self.edge_order = edge_order

    # Create a text rendering of the QueryGraph geometry for matching against a template
    self.query_graph_templates = {
        'simple': '',
        'detailed': {
            'n_nodes': len(node_order),
            'components': []
        }
    }

    node_index = 0
    edge_index = 0
    for node in node_order:
        component_id = f"n{node_index:02}"
        content = ''
        component = {
            'component_type': 'node',
            'component_id': component_id,
            'has_id': node['has_id'],
            'has_category': node['has_category'],
            'category_value': None
        }
        self.query_graph_templates['detailed']['components'].append(component)
        if node['has_id']:
            content = 'id'
        elif node['has_category'] and node['node_object'].category is not None:
            content = f"category={node['node_object'].category}"
            component['category_value'] = node['node_object'].category
        elif node['has_category']:
            content = 'category'
        template_part = f"{component_id}({content})"
        self.query_graph_templates['simple'] += template_part

        # Since queries with intermediate nodes that are not is_set=true tend to blow up, for now, make them is_set=true unless explicitly set to false
        if node_index > 0 and node_index < (self.n_nodes - 1):
            if 'is_set' not in node or node['is_set'] is None:
                node['node_object'].is_set = True
                response.warning(
                    f"Setting unspecified is_set to true for {node['key']} because this will probably lead to a happier result"
                )
            elif node['is_set'] is True:
                response.debug(
                    f"Value for is_set is already true for {node['key']} so that's good"
                )
            elif node['is_set'] is False:
                response.info(
                    f"Value for is_set is false for intermediate node {node['key']}. Setting to true because this will probably lead to a happier result"
                )
                node['node_object'].is_set = True

        node_index += 1
        if node_index < self.n_nodes:

            #### Extract the has_predicate and predicate_value from the edges of the node
            #### This could fail if there are two edges coming out of the node FIXME
            has_predicate = False
            predicate_value = None
            if 'edges' in node:
                for related_edge in node['edges']:
                    if related_edge['subject'] == node['key']:
                        has_predicate = related_edge['has_predicate']
                        if has_predicate is True and 'predicate' in related_edge:
                            predicate_value = related_edge['predicate']

            component_id = f"e{edge_index:02}"
            template_part = f"-{component_id}()-"
            self.query_graph_templates['simple'] += template_part
            component = {
                'component_type': 'edge',
                'component_id': component_id,
                'has_id': False,
                'has_predicate': has_predicate,
                'predicate_value': predicate_value
            }
            self.query_graph_templates['detailed']['components'].append(component)
            edge_index += 1

    response.debug(
        f"The QueryGraph reference template is: {self.query_graph_templates['simple']}"
    )

    #### Return the response
    return response
def apply_fetch_message(self, message, input_parameters, describe=False):
    """
    Fetches a remote Message with an HTTP GET and stores it in self.message

    :param message: Message object stored on ``self.message`` until the
        fetched one replaces it.
    :param input_parameters: dict of DSL parameters; only ``uri`` is accepted.
    :param describe: when True, return the command definition dict instead
        of executing the command.
    :return: ARAXResponse object with execution information (or the command
        definition dict when ``describe`` is True)
    :rtype: ARAXResponse
    """

    # #### Command definition for autogenerated documentation
    # The parameter is documented as 'uri' because that is the only key the
    # code below accepts; it used to be documented as 'id', which the command
    # itself rejected as an unknown parameter.
    command_definition = {
        'dsl_command': 'fetch_message()',
        'description':
        """The `fetch_message` command fetches a remote Message by its id and can then allow further processing on it.""",
        'parameters': {
            'uri': {
                'is_required': True,
                'examples': ['https://arax.ncats.io/api/arax/v1.0/message/1'],
                'default': '',
                'type': 'string',
                'description':
                """A URL/URI that identifies the Message to be fetched""",
            },
        }
    }

    if describe:
        return command_definition

    #### Define a default response
    response = ARAXResponse()
    self.response = response
    self.message = message

    #### Basic checks on arguments
    if not isinstance(input_parameters, dict):
        response.error("Provided parameters is not a dict",
                       error_code="ParametersNotDict")
        return response

    #### Define a complete set of allowed parameters and their defaults
    parameters = {
        'uri': None,
    }

    #### Loop through the input_parameters and override the defaults and make sure they are allowed
    for key, value in input_parameters.items():
        if key not in parameters:
            response.error(f"Supplied parameter {key} is not permitted",
                           error_code="UnknownParameter")
        else:
            parameters[key] = value

    #### Return if any of the parameters generated an error (showing not just the first one)
    if response.status != 'OK':
        return response

    #### Store these final parameters for convenience
    response.data['parameters'] = parameters
    self.parameters = parameters

    #### Basic checks on arguments
    # Read from the defaulted dict: an omitted 'uri' is then None and is
    # reported as an error below instead of raising KeyError
    message_uri = parameters['uri']
    if not isinstance(message_uri, str):
        response.error("Provided parameter is not a string",
                       error_code="ParameterNotString")
        return response

    response.info(f"Fetching Message via GET to '{message_uri}'")
    # NOTE(review): no timeout is passed and connection errors propagate to
    # the caller as exceptions - confirm whether that is intended.
    response_content = requests.get(message_uri,
                                    headers={'accept': 'application/json'})
    status_code = response_content.status_code

    if status_code != 200:
        # NOTE(review): the failure is reported twice, with and without the
        # response body; both messages are kept as they were.
        response.error(
            f"GET to '{message_uri}' returned HTTP code {status_code} and content '{response_content.content}'",
            error_code="GETFailed")
        response.error(
            f"GET to '{message_uri}' returned HTTP code {status_code}",
            error_code="GETFailed")
        return response

    #### Unpack the response content into a dict and dump
    try:
        response_dict = response_content.json()
        message = self.from_dict(response_dict)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed here
        response.error(
            f"Error converting response from '{message_uri}' to objects from content",
            error_code="UnableToParseContent")
        return response

    #### Store the decoded message and return response
    self.message = message
    n_results = 0
    n_qg_nodes = 0
    n_kg_nodes = 0
    if isinstance(message.results, list):
        n_results = len(message.results)
    # Node containers are counted whether they are lists or dicts; previously
    # only lists were counted, so dict-keyed nodes were reported as 0
    if isinstance(message.query_graph, QueryGraph) and isinstance(
            message.query_graph.nodes, (list, dict)):
        n_qg_nodes = len(message.query_graph.nodes)
    if isinstance(message.knowledge_graph, KnowledgeGraph) and isinstance(
            message.knowledge_graph.nodes, (list, dict)):
        n_kg_nodes = len(message.knowledge_graph.nodes)
    response.info(
        f"Retreived Message with {n_qg_nodes} QueryGraph nodes, {n_kg_nodes} KnowledgeGraph nodes, and {n_results} results"
    )

    return response
def check_for_query_graph_tags(self, message, query_graph_info):
    """Index the KnowledgeGraph nodes and edges by their QueryGraph tags.

    Fills ``self.node_map['by_qnode_id']`` and ``self.edge_map['by_qedge_id']``
    with ``{tag: {key: 1}}`` entries for every node/edge whose ``qnode_id`` /
    ``qedge_id`` is not None, and records a summary sentence in
    ``self.query_graph_id_node_status`` / ``self.query_graph_id_edge_status``.

    :param message: Object exposing ``knowledge_graph`` with dict-like
        ``nodes`` and ``edges``.
    :param query_graph_info: Not used by this method.
    :return: ARAXResponse object with execution information
    :rtype: ARAXResponse
    """

    def tally_tagged(items, tag_attribute, index):
        # Record each tagged item under its tag and count how many were tagged
        n_tagged = 0
        for item_key, item in items.items():
            tag = getattr(item, tag_attribute)
            if tag is None:
                continue
            n_tagged += 1
            index.setdefault(tag, {})[item_key] = 1
        return n_tagged

    def summarize(n_tagged, n_total, all_text, none_text, some_text):
        # "all" is tested first, so an empty collection reports "all"
        if n_tagged == n_total:
            return all_text
        if n_tagged == 0:
            return none_text
        return some_text

    #### Define a default response
    response = ARAXResponse()
    self.response = response
    self.message = message
    response.debug(f"Checking KnowledgeGraph for QueryGraph tags")

    #### Get shorter handles
    kg = message.knowledge_graph
    kg_nodes = kg.nodes
    kg_edges = kg.edges

    #### Store number of nodes and edges
    self.n_nodes = len(kg_nodes)
    self.n_edges = len(kg_edges)
    response.debug(f"Found {self.n_nodes} nodes and {self.n_edges} edges")

    #### Clear the maps
    self.node_map = {'by_qnode_id': {}}
    self.edge_map = {'by_qedge_id': {}}

    #### Nodes: build the index, then tally the stats
    n_tagged_nodes = tally_tagged(kg_nodes, 'qnode_id',
                                  self.node_map['by_qnode_id'])
    self.query_graph_id_node_status = summarize(
        n_tagged_nodes, self.n_nodes,
        'all nodes have query_graph_ids',
        'no nodes have query_graph_ids',
        'only some nodes have query_graph_ids')
    response.info(f"In the KnowledgeGraph, {self.query_graph_id_node_status}")

    #### Edges: build the index, then tally the stats
    n_tagged_edges = tally_tagged(kg_edges, 'qedge_id',
                                  self.edge_map['by_qedge_id'])
    self.query_graph_id_edge_status = summarize(
        n_tagged_edges, self.n_edges,
        'all edges have query_graph_ids',
        'no edges have query_graph_ids',
        'only some edges have query_graph_ids')
    response.info(f"In the KnowledgeGraph, {self.query_graph_id_edge_status}")

    #### Return the response
    return response
def parse(self, input_actions):
    """Parse a list of DSL action strings into command/parameter dicts.

    Each entry is either blank, a comment starting with ``#``, a naked
    command (``name``), or a command with a comma-separated parameter list
    (``name(key=value, flag, key=[a,b])``). A parameter without ``=`` gets
    the string value ``'true'``; a bracketed value becomes a list of strings.

    :param input_actions: list of action strings.
    :return: ARAXResponse whose ``data["actions"]`` is a list of dicts with
        the keys ``line``, ``command`` and ``parameters``. ``parameters`` is
        None for a naked command. Unparseable entries are reported through
        ``response.error`` and do not stop parsing.
    :rtype: ARAXResponse
    """

    #### Define a default response
    response = ARAXResponse()
    response.info(f"Parsing input actions list")

    #### Basic error checking of the input_actions
    if not isinstance(input_actions, list):
        response.error("Provided input actions is not a list",
                       error_code="ActionsNotList")
        return response
    if len(input_actions) == 0:
        response.error("Provided input actions is an empty list",
                       error_code="ActionsListEmpty")
        return response

    #### Iterate through the list, checking the items
    actions = []
    # enumerate keeps "line" equal to the 1-based position in input_actions.
    # The hand-maintained counter was not advanced for skipped blank and
    # comment lines, so every later action was reported on too low a line.
    for n_lines, action in enumerate(input_actions, start=1):
        response.debug(f"Parsing action: {action}")

        # If this line is empty, then skip
        if re.match(r"\s*$", action):
            continue

        # If this line begins with a #, it is a comment, then skip
        if re.match(r"#", action):
            continue

        #### First look for a naked command without parentheses
        match = re.match(r"\s*([A-Za-z_]+)\s*$", action)
        if match is not None:
            actions.append({
                "line": n_lines,
                "command": match.group(1),
                "parameters": None
            })
            continue

        #### Then look for and parse a command with parentheses and a comma-separated parameter list
        match = re.match(r"\s*([A-Za-z_]+)\((.*)\)\s*$", action)
        if match is None:
            # NOTE(review): this error_code duplicates the one used for an
            # empty list; kept unchanged in case callers match on it.
            response.error(f"Unable to parse action {action}",
                           error_code="ActionsListEmpty")
            continue

        command = match.group(1)
        param_string = match.group(2)
        parameters = {}

        #### If a value is of the form key=[value1,value2] special code is needed to recompose that
        # The string is split on every comma, so the pieces of a bracketed
        # list arrive as separate items and are gathered in list_buffer.
        in_list = False
        list_buffer = []
        key = ''
        for param_item in param_string.split(","):
            param_item = param_item.strip()

            #### Special processing if we're in the middle of a list
            if in_list:
                list_end = re.match(r"(.*)\]$", param_item)
                if list_end:
                    in_list = False
                    list_buffer.append(list_end.group(1))
                    parameters[key] = list_buffer
                else:
                    list_buffer.append(param_item)
                continue

            # "command()" or a stray comma yields an empty item; skip it
            # rather than recording a parameter with an empty name
            if param_item == '':
                continue

            #### Split on the first = only (might be = in the value)
            values = param_item.split("=", 1)
            key = values[0].strip()
            #### If there isn't a value after an =, then just set to string true
            value = values[1].strip() if len(values) > 1 else 'true'

            #### If the value begins with a "[", then this is a list
            list_start = re.match(r"\[(.+)$", value)
            if not list_start:
                parameters[key] = value
                continue

            #### If it also ends with a "]", then this is a list of at most one element
            whole_list = re.match(r"\[(.*)\]$", value)
            if whole_list:
                only_element = whole_list.group(1)
                parameters[key] = [] if only_element == '' else [only_element]
            else:
                in_list = True
                list_buffer = [list_start.group(1)]

        # An unterminated list still yields whatever was collected
        if in_list:
            parameters[key] = list_buffer

        #### Store the parsed result in a dict and add to the list
        actions.append({
            "line": n_lines,
            "command": command,
            "parameters": parameters
        })

    #### Put the actions in the response data envelope and return
    response.data["actions"] = actions
    return response