Пример #1
0
 def from_dict(self, query_graph_dict):
     """Build a new QueryGraph from a plain dict.

     A fresh QueryGraph is created and returned; ``self`` is not modified.
     Every entry under the optional "nodes" key is converted with
     ``QNode().from_dict`` and every entry under the optional "edges" key with
     ``QEdge().from_dict``. A missing key results in an empty list.

     :param query_graph_dict: mapping that may contain "nodes" and/or "edges"
     :return: the newly populated QueryGraph
     """
     graph = QueryGraph()
     raw_nodes = query_graph_dict["nodes"] if "nodes" in query_graph_dict else []
     graph.nodes = [QNode().from_dict(raw_node) for raw_node in raw_nodes]
     raw_edges = query_graph_dict["edges"] if "edges" in query_graph_dict else []
     graph.edges = [QEdge().from_dict(raw_edge) for raw_edge in raw_edges]
     return graph
Пример #2
0
def copy_qnode(old_qnode: QNode) -> QNode:
    """Return a new QNode carrying the same property values as ``old_qnode``.

    The properties copied are the keys of a freshly constructed QNode's
    ``to_dict()``. Values are transferred with getattr/setattr, so they are
    shared by reference rather than deep-copied.
    """
    duplicate = QNode()
    for property_name in duplicate.to_dict():
        setattr(duplicate, property_name, getattr(old_qnode, property_name))
    return duplicate
Пример #3
0
    def add_qnode(self, message, input_parameters, describe=False):
        """
        Adds a new QNode object to the QueryGraph inside the Message object

        The node is described by at most one of curie / name / type (checked in
        that order); if none is given a generic, untyped QNode is added.

        :param message: Message object whose query_graph receives the new QNode.
            A QueryGraph and its node list are created if they are missing.
        :param input_parameters: dict with any of the keys id, curie, name,
            type, is_set. Unknown keys produce an error for each offending key.
        :param describe: if True, no processing is done and the dict of
            allowable parameters (plus dsl_command and brief_description) is
            returned instead of a Response.
        :return: Response object with execution information
        :rtype: Response
        """

        # #### Internal documentation setup
        # NOTE: each value is a one-element set holding the help text
        allowable_parameters = {
            'id': {
                'Any string that is unique among all QNode id fields, with recommended format n00, n01, n02, etc.'
            },
            'curie': {
                'Any compact URI (CURIE) (e.g. DOID:9281) (May also be a list like [UniProtKB:P12345,UniProtKB:Q54321])'
            },
            'name': {
                'Any name of a bioentity that will be resolved into a CURIE if possible or result in an error if not (e.g. hypertension, insulin)'
            },
            'type': {
                'Any valid Translator bioentity type (e.g. protein, chemical_substance, disease)'
            },
            'is_set': {
                'If set to true, this QNode represents a set of nodes that are all in common between the two other linked QNodes'
            },
        }
        if describe:
            allowable_parameters[
                'dsl_command'] = '`add_qnode()`'  # can't get this name at run-time, need to manually put it in per https://www.python.org/dev/peps/pep-3130/
            allowable_parameters[
                'brief_description'] = """The `add_qnode` method adds an additional QNode to the QueryGraph in the Message object. Currently
                when a curie or name is specified, this method will only return success if a matching node is found in the KG1/KG2 KGNodeIndex."""
            return allowable_parameters

        #### Define a default response
        response = Response()
        self.response = response
        self.message = message

        #### Basic checks on arguments
        if not isinstance(input_parameters, dict):
            response.error("Provided parameters is not a dict",
                           error_code="ParametersNotDict")
            return response

        #### Define a complete set of allowed parameters and their defaults
        parameters = {
            'id': None,
            'curie': None,
            'name': None,
            'type': None,
            'is_set': None,
        }

        #### Loop through the input_parameters and override the defaults and make sure they are allowed
        for key, value in input_parameters.items():
            if key not in parameters:
                response.error(f"Supplied parameter {key} is not permitted",
                               error_code="UnknownParameter")
            else:
                parameters[key] = value
        #### Return if any of the parameters generated an error (showing not just the first one)
        if response.status != 'OK':
            return response

        #### Store these final parameters for convenience
        response.data['parameters'] = parameters
        self.parameters = parameters

        #### Record what is about to be added
        response.info(
            f"Adding a QueryNode to Message with parameters {parameters}")

        #### Make sure there's a query_graph already here
        if message.query_graph is None:
            message.query_graph = QueryGraph()
            message.query_graph.nodes = []
            message.query_graph.edges = []
        if message.query_graph.nodes is None:
            message.query_graph.nodes = []

        #### Set up the KGNodeIndex
        kgNodeIndex = KGNodeIndex()

        # Create the QNode and set the id (auto-generated when the caller gave none)
        qnode = QNode()
        node_id = parameters['id']
        if node_id is None:
            node_id = self.__get_next_free_node_id()
        qnode.id = node_id

        # Set the is_set parameter to what the user selected. The value is
        # normally the string 'true'/'false', but a real bool is accepted too
        # instead of failing on the missing .lower() method.
        is_set_param = parameters['is_set']
        if is_set_param is not None:
            if isinstance(is_set_param, bool):
                qnode.is_set = is_set_param
            else:
                qnode.is_set = (str(is_set_param).lower() == 'true')

        #### If the CURIE is specified, try to find that
        if parameters['curie'] is not None:

            # If the curie is a scalar then treat it here as a list of one
            if isinstance(parameters['curie'], str):
                curie_list = [parameters['curie']]
                is_curie_a_list = False
                if parameters['is_set'] is not None and qnode.is_set is True:
                    response.error(
                        f"Specified CURIE '{parameters['curie']}' is a scalar, but is_set=true, which doesn't make sense",
                        error_code="CurieScalarButIsSetTrue")
                    return response

            # Or else set it up as a list; a list of CURIEs always implies is_set=true
            elif isinstance(parameters['curie'], list):
                curie_list = parameters['curie']
                is_curie_a_list = True
                qnode.curie = []
                if parameters['is_set'] is None:
                    response.warning(
                        f"Specified CURIE '{parameters['curie']}' is a list, but is_set was not set to true. It must be true in this context, so automatically setting to true. Avoid this warning by explictly setting to true."
                    )
                    qnode.is_set = True
                elif not qnode.is_set:
                    response.warning(
                        f"Specified CURIE '{parameters['curie']}' is a list, but is_set=false, which doesn't make sense, so automatically setting to true. Avoid this warning by explictly setting to true."
                    )
                    qnode.is_set = True

            # Or if it's neither a list or a string, then error out. This cannot be handled at present
            else:
                response.error(
                    f"Specified CURIE '{parameters['curie']}' is neither a string nor a list. This cannot to handled",
                    error_code="CurieNotListOrScalar")
                return response

            # Loop over the available curies and create the list
            for curie in curie_list:
                response.debug(f"Looking up CURIE {curie} in KgNodeIndex")
                nodes = kgNodeIndex.get_curies_and_types(curie, kg_name='KG2')

                if not nodes:
                    # If nothing was found, we won't bail out, but rather just issue a warning
                    response.warning(
                        f"A node with CURIE {curie} is not in our knowledge graph KG2, but will continue"
                    )
                    resolved_curie = curie
                else:
                    # FIXME. This is just always taking the first result. This could cause problems for CURIEs with multiple types. Is that possible?
                    # In issue #623 on 2020-06-15 we concluded that we should not specify the type here
                    resolved_curie = nodes[0]['curie']

                # Either append or set the resolved curie
                if is_curie_a_list:
                    qnode.curie.append(resolved_curie)
                else:
                    qnode.curie = resolved_curie

            # Apply the requested type once instead of on every iteration. The
            # curie_list guard keeps the previous behavior of leaving the type
            # untouched when an empty CURIE list was supplied.
            if curie_list and parameters['type'] is not None:
                if isinstance(parameters['type'], str):
                    qnode.type = parameters['type']
                else:
                    qnode.type = parameters['type'][0]

            message.query_graph.nodes.append(qnode)
            return response

        #### If the name is specified, try to find that
        if parameters['name'] is not None:
            response.debug(
                f"Looking up CURIE {parameters['name']} in KgNodeIndex")
            # First try the index's default knowledge graph, then fall back to KG2
            nodes = kgNodeIndex.get_curies_and_types(parameters['name'])
            if not nodes:
                nodes = kgNodeIndex.get_curies_and_types(parameters['name'],
                                                         kg_name='KG2')
                if not nodes:
                    response.error(
                        f"A node with name '{parameters['name']}'' is not in our knowledge graph",
                        error_code="UnknownCURIE")
                    return response
            qnode.curie = nodes[0]['curie']
            qnode.type = nodes[0]['type']
            message.query_graph.nodes.append(qnode)
            return response

        #### If the type is specified, just add that type. There should be checking that it is legal. FIXME
        #### (is_set was already applied to the qnode above)
        if parameters['type'] is not None:
            qnode.type = parameters['type']
            message.query_graph.nodes.append(qnode)
            return response

        #### If we get here, it means that all three main parameters are null. Just a generic node with no type or anything. This is okay.
        message.query_graph.nodes.append(qnode)
        return response
Пример #4
0
    def answer(self, entity, use_json=False):
        """
        Answer a question of the type "What is X" for a single entity.

        :param entity: term passed to KGNodeIndex.get_curies (eg "carbetocin")
        :param use_json: if True, wrap the answer in a Message in the Translator
            standardized API output format; otherwise return the raw properties
        :return: when no CURIE is found: None, or an error Message if use_json
            is set. Otherwise the properties of the first matching CURIE, or a
            Message built from them if use_json is set.
        """

        #### Resolve the entity to CURIEs via the index
        eprint("Looking up '%s' in KgNodeIndex" % entity)
        matching_curies = KGNodeIndex().get_curies(entity)

        #### Not in the KG: plain callers get None, JSON callers get an error message
        if not matching_curies:
            if not use_json:
                return None
            failure = FormatOutput.FormatResponse(0)
            failure.add_error_message(
                "TermNotFound", "This concept is not in our knowledge graph")
            return failure.message

        #### Fetch the properties of the first match only
        eprint("Getting properties for '%s'" % matching_curies[0])
        properties = RU.get_node_properties(matching_curies[0])
        eprint("Properties are:")
        eprint(properties)

        #### Plain mode: hand back the properties untouched
        if not use_json:
            return properties

        #### JSON mode: start from a stub Message object
        response = FormatOutput.FormatResponse(0)
        out_message = response.message
        out_message.table_column_names = [
            "id", "type", "name", "description", "uri"
        ]
        out_message.code_description = None

        #### The single knowledge graph node describing the entity
        kg_node = Node()
        kg_node.id = properties["id"]
        kg_node.uri = properties["uri"]
        kg_node.type = [properties["category"]]
        kg_node.name = properties["name"]
        kg_node.description = properties["description"]

        #### The single result (potential answer)
        result = Result()
        result.id = "http://arax.ncats.io/api/v1/result/0000"
        result.description = "The term %s is in our knowledge graph and is defined as %s" % (
            properties["name"], properties["description"])
        result.confidence = 1.0
        result.essence = properties["name"]
        result.essence_type = properties["category"]
        joined_types = ",".join(kg_node.type)
        result.row_data = [
            kg_node.id, joined_types, kg_node.name, kg_node.description,
            kg_node.uri
        ]

        #### One-node graph, shared by the result and the top-level Message
        #### (a shortcut that is only valid for this single-node answer)
        single_node_graph = KnowledgeGraph()
        single_node_graph.nodes = [kg_node]
        single_node_graph.edges = []
        result.result_graph = single_node_graph
        out_message.results = [result]
        out_message.knowledge_graph = single_node_graph

        #### Manufacture a query_graph post hoc: one untyped node pinned to the CURIE
        query_node = QNode()
        query_node.id = "n00"
        query_node.curie = properties["id"]
        query_node.type = None
        post_hoc_graph = QueryGraph()
        post_hoc_graph.nodes = [query_node]
        post_hoc_graph.edges = []
        out_message.query_graph = post_hoc_graph

        #### Bind the query node to the knowledge graph node
        result.node_bindings = [
            NodeBinding(qg_id="n00", kg_id=properties["id"])
        ]
        result.edge_bindings = []

        return response.message
    def answer(source_node_ID,
               target_node_type,
               association_node_type,
               use_json=False,
               threshold=0.2,
               n=20):
        """
        Answers the question what X are similar to Y based on overlap of common Z nodes. X is target_node_type,
        Y is source_node_ID, Z is association_node_type. The relationships are automatically determined in
        SimilarNodesInCommon by looking for 1 hop relationships and poping the FIRST one (you are warned).

        NOTE(review): there is no `self` parameter although the function is indented like a method;
        presumably it is declared static outside the visible code -- confirm.

        :param source_node_ID: actual name in the KG
        :param target_node_type: kinds of nodes you want returned
        :param association_node_type: kind of node you are computing the Jaccard overlap on
        :param use_json: print the results in standardized format
        :param threshold: only return results where jaccard is >= this threshold
        :param n: maximum number of results to return (default 20); the source node itself is
            excluded before this limit is applied
        :return: None; the results (or the error) are printed
        """

        # Initialize the response class
        response = FormatOutput.FormatResponse(5)
        # add the column names for the row data
        response.message.table_column_names = [
            "source name", "source ID", "target name", "target ID",
            "Jaccard index"
        ]

        # Initialize the similar nodes class
        similar_nodes_in_common = SimilarNodesInCommon.SimilarNodesInCommon()

        # get the description
        source_node_description = RU.get_node_property(source_node_ID, 'name')

        # get the source node label
        source_node_label = RU.get_node_property(source_node_ID, 'label')

        # Get the nodes in common
        node_jaccard_tuples_sorted, error_code, error_message = similar_nodes_in_common.get_similar_nodes_in_common_source_target_association(
            source_node_ID, target_node_type, association_node_type, threshold)

        # check for an error BEFORE touching the result list, which cannot be
        # relied upon when the lookup failed
        if error_code is not None or error_message is not None:
            if not use_json:
                print(error_message)
            else:
                response.add_error_message(error_code, error_message)
                response.print()
            return

        # make sure that the input node isn't in the list, then keep the top n.
        # Filtering first means the source node no longer uses up one of the n slots.
        node_jaccard_tuples_sorted = [
            pair for pair in node_jaccard_tuples_sorted
            if pair[0] != source_node_ID
        ]
        if len(node_jaccard_tuples_sorted) > n:
            node_jaccard_tuples_sorted = node_jaccard_tuples_sorted[0:n]

        #### If use_json not specified, then print results as a fairly plain list
        if not use_json:
            lines = [
                "The %s's involving similar %ss as %s are: \n" %
                (target_node_type, association_node_type,
                 source_node_description)
            ]
            for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
                lines.append("%s\t%s\tJaccard %f\n" %
                             (other_disease_ID,
                              RU.get_node_property(other_disease_ID, 'name'),
                              jaccard))
            print("".join(lines))
            return

        #### Else use_json requested: print the results in the Translator standard API JSON format

        #### Create the QueryGraph for this type of question
        query_graph = QueryGraph()
        source_node = QNode()
        source_node.id = "n00"
        source_node.curie = source_node_ID
        source_node.type = source_node_label
        association_node = QNode()
        association_node.id = "n01"
        association_node.type = association_node_type
        association_node.is_set = True
        target_node = QNode()
        target_node.id = "n02"
        target_node.type = target_node_type
        query_graph.nodes = [source_node, association_node, target_node]

        # The edge types are left unset on purpose (relationship types are not known here)
        edge1 = QEdge()
        edge1.id = "en00-n01"
        edge1.source_id = "n00"
        edge1.target_id = "n01"

        edge2 = QEdge()
        edge2.id = "en01-n02"
        edge2.source_id = "n01"
        edge2.target_id = "n02"

        query_graph.edges = [edge1, edge2]

        #### DONT Suppress the query_graph because we can now do the knowledge_map with v0.9.1
        response.message.query_graph = query_graph

        #### Create a mapping dict with the source curie and node types and edge types. This dict is used for reverse lookups by type
        #### for mapping to the QueryGraph. There is a potential point of failure here if there are duplicate node or edge types. FIXME
        response._type_map = dict()
        response._type_map[source_node.curie] = source_node.id
        response._type_map[association_node.type] = association_node.id
        response._type_map[target_node.type] = target_node.id
        response._type_map["e" + edge1.source_id + "-" +
                           edge1.target_id] = edge1.id
        response._type_map["e" + edge2.source_id + "-" +
                           edge2.target_id] = edge2.id

        #### Extract the sorted IDs from the list of tuples
        node_jaccard_ID_sorted = [
            node_ID for node_ID, _ in node_jaccard_tuples_sorted
        ]

        # get the entire subgraph
        g = RU.return_subgraph_through_node_labels(source_node_ID,
                                                   source_node_label,
                                                   node_jaccard_ID_sorted,
                                                   target_node_type,
                                                   [association_node_type],
                                                   with_rel=[],
                                                   directed=False,
                                                   debug=False)

        # extract the source_node_number (graph-internal key of the source node)
        for node, data in g.nodes(data=True):
            if data['properties']['id'] == source_node_ID:
                source_node_number = node
                break

        # Get all the target numbers (graph-internal keys of the similar nodes)
        node_jaccard_ID_sorted_set = set(node_jaccard_ID_sorted)
        target_id2numbers = {
            data['properties']['id']: node
            for node, data in g.nodes(data=True)
            if data['properties']['id'] in node_jaccard_ID_sorted_set
        }

        for other_disease_ID, jaccard in node_jaccard_tuples_sorted:
            target_name = RU.get_node_property(other_disease_ID, 'name')
            to_print = "The %s %s involves similar %ss as %s with similarity value %f" % (
                target_node_type, target_name, association_node_type,
                source_node_description, jaccard)

            # get all the shortest paths between source and target
            all_paths = nx.all_shortest_paths(
                g, source_node_number, target_id2numbers[other_disease_ID])

            # get all the nodes on these paths
            rel_nodes = {node for path in all_paths for node in path}
            if not rel_nodes:
                continue

            # extract the relevant subgraph
            sub_g = nx.subgraph(g, rel_nodes)

            # add it to the response
            res = response.add_subgraph(sub_g.nodes(data=True),
                                        sub_g.edges(data=True),
                                        to_print,
                                        jaccard,
                                        return_result=True)
            res.essence = "%s" % target_name  # populate with essence of question result
            res.essence_type = target_node_type
            res.row_data = [
                "%s" % source_node_description,
                "%s" % source_node_ID,
                "%s" % target_name,
                "%s" % other_disease_ID,
                "%f" % jaccard,
            ]

        response.print()
Пример #6
0
    def answer(self,
               source_name,
               target_label,
               relationship_type,
               use_json=False,
               directed=False):
        """
        Answer a question of the type "What proteins does drug X target" but is general:
        what <node X type> does <node Y grounded> <relationship Z>.

        A direct path along relationship_type is tried first; if that yields nothing,
        a path going through one 'subclass_of' hop followed by relationship_type is tried.

        :param source_name: KG neo4j node name (eg "carbetocin")
        :param target_label: KG node label (eg. "protein")
        :param relationship_type: KG relationship type (eg. "physically_interacts_with")
        :param use_json: If the answer should be in the standardized API output format
        :param directed: passed through to RU.return_subgraph_paths_of_type
        :return: with use_json False, a list of dicts (type, name, desc, prob), one per target node;
            with use_json True, the populated response object. When no path exists, or more than
            one intermediate node is found, an error response object is returned in either mode.
        """
        # Label/kind of the source node, needed to query for paths
        source_label = RU.get_node_property(source_name, "label")
        path_endpoints = (source_name, source_label, None, target_label)

        # Get the subgraph (all targets along relationship), falling back to a subclass_of detour
        has_intermediate_node = False
        try:
            graph = RU.return_subgraph_paths_of_type(*path_endpoints,
                                                     [relationship_type],
                                                     directed=directed)
        except CustomExceptions.EmptyCypherError:
            has_intermediate_node = True
            try:
                graph = RU.return_subgraph_paths_of_type(
                    *path_endpoints, ['subclass_of', relationship_type],
                    directed=directed)
            except CustomExceptions.EmptyCypherError:
                no_path = FormatOutput.FormatResponse(3)
                no_path.add_error_message(
                    "NoPathsFound",
                    "No path between %s and %s via relationship %s" %
                    (source_name, target_label, relationship_type))
                return no_path

        # Locate the graph-internal key of the source node
        for candidate, attrs in graph.nodes(data=True):
            if attrs['properties']['id'] == source_name:
                source_node_number = candidate
                break

        # Every other node in the subgraph counts as a target
        target_numbers = [
            candidate for candidate, attrs in graph.nodes(data=True)
            if attrs['properties']['id'] != source_name
        ]

        # With a detour, exactly one intermediate node is supported
        if has_intermediate_node:
            neighbors = list(graph.neighbors(source_node_number))
            if len(neighbors) > 1:
                ambiguous = FormatOutput.FormatResponse(3)
                ambiguous.add_error_message("AmbiguousPath",
                                            "More than one intermediate node")
                return ambiguous
            intermediate_node = neighbors.pop()

        #### If use_json not specified, then return results as a fairly plain list
        if not use_json:
            # All these are known to be true, hence prob 1
            return [{
                'type': list(set(attrs['labels']) - {'Base'}).pop(),
                'name': attrs['properties']['name'],
                'desc': attrs['properties']['name'],
                'prob': 1
            } for attrs in (graph.nodes[number] for number in target_numbers)]

        #### Else return the results in the Translator standard API JSON format
        response = FormatOutput.FormatResponse(3)  # it's a Q3 question
        response.message.table_column_names = [
            "source name", "source ID", "target name", "target ID"
        ]
        source_props = graph.nodes[source_node_number]['properties']
        source_description = source_props['name']

        #### Create the QueryGraph for this type of question: source -[relationship]-> target
        source_node = QNode()
        source_node.id = "n00"
        source_node.curie = source_props['id']
        source_node.type = source_props['category']
        target_node = QNode()
        target_node.id = "n01"
        target_node.type = target_label
        edge1 = QEdge()
        edge1.id = "e00"
        edge1.source_id = "n00"
        edge1.target_id = "n01"
        edge1.type = relationship_type
        query_graph = QueryGraph()
        query_graph.nodes = [source_node, target_node]
        query_graph.edges = [edge1]
        response.message.query_graph = query_graph

        #### Reverse-lookup dict (source curie / target type / edge type -> QueryGraph id)
        response._type_map = {
            source_node.curie: source_node.id,
            target_node.type: target_node.id,
            edge1.type: edge1.id,
        }

        #### Loop over all the returned targets and put them into the response structure
        for target_number in target_numbers:
            target_props = graph.nodes[target_number]['properties']
            target_description = target_props['name']

            members = [source_node_number, target_number]
            if has_intermediate_node:
                members.insert(1, intermediate_node)
            subgraph = graph.subgraph(members)

            res = response.add_subgraph(
                subgraph.nodes(data=True),
                subgraph.edges(data=True),
                "%s and %s are connected by the relationship %s" %
                (source_description, target_description, relationship_type),
                1,
                return_result=True)
            # essence / essence_type describe the answer node of this result
            res.essence = "%s" % target_description
            res.essence_type = target_props['category']
            res.row_data = [
                "%s" % source_description,
                "%s" % source_props['id'],
                "%s" % target_description,
                "%s" % target_props['id'],
            ]
        return response
Пример #7
0
    def queryTerm(self, term):
        """Look up a term and return a Message describing it.

        The term is resolved with self.findTermAttributesAndTypeByName and the
        Message comes from self.createMessage. When the lookup status is not
        'OK' the Message is only flagged as "TermNotFound"; otherwise it is
        filled with a single result, a one-node knowledge graph and a matching
        one-node query graph.

        :param term: term to look up
        :return: the populated Message
        """
        attributes = self.findTermAttributesAndTypeByName(term)
        message = self.createMessage()

        #### Term not found: flag the message and stop here
        if attributes["status"] != 'OK':
            message.message_code = "TermNotFound"
            message.code_description = "Unable to find this term in MeSH. No further information is available at this time."
            message.id = None
            return message

        message.code_description = "1 result found"
        message.table_column_names = [ "id", "type", "name", "description", "uri" ]

        #### The MeSH CURIE is reused for the node, the query node and the knowledge map
        mesh_curie = "MESH:" + attributes["id"]

        #### Describe the term as a single Node
        term_node = Node()
        term_node.id = mesh_curie
        term_node.uri = "http://purl.obolibrary.org/obo/MESH_" + attributes["id"]
        term_node.type = [ attributes["type"] ]
        term_node.name = attributes["name"]
        term_node.description = attributes["description"]

        #### The single result (potential answer)
        result = Result()
        result.id = "http://rtx.ncats.io/api/v1/result/0000"
        result.description = "The term " + attributes["name"] + " refers to " + attributes["description"]
        result.confidence = 1.0
        result.essence = attributes["name"]
        result.essence_type = attributes["type"]
        joined_types = ",".join(term_node.type)
        result.row_data = [ term_node.id, joined_types, term_node.name, term_node.description, term_node.uri ]

        #### One-node graph, shared by the result and the top-level Message
        #### (a shortcut that is only valid for this single-node answer)
        single_node_graph = KnowledgeGraph()
        single_node_graph.nodes = [ term_node ]
        result.result_graph = single_node_graph
        message.results = [ result ]
        message.knowledge_graph = single_node_graph

        #### Manufacture a query_graph post hoc: one untyped node pinned to the CURIE
        query_node = QNode()
        query_node.node_id = "n00"
        query_node.curie = mesh_curie
        query_node.type = None
        post_hoc_graph = QueryGraph()
        post_hoc_graph.nodes = [ query_node ]
        post_hoc_graph.edges = []
        message.query_graph = post_hoc_graph

        #### Map the query node id onto the knowledge graph node
        result.knowledge_map = { "n00": mesh_curie }

        return message
Пример #8
0
    def interpret_query_graph(self, query):
        """Try to interpret a QueryGraph and convert it into something RTX can process.

        Only two shapes are supported: a single node (interpreted as a "Q0"
        what-is question) and two nodes joined by exactly one typed edge
        (interpreted as a "1hop" question, where the node with a CURIE is the
        source and the node with only a type is the target).

        :param query: dict with the graph at query["message"]["query_graph"];
            the graph must provide "nodes" and "edges" lists
        :return: dict that always has "message_code" and "code_description".
            When message_code is "OK" it also has "id", "terms",
            "original_question" and "restated_question", plus
            "execution_string" for the 2-node case. Any other message_code
            describes why the graph could not be interpreted.
        :rtype: dict
        """

        def has_quote(value):
            # Check the rendered text, because that is exactly what "%s"
            # interpolates into the single-quoted shell arguments below.
            # A substring test is required: re.match() would only look at
            # the first character and let embedded quotes through.
            return "'" in str(value)

        #### Create a default response dict
        response = {
            "message_code": "InternalError",
            "code_description": "interpret_query_graph exited abnormally"
        }

        query_graph = query["message"]["query_graph"]
        nodes = query_graph["nodes"]
        edges = query_graph["edges"]
        n_nodes = len(nodes)
        n_edges = len(edges)
        eprint("DEBUG: n_nodes = %d, n_edges = %d" % (n_nodes, n_edges))

        #### Handle impossible cases
        if n_nodes == 0:
            return {
                "message_code": "QueryGraphZeroNodes",
                "code_description": "Submitted QueryGraph has 0 nodes. At least 1 node is required"
            }
        if n_nodes == 1 and n_edges > 0:
            return {
                "message_code": "QueryGraphTooManyEdges",
                "code_description": "Submitted QueryGraph may not have edges if there is only one node"
            }
        if n_nodes == 2 and n_edges > 1:
            return {
                "message_code": "QueryGraphTooManyEdges",
                "code_description": "Submitted QueryGraph may not have more than 1 edge if there are only 2 nodes"
            }
        if n_nodes > 2:
            return {
                "message_code": "UnsupportedQueryGraph",
                "code_description": "Submitted QueryGraph may currently only have 1 or 2 node. Support for 3 or more nodes coming soon."
            }

        #### Handle the single node case
        if n_nodes == 1:
            response = {
                "message_code": "OK",
                "code_description": "Interpreted QueryGraph as single node Q0"
            }
            response["id"] = "Q0"
            entity = nodes[0]["curie"]
            eprint("DEBUG: Q0 - entity = %s" % entity)
            response["terms"] = {"term": entity}
            response["original_question"] = "Submitted QueryGraph"
            response["restated_question"] = "What is %s?" % entity
            return response

        #### Handle the 2 node case
        if n_nodes == 2:
            eprint("DEBUG: Handling the 2-node case")
            source_type = None
            source_name = None
            target_type = None
            edge_type = None

            #### Loop through nodes trying to figure out which is the source and target
            for qnode in nodes:
                node = QNode.from_dict(qnode)

                #### A generic gene is treated as a protein; a specific gene is rejected
                if node.type == "gene":
                    if node.curie is not None:
                        return {
                            "message_code": "UnsupportedNodeType",
                            "code_description": "At least one of the nodes in the QueryGraph is a specific gene, which cannot be handled at the moment, a generic gene type with no curie is translated into a protein by RTX."
                        }
                    node.type = "protein"

                #### A node without a CURIE can only be the target
                if node.curie is None:
                    if node.type is None:
                        return {
                            "message_code": "UnderspecifiedNode",
                            "code_description": "At least one of the nodes in the QueryGraph has neither a CURIE nor a type. It must have one of those."
                        }
                    if target_type is not None:
                        return {
                            "message_code": "TooManyTargets",
                            "code_description": "Both nodes have only types and are interpreted as targets. At least one node must have an exact identity."
                        }
                    target_type = node.type
                    continue

                #### A node with a CURIE is the source
                if has_quote(node.curie):
                    # The value being checked is the curie, so say so.
                    return {
                        "message_code": "IllegalCharacters",
                        "code_description": "Node curie contains one or more illegal characters."
                    }
                if source_name is not None:
                    return {
                        "message_code": "OverspecifiedQueryGraph",
                        "code_description": "All nodes in the QueryGraph have exact identities, so there is really nothing left to query."
                    }
                if node.type is None:
                    return {
                        "message_code": "UnderspecifiedSourceNode",
                        "code_description": "The source node must have a type in addition to a curie."
                    }
                source_name = node.curie
                source_type = node.type

            #### Loop over the edges (should be just 1), ensuring that it has a type and recording it
            for qedge in edges:
                edge = QEdge.from_dict(qedge)
                if edge.type is None:
                    return {
                        "message_code": "EdgeWithNoType",
                        "code_description": "At least one edge has no type. All edges must have a type."
                    }
                edge_type = edge.type

            #### With no edge at all there is no relationship to query; report it
            #### instead of failing later on a missing edge type
            if edge_type is None:
                return {
                    "message_code": "QueryGraphZeroEdges",
                    "code_description": "Submitted QueryGraph has 2 nodes but no edge. Exactly 1 edge is required if there are 2 nodes"
                }

            #### Perform a crude sanitation of the input parameters to make sure the shell command won't fail or cause harm
            if has_quote(edge_type) or has_quote(target_type) or has_quote(source_name):
                return {
                    "message_code": "IllegalCharacters",
                    "code_description": "The input query_graph entities contain one or more illegal characters."
                }

            #### Create the necessary components to hand off the queries to Q3Solution.py
            response = {
                "message_code": "OK",
                "code_description": "Interpreted QueryGraph as a single hop question"
            }
            response["id"] = "1hop"
            response["terms"] = {
                source_type: source_name,
                "target_label": target_type,
                "rel_type": edge_type
            }
            response["original_question"] = "Submitted QueryGraph"
            response["restated_question"] = "Which %s(s) are connected to the %s %s via edge type %s?" % (
                target_type, source_type, source_name, edge_type)
            #response["execution_string"] = "Q3Solution.py -s '%s' -t '%s' -r '%s' -j --directed" % (source_name,target_type,edge_type)
            response["execution_string"] = "Q3Solution.py -s '%s' -t '%s' -r '%s' -j" % (
                source_name, target_type, edge_type)
            return response

        #### Fallback: every node count is handled above, so this only returns the default InternalError
        return response