Exemplo n.º 1
0
def get_node_pairs_to_overlay(subject_qnode_key: str, object_qnode_key: str, query_graph: QueryGraph,
                              knowledge_graph: KnowledgeGraph, log: ARAXResponse) -> Set[Tuple[str, str]]:
    """
    Determine which combinations of subject/object nodes in the KG need to be overlaid (e.g., have a virtual
    edge added between them).

    Resultify is run on the portion of the query graph that is already present in the KG, and every
    (subject, object) pair of node ids that is bound together in at least one result is collected.
    (See issue #1069.) If Resultify does not finish with status 'OK', or no pairs are found, the function
    falls back to returning all possible combinations of subject/object nodes.

    :param subject_qnode_key: key of the query node whose KG nodes form the first element of each pair
    :param object_qnode_key: key of the query node whose KG nodes form the second element of each pair
    :param query_graph: the full query graph; only qnodes/qedges with a counterpart in the KG are used
    :param knowledge_graph: the knowledge graph to inspect (its node/edge containers are shallow-copied
                            before being handed to Resultify)
    :param log: response object used for debug/warning messages
    :return: set of (subject node id, object node id) tuples
    """
    log.debug(f"Narrowing down {subject_qnode_key}--{object_qnode_key} node pairs to overlay")
    kg_nodes_by_qg_id = get_node_ids_by_qg_id(knowledge_graph)
    kg_edges_by_qg_id = get_edge_ids_by_qg_id(knowledge_graph)

    # Grab the portion of the QG already 'expanded' (aka, present in the KG). The key sets are built once
    # here rather than once per membership test inside the comprehensions.
    expanded_qnode_keys = set(kg_nodes_by_qg_id)
    expanded_qedge_keys = set(kg_edges_by_qg_id)
    sub_query_graph = QueryGraph(
        nodes={key: qnode for key, qnode in query_graph.nodes.items() if key in expanded_qnode_keys},
        edges={key: qedge for key, qedge in query_graph.edges.items() if key in expanded_qedge_keys})

    # Compute results using Resultify so we can see which nodes appear in the same results
    resultifier = ARAXResultify()
    sub_response = ARAXResponse()
    sub_response.envelope = Response()
    sub_response.envelope.message = Message()
    sub_message = sub_response.envelope.message
    sub_message.query_graph = sub_query_graph
    sub_message.knowledge_graph = KnowledgeGraph(nodes=knowledge_graph.nodes.copy(),
                                                 edges=knowledge_graph.edges.copy())
    resultify_response = resultifier.apply(sub_response, {})

    # Figure out which node pairs appear together in one or more results
    if resultify_response.status == 'OK':
        node_pairs = set()
        for result in sub_message.results:
            subject_curies_in_this_result = {node_binding.id
                                             for node_binding in result.node_bindings.get(subject_qnode_key, ())}
            object_curies_in_this_result = {node_binding.id
                                            for node_binding in result.node_bindings.get(object_qnode_key, ())}
            # Grow the accumulator in place; rebuilding it with union() copies every pair seen so far
            node_pairs.update(itertools.product(subject_curies_in_this_result, object_curies_in_this_result))
        log.debug(f"Identified {len(node_pairs)} node pairs to overlay (with help of resultify)")
        if node_pairs:
            return node_pairs

    # Back up to using the old (O(n^2)) method of all combinations of subject/object nodes in the KG
    log.warning("Failed to narrow down node pairs to overlay; defaulting to all possible combinations")
    return set(itertools.product(kg_nodes_by_qg_id[subject_qnode_key], kg_nodes_by_qg_id[object_qnode_key]))
Exemplo n.º 2
0
    def apply_fetch_message(self, message, input_parameters, describe=False):
        """
        Fetch a remote Message via HTTP GET and store it (and its envelope) on this object.

        :param message: Message object; kept on self.message until it is replaced by the fetched one
        :param input_parameters: dict of command parameters; the only permitted key is 'uri', the
                                 URL/URI of the Message to fetch
        :param describe: if True, return the command definition dict (used for autogenerated
                         documentation) instead of executing the command
        :return: ARAXResponse object with execution information (or the command definition dict
                 when describe is True)
        :rtype: ARAXResponse
        """

        # #### Command definition for autogenerated documentation
        # The documented parameter name must match the key accepted below ('uri'); otherwise users
        # following the documentation are rejected with UnknownParameter.
        command_definition = {
            'dsl_command': 'fetch_message()',
            'description':
            """The `fetch_message` command fetches a remote Message by its id and can then allow further processing on it.""",
            'parameters': {
                'uri': {
                    'is_required':
                    True,
                    'examples':
                    ['https://arax.ncats.io/api/arax/v1.0/message/1'],
                    'default':
                    '',
                    'type':
                    'string',
                    'description':
                    """A URL/URI that identifies the Message to be fetched""",
                },
            }
        }

        if describe:
            return command_definition

        #### Define a default response
        response = ARAXResponse()
        self.response = response
        self.message = message

        #### Basic checks on arguments
        if not isinstance(input_parameters, dict):
            response.error("Provided parameters is not a dict",
                           error_code="ParametersNotDict")
            return response

        #### Define a complete set of allowed parameters and their defaults
        parameters = {
            'uri': None,
        }

        #### Loop through the input_parameters and override the defaults and make sure they are allowed
        for key, value in input_parameters.items():
            if key not in parameters:
                response.error(f"Supplied parameter {key} is not permitted",
                               error_code="UnknownParameter")
            else:
                parameters[key] = value
        #### Return if any of the parameters generated an error (showing not just the first one)
        if response.status != 'OK':
            return response

        #### Store these final parameters for convenience
        response.data['parameters'] = parameters
        self.parameters = parameters

        #### Basic checks on arguments
        # Read from the defaulted dict so that an omitted 'uri' yields a clean error response
        # instead of a KeyError.
        message_uri = parameters['uri']
        if not isinstance(message_uri, str):
            response.error("Provided parameter is not a string",
                           error_code="ParameterNotString")
            return response

        response.info(f"Fetching Message via GET to '{message_uri}'")
        try:
            response_content = requests.get(
                message_uri, headers={'accept': 'application/json'})
        except requests.exceptions.RequestException as error:
            # Connection-level failures (DNS, refused connection, invalid URL, ...) are reported the
            # same way as every other failure in this method rather than propagating to the caller.
            response.error(f"GET to '{message_uri}' failed: {error}",
                           error_code="GETFailed")
            return response
        status_code = response_content.status_code

        if status_code != 200:
            # Two entries are logged: a detailed one including the returned body, then a concise one.
            # NOTE(review): presumably the last error becomes the response's headline message, which
            # would explain the short duplicate - confirm before collapsing these into one.
            response.error(
                f"GET to '{message_uri}' returned HTTP code {status_code} and content '{response_content.content}'",
                error_code="GETFailed")
            response.error(
                f"GET to '{message_uri}' returned HTTP code {status_code}",
                error_code="GETFailed")
            return response

        #### Unpack the response content into a dict and dump
        try:
            response_dict = response_content.json()
            envelope = Response().from_dict(response_dict)
        except Exception:
            # Deliberately broad (malformed JSON or a schema mismatch can surface as different
            # exception types), but no longer swallows KeyboardInterrupt/SystemExit.
            response.error(
                f"Error converting response from '{message_uri}' to objects from content",
                error_code="UnableToParseContent")
            return response

        #### Store the decoded message and return response
        message = envelope.message
        self.message = message
        self.envelope = envelope
        n_results = 0
        n_qg_nodes = 0
        n_kg_nodes = 0
        if isinstance(message.results, list):
            n_results = len(message.results)
        # Graph nodes may be keyed dicts or plain lists depending on the message format; count both
        # so that dict-based graphs are not silently reported as empty.
        query_graph = message.query_graph
        if isinstance(query_graph, QueryGraph) and isinstance(
                query_graph.nodes, (dict, list)):
            n_qg_nodes = len(query_graph.nodes)
        knowledge_graph = message.knowledge_graph
        if isinstance(knowledge_graph, KnowledgeGraph) and isinstance(
                knowledge_graph.nodes, (dict, list)):
            n_kg_nodes = len(knowledge_graph.nodes)
        response.info(
            f"Retreived Message with {n_qg_nodes} QueryGraph nodes, {n_kg_nodes} KnowledgeGraph nodes, and {n_results} results"
        )
        # Only a fully populated message (query graph, knowledge graph and results) is handed to
        # Resultify to recompute its query-graph keys.
        if n_qg_nodes > 0 and n_kg_nodes > 0 and n_results > 0:
            resultifier = ARAXResultify()
            response.envelope = envelope
            resultifier.recompute_qg_keys(response)
        return response
Exemplo n.º 3
0
def create_results(
    qg: QueryGraph,
    kg: QGOrganizedKnowledgeGraph,
    log: ARAXResponse,
    overlay_fet: bool = False,
    rank_results: bool = False,
    qnode_key_to_prune: Optional[str] = None,
) -> ARAXResponse:
    """
    Build results for the given query graph/knowledge graph via Resultify (used by Expand for pruning).

    :param qg: query graph to resultify; when overlay_fet is set, FET qedges in qg.edges may be
               tagged with an option group or removed again if the overlay failed
    :param kg: knowledge graph organized by QG key; converted to the standard KG format first
    :param log: response object that receives debug/warning/error messages
    :param overlay_fet: if True, run the Overlay 'fisher_exact_test' action between
                        qnode_key_to_prune and each qnode connected to it by a qedge
    :param rank_results: if True, score the results with ARAXRanker after Resultify has run
    :param qnode_key_to_prune: key of the qnode being pruned; only consulted when overlay_fet is True
    :return: the ARAXResponse whose envelope.message holds the query graph, knowledge graph and
             whatever results Resultify produced
    """
    regular_format_kg = convert_qg_organized_kg_to_standard_kg(kg)
    resultifier = ARAXResultify()
    prune_response = ARAXResponse()
    prune_response.envelope = Response()
    prune_response.envelope.message = Message()
    prune_message = prune_response.envelope.message
    prune_message.query_graph = qg
    prune_message.knowledge_graph = regular_format_kg
    if overlay_fet:
        log.debug("Using FET to assess quality of intermediate answers in Expand")
        connected_qedges = [
            qedge for qedge in qg.edges.values()
            if qedge.subject == qnode_key_to_prune
            or qedge.object == qnode_key_to_prune
        ]
        # Each pair is (neighbouring qnode, qnode being pruned); a set de-duplicates parallel qedges
        qnode_pairs_to_overlay = {
            (qedge.subject if qedge.subject != qnode_key_to_prune else
             qedge.object, qnode_key_to_prune)
            for qedge in connected_qedges
        }
        for qnode_pair in qnode_pairs_to_overlay:
            pair_string_id = f"{qnode_pair[0]}-->{qnode_pair[1]}"
            log.debug(f"Overlaying FET for {pair_string_id} (from Expand)")
            fet_qedge_key = f"FET{pair_string_id}"
            try:
                overlayer = ARAXOverlay()
                params = {
                    "action": "fisher_exact_test",
                    "subject_qnode_key": qnode_pair[0],
                    "object_qnode_key": qnode_pair[1],
                    "virtual_relation_label": fet_qedge_key
                }
                overlayer.apply(prune_response, params)
            except Exception as error:
                # Best effort: a failing overlay must not abort pruning, so log it and carry on
                formatted_traceback = traceback.format_exception(
                    type(error), error, error.__traceback__)
                log.warning(
                    f"An uncaught error occurred when overlaying with FET during Expand's pruning: {error}: "
                    f"{repr(formatted_traceback)}"
                )
            if prune_response.status != "OK":
                log.warning(
                    f"FET produced an error when Expand tried to use it to prune the KG. "
                    f"Log was: {prune_response.show()}")
                log.debug("Will continue pruning without overlaying FET")
                # Get rid of any FET edges that might be in the KG/QG, since this step failed
                remove_edges_with_qedge_key(
                    prune_response.envelope.message.knowledge_graph,
                    fet_qedge_key)
                qg.edges.pop(fet_qedge_key, None)
                prune_response.status = "OK"  # Clear this so we can continue without overlaying
            else:
                if fet_qedge_key in qg.edges:
                    qg.edges[
                        fet_qedge_key].option_group_id = f"FET_VIRTUAL_GROUP_{pair_string_id}"
                else:
                    log.warning(
                        "Attempted to overlay FET from Expand, but it didn't work. Pruning without it."
                    )

    # Create results and rank them as appropriate
    log.debug("Calling Resultify from Expand for pruning")
    resultifier.apply(prune_response, {})
    if rank_results:
        try:
            log.debug("Ranking Expand's intermediate pruning results")
            ranker = ARAXRanker()
            ranker.aggregate_scores_dmk(prune_response)
        except Exception as error:
            formatted_traceback = traceback.format_exception(
                type(error), error, error.__traceback__)
            log.error(
                f"An uncaught error occurred when attempting to rank results during Expand's pruning: "
                f"{error}: {repr(formatted_traceback)}."
                f"Log was: {prune_response.show()}",
                error_code="UncaughtARAXiError")
            # Give any unranked results a score of 0. Results may be unset if Resultify produced
            # nothing, so guard against None to avoid raising from inside this handler.
            for result in prune_response.envelope.message.results or []:
                if result.score is None:
                    result.score = 0
    return prune_response