Exemplo n.º 1
0
def convert_nsarg(
    nsarg: str,
    api_url: str = None,
    namespace_targets: Mapping[str, List[str]] = None,
    canonicalize: bool = False,
    decanonicalize: bool = False,
) -> str:
    """Canonicalize or decanonicalize an NSArg through the BEL.bio API

    The term is looked up at ``<api_url>/terms/<nsarg>/canonicalized`` or
    ``<api_url>/terms/<nsarg>/decanonicalized``.  When namespace_targets is
    given it is JSON-encoded, sent as the ``namespace_targets`` query
    parameter, and the canonicalized endpoint is used regardless of the
    canonicalize/decanonicalize flags.

    Args:
        nsarg (str): bel statement string or partial string (e.g. subject or object)
        api_url (str): BEL.bio api url to use, e.g. https://api.bel.bio/v1;
            read from config["bel_api"]["servers"]["api_url"] when not given
        namespace_targets (Mapping[str, List[str]]): formatted as in configuration file example
        canonicalize (bool): use canonicalize endpoint/namespace targets
        decanonicalize (bool): use decanonicalize endpoint/namespace targets

    Returns:
        str: the "term_id" from the API response on a 200 status; the
            original nsarg when no conversion was requested or the request
            did not succeed; None when no api url is available
    """

    # Fall back to the configured server only when the caller gave no url
    api_url = api_url or config["bel_api"]["servers"]["api_url"]
    if not api_url:
        log.error("Missing api url - cannot convert namespace")
        return None

    # Select the endpoint template and query parameters
    if namespace_targets:
        # Explicit namespace targets override the (de)canonicalize flags
        params = {"namespace_targets": json.dumps(namespace_targets)}
        endpoint = "/terms/{}/canonicalized"
    elif canonicalize:
        params = None
        endpoint = "/terms/{}/canonicalized"
    elif decanonicalize:
        params = None
        endpoint = "/terms/{}/decanonicalized"
    else:
        log.warning(
            "Missing (de)canonical flag - cannot convert namespaces")
        return nsarg

    request_url = (api_url + endpoint).format(url_path_param_quoting(nsarg))
    response = http_client.get(request_url, params=params, timeout=10)

    if response and response.status_code == 200:
        return response.json().get("term_id", nsarg)

    if not response or response.status_code == 404:
        log.error(f"[de]Canonicalization endpoint missing: {request_url}")

    # Conversion failed - hand back the input unchanged
    return nsarg
Exemplo n.º 2
0
def validate_arg_values(ast, bo):
    """Recursively validate arg (NSArg and StrArg) values

    Check that NSArgs are found in BELbio API and match appropriate entity_type.
    Check that StrArgs match their value - either default namespace or regex string

    A ("WARNING", message) tuple is appended to bo.validation_messages for
    every value that fails a check.  Validation is skipped entirely when
    bo.api_url is not set.

    Args:
        ast: AST node to validate; NSArg and StrArg nodes are checked and
            any node with an ``args`` attribute has its args validated
            recursively
        bo: bel object; its api_url, spec and validation_messages are used

    Returns:
        bel object
    """

    if not bo.api_url:
        log.info("No API endpoint defined")
        return bo

    log.debug(f"AST: {ast}")

    # Test NSArg terms
    if isinstance(ast, NSArg):
        term_id = "{}:{}".format(ast.namespace, ast.value)
        value_types = ast.value_types
        log.debug(f"Value types: {value_types}  AST value: {ast.value}")

        # Default namespaces are defined in the bel_specification file
        if ast.namespace == "DEFAULT":  # may use the DEFAULT namespace or not
            found = any(
                ast.value in _default_namespace_values(bo, value_type)
                for value_type in value_types)
            if found:
                log.debug("Default namespace valid term: {}".format(term_id))
            else:
                log.debug("Default namespace invalid term: {}".format(term_id))
                bo.validation_messages.append(
                    ("WARNING", f"Default Term: {term_id} not found"))

        # Process normal, non-default-namespace terms
        else:
            request_url = bo.api_url + "/terms/{}".format(
                url_path_param_quoting(term_id))
            log.info(f"Validate Arg Values url {request_url}")
            r = get_url(request_url)

            if r and r.status_code == 200:
                result = r.json()
                entity_types = result.get("entity_types", [])

                # function signature term value_types doesn't match up with API term entity_types
                log.debug(
                    f"AST.value_types  {ast.value_types}  Entity types {entity_types}"
                )

                # Check that entity types match
                if set(ast.value_types).isdisjoint(entity_types):
                    msg = (
                        "Invalid Term - statement term {} allowable entity types: {} "
                        "do not match API term entity types: {}"
                    ).format(term_id, ast.value_types, entity_types)
                    log.debug(msg)
                    bo.validation_messages.append(("WARNING", msg))

                if term_id in result.get("obsolete_ids", []):
                    bo.validation_messages.append((
                        "WARNING",
                        f'Obsolete term: {term_id}  Current term: {result["id"]}'
                    ))

            elif r is None:
                # No response object at all, so there is no status code to inspect
                log.error(f"No response - Bad URL: {request_url}")
            elif r.status_code == 404:
                bo.validation_messages.append(
                    ("WARNING", f"Term: {term_id} not found in namespace"))
            else:
                log.error(f"Status {r.status_code} - Bad URL: {request_url}")

    # Process StrArgs
    if isinstance(ast, StrArg):
        log.debug(f"  Check String Arg: {ast.value}  {ast.value_types}")
        for value_type in ast.value_types:
            # A value type written as /.../ is a regex to match against
            if value_type.startswith("/"):
                value_type = re.sub(r"^/", "", value_type)
                value_type = re.sub(r"/$", "", value_type)
                if re.match(value_type, ast.value):
                    break
            if ast.value in _default_namespace_values(bo, value_type):
                break
        else:  # If for loop doesn't hit the break, no matches found, therefore for StrArg value is bad
            bo.validation_messages.append((
                "WARNING",
                f"String value {ast.value} does not match default namespace value or regex pattern: {ast.value_types}",
            ))

    # Recursively process every NSArg by processing BELAst and Functions
    if hasattr(ast, "args"):
        for arg in ast.args:
            validate_arg_values(arg, bo)

    return bo


def _default_namespace_values(bo, value_type):
    """Return the names and abbreviations of the default namespace for value_type

    An empty list is returned when bo.spec["namespaces"] has no entry for
    value_type, so callers can test membership without guarding the lookup.
    """
    namespaces = bo.spec["namespaces"]
    if value_type not in namespaces:
        return []

    info = namespaces[value_type]["info"]
    return [ns["name"] for ns in info] + [ns["abbreviation"] for ns in info]