Exemplo n.º 1
0
def test_validator_explicit_biolink_version():
    """
    Validate a minimal two-node, one-edge graph after explicitly
    pinning the Validator class to Biolink Model release 1.8.2,
    and expect that no validation errors are reported.
    """
    compound_id = "CHEMBL.COMPOUND:1222250"
    anatomy_id = "UBERON:0000001"

    graph = NxGraph()

    # Two nodes: a compound and a generic named thing
    graph.add_node(
        compound_id, id=compound_id, name="Dextrose", category=["Carbohydrate"]
    )
    graph.add_node(anatomy_id, id=anatomy_id, name="fake", category=["NamedThing"])

    # A single 'part_of' association linking the compound to the named thing
    graph.add_edge(
        compound_id,
        anatomy_id,
        id="CHEMBL.COMPOUND:1222250-part_of-UBERON:0000001",
        relation="RO:1",
        predicate="part_of",
        subject=compound_id,
        object=anatomy_id,
        category=["biolink:Association"],
    )

    # The Biolink release is set on the class *before* the instance is created
    Validator.set_biolink_model(version="1.8.2")
    graph_validator = Validator(verbose=True)
    graph_validator.validate(graph)

    print(graph_validator.get_errors())
    reported = graph_validator.get_errors()
    assert len(reported) == 0
Exemplo n.º 2
0
def test_distinct_class_versus_validator_instance_biolink_version():
    """
    Check that a Validator instance created under one class-level Biolink
    release reports a model version different from the class toolkit
    after the class-level release is subsequently changed.
    """
    # Instance is created while the class is set to 1.7.0 ...
    Validator.set_biolink_model(version="1.7.0")
    instance_validator = Validator()

    # ... then the class-level release is switched to 1.8.2
    Validator.set_biolink_model(version="1.8.2")
    class_toolkit = Validator.get_toolkit()

    instance_version = instance_validator.get_validation_model_version()
    class_version = class_toolkit.get_model_version()
    assert instance_version != class_version
Exemplo n.º 3
0
def test_validate_by_stream_inspector():
    """
    Exercise validation in streaming mode: graph data flows through
    Transformer.transform() with a Validator injected as the inspector,
    and the test expects that no validation errors are collected.
    """
    source_files = [
        os.path.join(RESOURCE_DIR, basename)
        for basename in ("graph_nodes.tsv", "graph_edges.tsv")
    ]
    input_args = {
        "filename": source_files,
        "format": "tsv",
        "aggregator_knowledge_source": True,
    }

    Validator.set_biolink_model("1.8.2")

    # The Validator picks up whichever Biolink release is currently set
    stream_validator = Validator()

    # Streaming mode discards the graph data, hence the "null" output format
    null_sink = {"format": "null"}

    streaming_transformer = Transformer(stream=True)
    streaming_transformer.transform(
        input_args=input_args,
        output_args=null_sink,
        # The validator is handed in as the inspector for the
        # underlying Transformer.process() to use
        inspector=stream_validator,
    )

    stream_validator.write_report()

    collected_errors = stream_validator.get_errors()
    assert len(collected_errors) == 0
Exemplo n.º 4
0
def validate(
    inputs: List[str],
    input_format: str,
    input_compression: Optional[str],
    output: Optional[str],
    stream: bool,
    biolink_release: Optional[str] = None,
) -> Dict:
    """
    Run KGX validator on an input file to check for Biolink Model compliance.

    Parameters
    ----------
    inputs: List[str]
        Input files
    input_format: str
        The input format
    input_compression: Optional[str]
        The input compression type
    output: Optional[str]
        Path to output file (stdout, by default)
    stream: bool
        Whether to parse input as a stream.
    biolink_release: Optional[str] = None
        SemVer version of Biolink Model Release used for validation (default: latest Biolink Model Toolkit version)

    Returns
    -------
    Dict
        A dictionary of entities which have parse errors indexed by [message_level][error_type][message]

    """
    # 'Inspector' design pattern: statistics/validation can be computed in
    # 'stream' mode on a small memory footprint by injecting an inspector
    # into the Transformer.process() source-to-sink data flow.
    #
    # First, the Biolink release is set on the Validator class, and the
    # Validator (a Callable class used as the Inspector) is instantiated
    # before the Transformer.
    Validator.set_biolink_model(biolink_release)

    # Validator assumes the currently set Biolink Release
    validator = Validator()

    # Both processing modes read from the same input specification
    input_args = {
        "filename": inputs,
        "format": input_format,
        "compression": input_compression,
    }

    if stream:
        transformer = Transformer(stream=stream)

        transformer.transform(
            input_args=input_args,
            # streaming processing throws the graph data away
            output_args={"format": "null"},
            # ... Second, we inject the Inspector into the transform() call,
            # for the underlying Transformer.process() to use...
            inspector=validator,
        )
    else:
        # "Classical" non-streaming mode, with click.progressbar
        # but an unfriendly large memory footprint for large graphs
        transformer = Transformer()

        transformer.transform(input_args)

        # Slight tweak of classical 'validate' function: that the
        # list of errors are cached internally in the Validator object
        validator.validate(transformer.store.graph)

    if output:
        # Use a context manager so the report file is always flushed and
        # closed, even if write_report() raises.
        with open(output, "w") as report_file:
            validator.write_report(report_file)
    else:
        validator.write_report(stdout)

    # ... Third, we return directly any validation errors to the caller
    return validator.get_errors()
Exemplo n.º 5
0
def test_distinct_validator_class_versus_default_toolkit_biolink_version():
    """
    Check that pinning the Validator class to Biolink Model release 1.8.2
    yields a toolkit whose model version differs from the one reported by
    the default toolkit returned from get_toolkit().
    """
    Validator.set_biolink_model(version="1.8.2")

    default_toolkit = get_toolkit()
    pinned_toolkit = Validator.get_toolkit()

    default_version = default_toolkit.get_model_version()
    pinned_version = pinned_toolkit.get_model_version()
    assert default_version != pinned_version