示例#1
0
def load_schemas() -> dict:
    """
    Load all JSON schemas found under SCHEMA_DIR.

    Returns:
        A dictionary keyed on the schema directory, expressed relative to
        SCHEMA_DIR with path separators replaced by dots (SCHEMA_DIR itself
        is keyed on the empty string). Each value is a dictionary mapping an
        entity name (the file name without its ".json" suffix) to a
        ``(schema, full_json)`` tuple:

        * ``full_json`` is the file as returned by
          ``_load_dont_validate_schema(schema_path, SCHEMA_DIR)``;
        * ``schema`` is the same file loaded again with
          ``on_refs=json_to_html``, so refs are handed to the documentation
          callback defined below (presumably each ref is substituted by the
          callback's return value -- confirm against
          ``_load_dont_validate_schema``).
    """
    json_suffix = ".json"
    schemas = {}
    for root, _, paths in os.walk(SCHEMA_DIR):
        root_schemas = {}
        for path in paths:
            if not path.endswith(json_suffix):
                continue

            schema_path = os.path.join(root, path)

            full_json = _load_dont_validate_schema(schema_path, SCHEMA_DIR)

            resolver = jsonschema.RefResolver(f"file://{SCHEMA_DIR}/schemas",
                                              full_json)

            # NOTE: this closure reads `resolver` from the enclosing scope; it
            # is only passed to the loader call right below, within the same
            # loop iteration, so it sees this iteration's resolver.
            def json_to_html(ref: str) -> dict:
                """Map a ref to its documentation URL and resolved description."""
                url = ref.replace(".json", ".html")
                url = url.replace("properties/", "")
                url = url.replace("definitions/", "")
                url = url.replace("/", ".")
                # Resolve the ref and pick up its description, if it has one.
                with resolver.resolving(ref) as resolved:
                    description = (resolved["description"]
                                   if "description" in resolved else "")

                return {"url": url, "description": description}

            schema = _load_dont_validate_schema(schema_path,
                                                SCHEMA_DIR,
                                                on_refs=json_to_html)

            # `path` is a bare file name from os.walk and is known to end
            # with the suffix, so slicing the suffix off yields the name.
            schema_name = path[:-len(json_suffix)]
            root_schemas[schema_name] = (schema, full_json)

        # Compute the directory relative to SCHEMA_DIR structurally rather
        # than via substring replacement: this stays correct when SCHEMA_DIR
        # has a trailing separator or its text reappears deeper in the path.
        relative_root = os.path.relpath(root, SCHEMA_DIR)
        if relative_root == os.curdir:
            relative_root = ""
        schemas[relative_root.replace(os.sep, ".")] = root_schemas

    return schemas
def test_special_keywords():
    """
    Check the artifact keywords on the CyTOF template's
    "processed fcs filename" column: "is_artifact" must be present and
    "is_multi_artifact" must be absent.
    """
    template_path = os.path.join(SCHEMA_DIR,
                                 "templates/metadata/cytof_template.json")

    # Loaded without validation: this file is a template, not a schema.
    template = _load_dont_validate_schema(template_path, SCHEMA_DIR)

    worksheets = template["properties"]["worksheets"]
    preprocessing_columns = (
        worksheets["Acquisition and Preprocessing"]["data_columns"]["Preprocessing"])
    fcs_column = preprocessing_columns["processed fcs filename"]

    assert "is_multi_artifact" not in fcs_column
    assert "is_artifact" in fcs_column
示例#3
0
def test_convert_api_to_template_rna():
    """
    Exercise _convert_api_to_template for the "rna" assay.

    First checks that a single well-formed API entry converts to the
    expected template, then that three malformed API descriptions each
    raise InvalidMergeTargetException with the expected message.
    """
    # API entry for the STAR sorted-bam output; every use below is a fresh
    # copy so no two API descriptions share a dict object.
    star_bam_entry = {
        "filter_group": "alignment",
        "file_path_template": "analysis/star/{id}/{id}.sorted.bam",
        "short_description": "star alignment output",
        "long_description": "file sorted_bam file sorted_bam file sorted_bam file",
        "file_purpose": "Analysis view",
    }
    valid_api = {"cimac id": [dict(star_bam_entry)]}

    # ---- expected template, assembled from named pieces ----
    expected_preamble = {
        "protocol identifier": {
            "merge_pointer": "2/protocol_identifier",
            "type_ref": "clinical_trial.json#properties/protocol_identifier",
        },
        "folder": {
            "do_not_merge": True,
            "type": "string",
            "allow_empty": True,
        },
    }
    expected_sorted_bam = {
        "parse_through":
        "lambda id: f'{folder or \"\"}analysis/star/{id}/{id}.sorted.bam'",
        "merge_pointer": "0/star/sorted_bam",
        "gcs_uri_format":
        "{protocol identifier}/rna/{cimac id}/analysis/star/sorted.bam",
        "type_ref": "assays/components/local_file.json#properties/file_path",
        "is_artifact": 1,
    }
    expected_columns = {
        "cimac id": {
            "merge_pointer": "/cimac_id",
            "type_ref": "sample.json#properties/cimac_id",
            "process_as": [expected_sorted_bam],
        },
        "comments": {
            "type_ref":
            "assays/components/ngs/rna/rna_level1_analysis.json#properties/comments",
            "merge_pointer": "0/comments",
            "allow_empty": True,
        },
    }
    expected_template = {
        "title": "RNAseq level 1 analysis template",
        "description":
        "Metadata information for RNAseq level 1 Analysis output.",
        "prism_template_root_object_schema":
        "assays/components/ngs/rna/rna_analysis.json",
        "prism_template_root_object_pointer": "/analysis/rna_analysis",
        "properties": {
            "worksheets": {
                "RNAseq Analysis": {
                    "preamble_rows": expected_preamble,
                    "prism_data_object_pointer": "/level_1/-",
                    "data_columns": {"RNAseq Runs": expected_columns},
                },
                **_excluded_samples_worksheet_snippet,
            }
        },
    }

    assay_schema = _load_dont_validate_schema(
        "assays/components/ngs/rna/rna_analysis.json")

    converted = _convert_api_to_template("rna", valid_api, assay_schema)
    assert DeepDiff(expected_template, converted) == {}

    # ---- malformed inputs: (API description, expected error pattern) ----
    rejected_cases = (
        # unknown top-level key
        ({"foo": [{}]}, "corresponding entry"),
        # file_path_template is used to generate merge_pointer; "foo" has no target
        ({"cimac id": [{**star_bam_entry, "file_path_template": "foo"}]},
         "cannot be mapped"),
        # a direct repeat of the same entry will collide
        ({"cimac id": [dict(star_bam_entry), dict(star_bam_entry)]},
         "collision for inferred merge target"),
    )
    for bad_api, error_pattern in rejected_cases:
        with pytest.raises(InvalidMergeTargetException, match=error_pattern):
            _convert_api_to_template("rna", bad_api, assay_schema)
示例#4
0
def test_convert_api_to_template_wes():
    """
    Exercise _convert_api_to_template for the "wes" assay: an API
    description with one "run id" file and one "tumor cimac id" file must
    convert to the expected WES analysis template.
    """

    def api_entry(filter_group, file_path_template, label):
        # All entries in this test share the same description pattern and purpose.
        return {
            "filter_group": filter_group,
            "file_path_template": file_path_template,
            "short_description": f"{label} file",
            "long_description": f"{label} file {label} file",
            "file_purpose": "Miscellaneous",
        }

    def artifact(parse_through, merge_pointer, gcs_uri_format):
        # Expected "process_as" item for a local-file artifact.
        return {
            "parse_through": parse_through,
            "merge_pointer": merge_pointer,
            "gcs_uri_format": gcs_uri_format,
            "type_ref": "assays/components/local_file.json#properties/file_path",
            "is_artifact": 1,
        }

    api_description = {
        "run id": [
            api_entry(
                "clonality/clonality_pyclone",
                "analysis/clonality/{run id}/{run id}_pyclone.tsv",
                "clonality_pyclone",
            )
        ],
        "tumor cimac id": [
            api_entry(
                "alignment/align_sorted_dedup",
                "analysis/align/{tumor cimac id}/{tumor cimac id}.sorted.dedup.bam",
                "align_sorted_dedup",
            )
        ],
    }

    # ---- expected template, assembled from named pieces ----
    expected_preamble = {
        "protocol identifier": {
            "merge_pointer": "2/protocol_identifier",
            "type_ref": "clinical_trial.json#properties/protocol_identifier",
        },
        "folder": {
            "do_not_merge": True,
            "type": "string",
            "allow_empty": True,
        },
    }
    expected_columns = {
        "run id": {
            "merge_pointer": "/run_id",
            "type_ref":
            "assays/wes_analysis.json#definitions/pair_analysis/properties/run_id",
            "process_as": [
                artifact(
                    "lambda run: f'{folder or \"\"}analysis/clonality/{run}/{run}_pyclone.tsv'",
                    "/clonality/clonality_pyclone",
                    "{protocol identifier}/wes/{run id}/analysis/clonality_pyclone.tsv",
                )
            ],
        },
        "tumor cimac id": {
            "merge_pointer": "/tumor/cimac_id",
            "type_ref": "sample.json#properties/cimac_id",
            "process_as": [
                artifact(
                    "lambda id: f'{folder or \"\"}analysis/align/{id}/{id}.sorted.dedup.bam'",
                    "/tumor/alignment/align_sorted_dedup",
                    "{protocol identifier}/wes/{run id}/analysis/tumor/{tumor cimac id}/sorted.dedup.bam",
                )
            ],
        },
        "comments": {
            "type_ref":
            "assays/wes_analysis.json#definitions/pair_analysis/properties/comments",
            "merge_pointer": "0/comments",
            "allow_empty": True,
        },
    }
    expected_template = {
        "title": "WES analysis template",
        "description": "Metadata information for WES Analysis output.",
        "prism_template_root_object_schema": "assays/wes_analysis.json",
        "prism_template_root_object_pointer": "/analysis/wes_analysis",
        "properties": {
            "worksheets": {
                "WES Analysis": {
                    "preamble_rows": expected_preamble,
                    "prism_data_object_pointer": "/pair_runs/-",
                    "data_columns": {"WES Runs": expected_columns},
                },
                **_excluded_samples_worksheet_snippet,
            }
        },
    }

    assay_schema = _load_dont_validate_schema("assays/wes_analysis.json")

    converted = _convert_api_to_template("wes", api_description, assay_schema)
    assert DeepDiff(expected_template, converted) == {}