Пример #1
0
class Postprocessing(Processing):
    name = fields.String(
        validate=field_validators.OneOf(get_args(raw_nodes.PostprocessingName)),
        required=True,
        bioimageio_description=f"Name of postprocessing. One of: {', '.join(get_args(raw_nodes.PostprocessingName))}.",
    )
    kwargs = fields.Kwargs(
        bioimageio_description=f"Key word arguments as described in [postprocessing spec]"
        f"(https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/postprocessing_spec_"
        f"{'_'.join(get_args(raw_nodes.FormatVersion)[-1].split('.')[:2])}.md)."
    )

    class scale_range(Preprocessing.scale_range):
        reference_tensor = fields.String(
            required=False,
            validate=field_validators.Predicate("isidentifier"),
            bioimageio_description="Tensor name to compute the percentiles from. Default: The tensor itself. "
            "If mode==per_dataset this needs to be the name of an input tensor.",
        )

    class scale_mean_variance(SharedProcessingSchema):
        bioimageio_description = "Scale the tensor s.t. its mean and variance match a reference tensor."
        mode = fields.ProcMode(required=True, valid_modes=("per_dataset", "per_sample"))
        reference_tensor = fields.String(
            required=True,
            validate=field_validators.Predicate("isidentifier"),
            bioimageio_description="Name of tensor to match.",
        )
Пример #2
0
class Postprocessing(Postprocessing03):
    kwargs = fields.Kwargs(
        bioimageio_description=
        f"Key word arguments as described in [postprocessing spec]"
        f"(https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/postprocessing_spec_"
        f"{'_'.join(get_args(raw_nodes.FormatVersion)[-1].split('.')[:2])}.md)."
    )
Пример #3
0
class RunMode(_BioImageIOSchema):
    name = fields.String(required=True,
                         bioimageio_description="The name of the `run_mode`")
    kwargs = fields.Kwargs()

    @validates("name")
    def warn_on_unrecognized_run_mode(self, value: str):
        if isinstance(value, str):
            self.warn("name", f"Unrecognized run mode '{value}'")
Пример #4
0
class Preprocessing(Processing):
    name = fields.String(
        required=True,
        validate=field_validators.OneOf(get_args(raw_nodes.PreprocessingName)),
        bioimageio_description=f"Name of preprocessing. One of: {', '.join(get_args(raw_nodes.PreprocessingName))}.",
    )
    kwargs = fields.Kwargs(
        bioimageio_description=f"Key word arguments as described in [preprocessing spec]"
        f"(https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/preprocessing_spec_"
        f"{'_'.join(get_args(raw_nodes.FormatVersion)[-1].split('.')[:2])}.md)."
    )

    class scale_range(SharedProcessingSchema):
        bioimageio_description = "Scale with percentiles."
        mode = fields.ProcMode(required=True, valid_modes=("per_dataset", "per_sample"))
        axes = fields.Axes(
            required=True,
            valid_axes="czyx",
            bioimageio_description="The subset of axes to normalize jointly. For example xy to normalize the two image "
            "axes for 2d data jointly. The batch axis (b) is not valid here.",
        )
        min_percentile = fields.Float(
            default=0,
            validate=field_validators.Range(0, 100, min_inclusive=True, max_inclusive=False),
            bioimageio_description="The lower percentile used for normalization, in range 0 to 100. Default value: 0.",
        )
        max_percentile = fields.Float(
            default=100,
            validate=field_validators.Range(1, 100, min_inclusive=False, max_inclusive=True),
            bioimageio_description="The upper percentile used for normalization, in range 1 to 100. Has to be bigger "
            "than min_percentile. Default value: 100. The range is 1 to 100 instead of 0 to 100 to avoid mistakenly "
            "accepting percentiles specified in the range 0.0 to 1.0.",
        )
        eps = fields.Float(
            missing=1e-6,
            bioimageio_description="Epsilon for numeric stability: "
            "`out = (tensor - v_lower) / (v_upper - v_lower + eps)`; "
            "with `v_lower,v_upper` values at the respective percentiles. Default value: 10^-6.",
        )

        @validates_schema
        def min_smaller_max(self, data, **kwargs):
            min_p = data.get("min_percentile", 0)
            max_p = data.get("max_percentile", 100)
            if min_p >= max_p:
                raise ValidationError(f"min_percentile {min_p} >= max_percentile {max_p}")
Пример #5
0
class PytorchStateDictWeightsEntry(_WeightsEntryBase):
    bioimageio_description = "PyTorch state dictionary weights format"
    weights_format = fields.String(
        validate=field_validators.Equal("pytorch_state_dict"),
        required=True,
        load_only=True)
    architecture = fields.ImportableSource(
        required=True,
        bioimageio_description=
        "Source code of the model architecture that either points to a "
        "local implementation: `<relative path to file>:<identifier of implementation within the file>` or the "
        "implementation in an available dependency: `<root-dependency>.<sub-dependency>.<identifier>`.\nFor example: "
        "`my_function.py:MyImplementation` or `bioimageio.core.some_module.some_class_or_function`.",
    )
    architecture_sha256 = fields.String(
        bioimageio_maybe_required=True,
        validate=field_validators.Length(equal=64),
        bioimageio_description=
        "This field is only required if the architecture points to a source file. "
        "SHA256 checksum of the model source code file." +
        _common_sha256_hint.replace(
            "    ", "        "),  # sha256 hint with one more intend level
    )
    kwargs = fields.Kwargs(
        bioimageio_description=
        "Keyword arguments for the implementation specified by `architecture`."
    )
    pytorch_version = fields.Version()

    @validates_schema
    def sha_for_source_code_file(self, data, **kwargs):
        arch = data.get("architecture")
        if isinstance(arch, raw_nodes.ImportableModule):
            return
        elif isinstance(arch, raw_nodes.ImportableSourceFile):
            sha = data.get("architecture_sha256")
            if sha is None:
                raise ValidationError(
                    "When specifying 'architecture' with a callable from a source file, "
                    "the corresponding 'architecture_sha256' field is required."
                )
Пример #6
0
class Model(rdf.schema.RDF):
    raw_nodes = raw_nodes

    class Meta:
        unknown = RAISE

    bioimageio_description = f"""# BioImage.IO Model Resource Description File Specification {get_args(raw_nodes.FormatVersion)[-1]}
This specification defines the fields used in a BioImage.IO-compliant resource description file (`RDF`) for describing AI models with pretrained weights.
These fields are typically stored in YAML files which we called Model Resource Description Files or `model RDF`.
The model RDFs can be downloaded or uploaded to the bioimage.io website, produced or consumed by BioImage.IO-compatible consumers(e.g. image analysis software or other website).

The model RDF YAML file contains mandatory and optional fields. In the following description, optional fields are indicated by _optional_.
_optional*_ with an asterisk indicates the field is optional depending on the value in another field.
"""
    # todo: unify authors with RDF (optional or required?)
    authors = fields.List(
        fields.Nested(Author()), required=True, bioimageio_description=rdf.schema.RDF.authors_bioimageio_description
    )

    badges = missing_
    cite = fields.List(
        fields.Nested(CiteEntry()),
        required=True,  # todo: unify authors with RDF (optional or required?)
        bioimageio_description=rdf.schema.RDF.cite_bioimageio_description,
    )

    documentation = fields.Union(
        [
            fields.URL(),
            fields.RelativeLocalPath(
                validate=field_validators.Attribute(
                    "suffix",
                    field_validators.Equal(
                        ".md", error="{!r} is invalid; expected markdown file with '.md' extension."
                    ),
                )
            ),
        ],
        required=True,
        bioimageio_description="Relative path to file with additional documentation in markdown. This means: 1) only "
        "relative file path is allowed 2) the file must be in markdown format with `.md` file name extension 3) URL is "
        "not allowed. It is recommended to use `README.md` as the documentation name.",
    )

    download_url = missing_

    dependencies = fields.Dependencies(  # todo: add validation (0.4.0?)
        bioimageio_description="Dependency manager and dependency file, specified as `<dependency manager>:<relative "
        "path to file>`. For example: 'conda:./environment.yaml', 'maven:./pom.xml', or 'pip:./requirements.txt'"
    )

    format_version = fields.String(
        validate=field_validators.OneOf(get_args_flat(raw_nodes.FormatVersion)),
        required=True,
        bioimageio_description_order=0,
        bioimageio_description=f"""Version of the BioImage.IO Model Resource Description File Specification used.
This is mandatory, and important for the consumer software to verify before parsing the fields.
The recommended behavior for the implementation is to keep backward compatibility and throw an error if the model yaml
is in an unsupported format version. The current format version described here is
{get_args(raw_nodes.FormatVersion)[-1]}""",
    )

    framework = fields.String(
        validate=field_validators.OneOf(get_args(raw_nodes.Framework)),
        bioimageio_description=f"The deep learning framework of the source code. One of: "
        f"{', '.join(get_args(raw_nodes.Framework))}. This field is only required if the field `source` is present.",
    )

    git_repo = fields.String(
        validate=field_validators.URL(schemes=["http", "https"]),
        bioimageio_description=rdf.schema.RDF.git_repo_bioimageio_description
        + "If the model is contained in a subfolder of a git repository, then a url to the exact folder"
        + "(which contains the configuration yaml file) should be used.",
    )

    icon = missing_

    kwargs = fields.Kwargs(
        bioimageio_description="Keyword arguments for the implementation specified by `source`. "
        "This field is only required if the field `source` is present."
    )

    language = fields.String(
        validate=field_validators.OneOf(get_args(raw_nodes.Language)),
        bioimageio_maybe_required=True,
        bioimageio_description=f"Programming language of the source code. One of: "
        f"{', '.join(get_args(raw_nodes.Language))}. This field is only required if the field `source` is present.",
    )

    license = fields.String(
        required=True,  # todo: unify license with RDF (optional or required?)
        bioimageio_description=rdf.schema.RDF.license_bioimageio_description,
    )

    name = fields.String(
        # validate=field_validators.Length(max=36),  # todo: enforce in future version (0.4.0?)
        required=True,
        bioimageio_description="Name of this model. It should be human-readable and only contain letters, numbers, "
        "underscore '_',  minus '-' or spaces and not be longer than 36 characters.",
    )

    packaged_by = fields.List(
        fields.Nested(Author()),
        bioimageio_description=f"The persons that have packaged and uploaded this model. Only needs to be specified if "
        f"different from `authors` in root or any entry in `weights`.",
    )

    parent = fields.Nested(
        ModelParent(),
        bioimageio_description="Parent model from which the trained weights of this model have been derived, e.g. by "
        "finetuning the weights of this model on a different dataset. For format changes of the same trained model "
        "checkpoint, see `weights`.",
    )

    run_mode = fields.Nested(
        RunMode(),
        bioimageio_description="Custom run mode for this model: for more complex prediction procedures like test time "
        "data augmentation that currently cannot be expressed in the specification. "
        "No standard run modes are defined yet.",
    )

    sha256 = fields.String(
        validate=field_validators.Length(equal=64),
        bioimageio_description="SHA256 checksum of the model source code file."
        + _common_sha256_hint
        + " This field is only required if the field source is present.",
    )

    source = fields.ImportableSource(
        bioimageio_maybe_required=True,
        bioimageio_description="Language and framework specific implementation. As some weights contain the model "
        "architecture, the source is optional depending on the present weight formats. `source` can either point to a "
        "local implementation: `<relative path to file>:<identifier of implementation within the source file>` or the "
        "implementation in an available dependency: `<root-dependency>.<sub-dependency>.<identifier>`.\nFor example: "
        "`my_function.py:MyImplementation` or `core_library.some_module.some_function`.",
    )

    timestamp = fields.DateTime(
        required=True,
        bioimageio_description="Timestamp of the initial creation of this model in [ISO 8601]"
        "(#https://en.wikipedia.org/wiki/ISO_8601) format.",
    )

    weights = fields.Dict(
        fields.String(
            validate=field_validators.OneOf(get_args(raw_nodes.WeightsFormat)),
            required=True,
            bioimageio_description=f"Format of this set of weights. Weight formats can define additional (optional or "
            f"required) fields. See [weight_formats_spec_0_3.md]"
            f"(https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/weight_formats_spec_0_3.md). "
            f"One of: {', '.join(get_args(raw_nodes.WeightsFormat))}",
        ),
        fields.Union([fields.Nested(we()) for we in get_args(WeightsEntry)]),
        required=True,
        bioimageio_description="The weights for this model. Weights can be given for different formats, but should "
        "otherwise be equivalent. The available weight formats determine which consumers can use this model.",
    )

    @pre_load
    def add_weights_format_key_to_weights_entry_value(self, data: dict, many=False, partial=False, **kwargs):
        data = deepcopy(data)  # Schema.validate() calls pre_load methods, thus we should not modify the input data
        if many or partial:
            raise NotImplementedError

        for weights_format, weights_entry in data.get("weights", {}).items():
            if "weights_format" in weights_entry:
                raise ValidationError(f"Got unexpected key 'weights_format' in weights entry {weights_format}")

            weights_entry["weights_format"] = weights_format

        return data

    inputs = fields.List(
        fields.Nested(InputTensor()), bioimageio_description="Describes the input tensors expected by this model."
    )
    outputs = fields.List(
        fields.Nested(OutputTensor()), bioimageio_description="Describes the output tensors from this model."
    )

    test_inputs = fields.List(
        fields.Union([fields.URI(), fields.RelativeLocalPath()]),
        required=True,
        bioimageio_description="List of URIs or local relative paths to test inputs as described in inputs for "
        "**a single test case**. "
        "This means if your model has more than one input, you should provide one URI for each input. "
        "Each test input should be a file with a ndarray in "
        "[numpy.lib file format](https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html#module-numpy.lib.format)."
        "The extension must be '.npy'.",
    )
    test_outputs = fields.List(
        fields.Union([fields.URI(), fields.RelativeLocalPath()]),
        required=True,
        bioimageio_description="Analog to to test_inputs.",
    )

    sample_inputs = fields.List(
        fields.Union([fields.URI(), fields.RelativeLocalPath()]),
        bioimageio_description="List of URIs/local relative paths to sample inputs to illustrate possible inputs for "
        "the model, for example stored as png or tif images. "
        "The model is not tested with these sample files that serve to inform a human user about an example use case.",
    )
    sample_outputs = fields.List(
        fields.Union([fields.URI(), fields.RelativeLocalPath()]),
        bioimageio_description="List of URIs/local relative paths to sample outputs corresponding to the "
        "`sample_inputs`.",
    )

    config = fields.YamlDict(
        bioimageio_description=rdf.schema.RDF.config_bioimageio_description
        + """

    For example:
    ```yaml
    config:
      # custom config for DeepImageJ, see https://github.com/bioimage-io/configuration/issues/23
      deepimagej:
        model_keys:
          # In principle the tag "SERVING" is used in almost every tf model
          model_tag: tf.saved_model.tag_constants.SERVING
          # Signature definition to call the model. Again "SERVING" is the most general
          signature_definition: tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        test_information:
          input_size: [2048x2048] # Size of the input images
          output_size: [1264x1264 ]# Size of all the outputs
          device: cpu # Device used. In principle either cpu or GPU
          memory_peak: 257.7 Mb # Maximum memory consumed by the model in the device
          runtime: 78.8s # Time it took to run the model
          pixel_size: [9.658E-4µmx9.658E-4µm] # Size of the pixels of the input
    ```
"""
    )

    @validates_schema
    def language_and_framework_match(self, data, **kwargs):
        field_names = ("language", "framework")
        valid_combinations = [
            ("python", "scikit-learn"),  # todo: remove
            ("python", "pytorch"),
            ("python", "tensorflow"),
            ("java", "tensorflow"),
        ]
        if "source" not in data:
            valid_combinations.append((missing_, missing_))
            valid_combinations.append(("python", missing_))
            valid_combinations.append(("java", missing_))

        combination = tuple(data.get(name, missing_) for name in field_names)
        if combination not in valid_combinations:
            raise ValidationError(f"invalid combination of {dict(zip(field_names, combination))}")

    @validates_schema
    def source_specified_if_required(self, data, **kwargs):
        if "source" in data:
            return

        weights_format_requires_source = {
            "pytorch_state_dict": True,
            "pytorch_script": False,
            "keras_hdf5": False,
            "tensorflow_js": False,
            "tensorflow_saved_model_bundle": False,
            "onnx": False,
        }
        require_source = {wf for wf in data["weights"] if weights_format_requires_source[wf]}
        if require_source:
            raise ValidationError(
                f"These specified weight formats require source code to be specified: {require_source}"
            )

    @validates_schema
    def validate_reference_tensor_names(self, data, **kwargs):
        valid_input_tensor_references = [ipt.name for ipt in data["inputs"]]
        for out in data["outputs"]:
            if out.postprocessing is missing_:
                continue

            for postpr in out.postprocessing:
                if postpr.kwargs is missing_:
                    continue

                ref_tensor = postpr.kwargs.get("reference_tensor", missing_)
                if ref_tensor is not missing_ and ref_tensor not in valid_input_tensor_references:
                    raise ValidationError(f"{ref_tensor} not found in inputs")

    @validates_schema
    def weights_entries_match_weights_formats(self, data, **kwargs):
        weights: typing.Dict[str, _WeightsEntryBase] = data["weights"]
        for weights_format, weights_entry in weights.items():
            if weights_format in ["keras_hdf5", "tensorflow_js", "tensorflow_saved_model_bundle"]:
                assert isinstance(
                    weights_entry,
                    (
                        raw_nodes.KerasHdf5WeightsEntry,
                        raw_nodes.TensorflowJsWeightsEntry,
                        raw_nodes.TensorflowSavedModelBundleWeightsEntry,
                    ),
                )
                if weights_entry.tensorflow_version is missing_:
                    # todo: raise ValidationError (allow -> require)?
                    warnings.warn(f"missing 'tensorflow_version' entry for weights format {weights_format}")

            if weights_format == "onnx":
                assert isinstance(weights_entry, raw_nodes.OnnxWeightsEntry)
                if weights_entry.opset_version is missing_:
                    # todo: raise ValidationError?
                    warnings.warn(f"missing 'opset_version' entry for weights format {weights_format}")
Пример #7
0
class RunMode(_BioImageIOSchema):
    name = fields.String(
        required=True, bioimageio_description="The name of the `run_mode`"
    )  # todo: limit valid run mode names
    kwargs = fields.Kwargs()