Example #1
def train(
    proj_name: str,
    Model: str,
    dataset_cls: str,
    net_fn: str,
    net_args: Dict,
    dataset_args: Dict,
):
    """ Train Function """

    dataset_module = importlib.import_module(
        f"manythings.data.dta_{dataset_cls}")
    dataset_cls_ = getattr(dataset_module, dataset_cls)

    network_module = importlib.import_module(f"manythings.networks.{net_fn}")
    network_fn_ = getattr(network_module, net_fn)

    model_module = importlib.import_module(f"manythings.models.{Model}")
    model_cls_ = getattr(model_module, Model)

    config = {
        "model": Model,
        "dataset_cls": dataset_cls,
        "net_fn": net_fn,
        "net_args": net_args,
        "dataset_args": dataset_args
    }

    input_schema = Schema([
        TensorSpec(np.dtype(np.uint8), (-1, 71), "encoder_input"),
        TensorSpec(np.dtype(np.uint8), (-1, 93), "decoder_input")
    ])

    output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 93))])

    signature = ModelSignature(inputs=input_schema, outputs=output_schema)
    data = dataset_cls_()
    data.load_or_generate()
    data.preprocess()

    with wandb.init(project=proj_name, config=config):
        """"""
        config = wandb.config
        model = model_cls_(dataset_cls_, network_fn_, net_args, dataset_args)

        callbacks = [
            WandbCallback(
                # training_data=(
                #     [data.encoder_input_data, data.decoder_input_data],
                #     data.decoder_target_data
                # ),
                # log_weights=True,
                # log_gradients=True
            )
        ]

        model.fit(callbacks=callbacks)
        mlflow.keras.save_model(model.network,
                                "saved_models/seq2seq",
                                signature=signature)
Example #2
def test_dtype(nparray, dtype):
    schema = _infer_schema(nparray)
    assert schema == Schema([TensorSpec(np.dtype(dtype), (-1, ))])
    spec = schema.inputs[0]
    recreated_spec = TensorSpec.from_json_dict(**spec.to_dict())
    assert spec == recreated_spec
    enforced_array = _enforce_tensor_spec(nparray, spec)
    assert isinstance(enforced_array, np.ndarray)
Example #3
def test_schema_creation_with_named_and_unnamed_spec():
    with pytest.raises(MlflowException) as ex:
        Schema([
            TensorSpec(np.dtype("float64"), (-1, ), "blah"),
            TensorSpec(np.dtype("float64"), (-1, ))
        ])
    assert "Creating Schema with a combination of named and unnamed columns" in ex.value.message

    with pytest.raises(MlflowException) as ex:
        Schema([ColSpec("double", "blah"), ColSpec("double")])
    assert "Creating Schema with a combination of named and unnamed columns" in ex.value.message
Example #4
def test_schema_creation():
    # can create schema with named col specs
    Schema([ColSpec("double", "a"), ColSpec("integer", "b")])

    # can create schema with unnamed col specs
    Schema([ColSpec("double"), ColSpec("integer")])

    # can create schema with multiple named tensor specs
    Schema([TensorSpec(np.dtype("float64"), (-1,), "a"), TensorSpec(np.dtype("uint8"), (-1,), "b")])

    # can create schema with single unnamed tensor spec
    Schema([TensorSpec(np.dtype("float64"), (-1,))])

    # combination of tensor and col spec is not allowed
    with pytest.raises(MlflowException) as ex:
        Schema([TensorSpec(np.dtype("float64"), (-1,)), ColSpec("double")])
    assert "Please choose one of" in ex.value.message

    # combination of named and unnamed inputs is not allowed
    with pytest.raises(MlflowException) as ex:
        Schema(
            [TensorSpec(np.dtype("float64"), (-1,), "blah"), TensorSpec(np.dtype("float64"), (-1,))]
        )
    assert "Creating Schema with a combination of named and unnamed inputs" in ex.value.message

    with pytest.raises(MlflowException) as ex:
        Schema([ColSpec("double", "blah"), ColSpec("double")])
    assert "Creating Schema with a combination of named and unnamed inputs" in ex.value.message

    # multiple unnamed tensor specs is not allowed
    with pytest.raises(MlflowException) as ex:
        Schema([TensorSpec(np.dtype("double"), (-1,)), TensorSpec(np.dtype("double"), (-1,))])
    assert "Creating Schema with multiple unnamed TensorSpecs is not supported" in ex.value.message
Example #5
def test_model_signature_with_colspec_and_tensorspec():
    signature1 = ModelSignature(inputs=Schema([ColSpec(DataType.double)]))
    signature2 = ModelSignature(inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]))
    assert signature1 != signature2
    assert signature2 != signature1

    signature3 = ModelSignature(
        inputs=Schema([ColSpec(DataType.double)]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
    )
    signature4 = ModelSignature(
        inputs=Schema([ColSpec(DataType.double)]), outputs=Schema([ColSpec(DataType.double)]),
    )
    assert signature3 != signature4
    assert signature4 != signature3
Example #6
def test_model_load_input_example_failures():
    with TempDir(chdr=True) as tmp:
        input_example = np.array([[3, 4, 5]], dtype=np.int32)
        sig = ModelSignature(
            inputs=Schema([
                TensorSpec(type=input_example.dtype, shape=input_example.shape)
            ]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(
            tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        loaded_example = loaded_model.load_input_example(local_path)
        assert loaded_example is not None

        with pytest.raises(FileNotFoundError,
                           match="No such file or directory"):
            loaded_model.load_input_example(
                os.path.join(local_path, "folder_which_does_not_exist"))

        path = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"])
        os.remove(path)
        with pytest.raises(FileNotFoundError,
                           match="No such file or directory"):
            loaded_model.load_input_example(local_path)
Example #7
    def on_train_end(self, args, state, control, **kwargs):
        input_schema = Schema([ColSpec(name="text", type="string")])
        output_schema = Schema([TensorSpec(np.dtype(np.float64), (-1, -1))])
        signature = ModelSignature(inputs=input_schema, outputs=output_schema)

        pyfunc.log_model(
            # artifact path is _relative_ to run root in mlflow
            artifact_path="bert_classifier_model",
            # Dir with the module files for dependencies
            code_path=[
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "models.py"),
                os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "utils.py")
            ],
            python_model=MLFlowBertClassificationModel(),
            artifacts={
                "model": state.best_model_checkpoint,
            },
            conda_env={
                'name': 'classifier-env',
                'channels': ['defaults', 'pytorch', 'pypi'],
                'dependencies': [
                    'python=3.8.8', 'pip', 'pytorch=1.8.0', {
                        'pip': [
                            'transformers==4.4.2', 'mlflow==1.15.0',
                            'numpy==1.20.1'
                        ]
                    }
                ]
            },
            signature=signature,
            await_registration_for=5,
            registered_model_name=self.registered_name)
Example #8
def test_schema_inference_on_dictionary(dict_of_ndarrays):
    # test dictionary
    schema = _infer_schema(dict_of_ndarrays)
    assert schema == Schema([
        TensorSpec(tensor.dtype, _get_tensor_shape(tensor), name)
        for name, tensor in dict_of_ndarrays.items()
    ])
    # test exception is raised if non-numpy data in dictionary
    with pytest.raises(TypeError):
        _infer_schema({"x": 1})
    with pytest.raises(TypeError):
        _infer_schema({"x": [1]})
Example #9
def test_model_load_input_example_no_signature():
    with TempDir(chdr=True) as tmp:
        input_example = np.array([[3, 4, 5]], dtype=np.int32)
        sig = ModelSignature(
            inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example=None)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        loaded_example = loaded_model.load_input_example(local_path)
        assert loaded_example is None
Example #10
def test_schema_inference_on_numpy_array(pandas_df_with_all_types):
    for col in pandas_df_with_all_types:
        data = pandas_df_with_all_types[col].to_numpy()
        schema = _infer_schema(data)
        assert schema == Schema([TensorSpec(type=data.dtype, shape=(-1, ))])

    # test boolean
    schema = _infer_schema(np.array([True, False, True], dtype=np.bool_))
    assert schema == Schema([TensorSpec(np.dtype(np.bool_), (-1, ))])

    # test bytes
    schema = _infer_schema(np.array([bytes([1])], dtype=np.bytes_))
    assert schema == Schema([TensorSpec(np.dtype("S1"), (-1, ))])

    # test (u)ints
    for t in [
            np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32,
            np.uint64, np.int64
    ]:
        schema = _infer_schema(np.array([1, 2, 3], dtype=t))
        assert schema == Schema([TensorSpec(np.dtype(t), (-1, ))])

    # test floats
    for t in [np.float16, np.float32, np.float64]:
        schema = _infer_schema(np.array([1.1, 2.2, 3.3], dtype=t))
        assert schema == Schema([TensorSpec(np.dtype(t), (-1, ))])

    if hasattr(np, "float128"):
        schema = _infer_schema(np.array([1.1, 2.2, 3.3], dtype=np.float128))
        assert schema == Schema([TensorSpec(np.dtype(np.float128), (-1, ))])
Example #11
def test_model_load_input_example_scipy():
    with TempDir(chdr=True) as tmp:
        input_example = csc_matrix(np.arange(0, 12, 0.5).reshape(3, 8))
        sig = ModelSignature(
            inputs=Schema([TensorSpec(type=input_example.data.dtype, shape=input_example.shape)]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        loaded_example = loaded_model.load_input_example(local_path)

        assert isinstance(loaded_example, csc_matrix)
        assert np.array_equal(input_example.data, loaded_example.data)
Example #12
def test_schema_creation_with_tensor_and_col_spec():
    with pytest.raises(MlflowException) as ex:
        Schema([TensorSpec(np.dtype("float64"), (-1, )), ColSpec("double")])
    assert "Please choose one of" in ex.value.message
Example #13
def test_tensor_spec():
    a1 = TensorSpec(np.dtype("float64"), (-1, 3, 3), "a")
    a2 = TensorSpec(np.dtype("float"), (-1, 3, 3),
                    "a")  # float defaults to float64
    a3 = TensorSpec(np.dtype("float"), [-1, 3, 3], "a")
    a4 = TensorSpec(np.dtype("int"), (-1, 3, 3), "a")
    assert a1 == a2
    assert a1 == a3
    assert a1 != a4
    b1 = TensorSpec(np.dtype("float64"), (-1, 3, 3), "b")
    assert b1 != a1
    with pytest.raises(TypeError) as ex1:
        TensorSpec("Unsupported", (-1, 3, 3), "a")
    assert "Expected `type` to be instance" in str(ex1.value)
    with pytest.raises(TypeError) as ex2:
        TensorSpec(np.dtype("float64"), np.array([-1, 2, 3]), "b")
    assert "Expected `shape` to be instance" in str(ex2.value)

    a5 = TensorSpec.from_json_dict(**a1.to_dict())
    assert a5 == a1
    assert TensorSpec.from_json_dict(
        **json.loads(json.dumps(a1.to_dict()))) == a1
    a6 = TensorSpec(np.dtype("float64"), (-1, 3, 3))
    a7 = TensorSpec(np.dtype("float64"), (-1, 3, 3), None)
    assert a6 == a7
    assert TensorSpec.from_json_dict(
        **json.loads(json.dumps(a6.to_dict()))) == a6
Example #14
def _infer_schema(data: Any) -> Schema:
    """
    Infer an MLflow schema from a dataset.

    Data inputted as a numpy array or a dictionary is represented by :py:class:`TensorSpec`.
    All other inputted data types are specified by :py:class:`ColSpec`.

    A `TensorSpec` captures the data shape (default variable axis is 0), the data type (numpy.dtype)
    and an optional name for each individual tensor of the dataset.
    A `ColSpec` captures the data type (defined in :py:class:`DataType`) and an optional name for
    each individual column of the dataset.

    This method will raise an exception if the user data contains incompatible types or is not
    passed in one of the supported formats (containers).

    The input should be one of these:
      - pandas.DataFrame or pandas.Series
      - dictionary of { name -> numpy.ndarray}
      - numpy.ndarray
      - pyspark.sql.DataFrame

    The element types should be mappable to one of :py:class:`mlflow.models.signature.DataType` for
    dataframes and to one of numpy types for tensors.

    :param data: Dataset to infer from.

    :return: Schema
    """
    if isinstance(data, dict):
        res = []
        for name in data.keys():
            ndarray = data[name]
            if not isinstance(ndarray, np.ndarray):
                raise TypeError("Data in the dictionary must be of type numpy.ndarray")
            res.append(TensorSpec(type=ndarray.dtype, shape=_get_tensor_shape(ndarray), name=name))
        schema = Schema(res)
    elif isinstance(data, pd.Series):
        schema = Schema([ColSpec(type=_infer_pandas_column(data))])
    elif isinstance(data, pd.DataFrame):
        schema = Schema(
            [ColSpec(type=_infer_pandas_column(data[col]), name=col) for col in data.columns]
        )
    elif isinstance(data, np.ndarray):
        schema = Schema([TensorSpec(type=data.dtype, shape=_get_tensor_shape(data))])
    elif _is_spark_df(data):
        schema = Schema(
            [
                ColSpec(type=_infer_spark_type(field.dataType), name=field.name)
                for field in data.schema.fields
            ]
        )
    else:
        raise TypeError(
            "Expected one of (pandas.DataFrame, numpy array, "
            "dictionary of (name -> numpy.ndarray), pyspark.sql.DataFrame) "
            "but got '{}'".format(type(data))
        )
    if not schema.is_tensor_spec() and any(
        [t in (DataType.integer, DataType.long) for t in schema.column_types()]
    ):
        warnings.warn(
            "Hint: Inferred schema contains integer column(s). Integer columns in "
            "Python cannot represent missing values. If your input data contains "
            "missing values at inference time, it will be encoded as floats and will "
            "cause a schema enforcement error. The best way to avoid this problem is "
            "to infer the model schema based on a realistic data sample (training "
            "dataset) that includes missing values. Alternatively, you can declare "
            "integer columns as doubles (float64) whenever these columns may have "
            "missing values. See `Handling Integers With Missing Values "
            "<https://www.mlflow.org/docs/latest/models.html#"
            "handling-integers-with-missing-values>`_ for more details.",
            stacklevel=2,
        )
    return schema
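
A minimal usage sketch of the function above, assuming the same private import used by the tests in this listing (from mlflow.types.utils import _infer_schema); it shows how each supported container maps to TensorSpec or ColSpec entries:

import numpy as np
import pandas as pd
from mlflow.types.utils import _infer_schema

# A bare ndarray infers a single unnamed TensorSpec; the first axis becomes variable (-1).
print(_infer_schema(np.zeros((4, 28, 28), dtype=np.float32)))

# A dict of name -> ndarray infers one named TensorSpec per key.
print(_infer_schema({"image": np.zeros((4, 28, 28), dtype=np.uint8),
                     "label": np.arange(4, dtype=np.int64)}))

# A pandas DataFrame infers one named ColSpec per column.
print(_infer_schema(pd.DataFrame({"x": [1.0, 2.0], "y": ["a", "b"]})))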
Example #15
def test_model_signature_with_tensorspec():
    signature1 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10))]),
    )
    signature2 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10))]),
    )
    assert signature1 == signature2
    # Single type mismatch
    signature3 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("int"), (-1, 10))]),
    )
    assert signature3 != signature1
    # Name mismatch
    signature4 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10), "misMatch")]),
    )
    assert signature3 != signature4
    as_json = json.dumps(signature1.to_dict())
    signature5 = ModelSignature.from_dict(json.loads(as_json))
    assert signature1 == signature5

    # Test with name
    signature6 = ModelSignature(
        inputs=Schema([
            TensorSpec(np.dtype("float"), (-1, 28, 28), name="image"),
            TensorSpec(np.dtype("int"), (-1, 10), name="metadata"),
        ]),
        outputs=Schema(
            [TensorSpec(np.dtype("float"), (-1, 10), name="outputs")]),
    )
    signature7 = ModelSignature(
        inputs=Schema([
            TensorSpec(np.dtype("float"), (-1, 28, 28), name="image"),
            TensorSpec(np.dtype("int"), (-1, 10), name="metadata"),
        ]),
        outputs=Schema(
            [TensorSpec(np.dtype("float"), (-1, 10), name="outputs")]),
    )
    assert signature6 == signature7
    assert signature1 != signature6

    # Test w/o output
    signature8 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=None,
    )
    as_json = json.dumps(signature8.to_dict())
    signature9 = ModelSignature.from_dict(json.loads(as_json))
    assert signature8 == signature9
Example #16
def test_schema_inference_on_basic_numpy(pandas_df_with_all_types):
    for col in pandas_df_with_all_types:
        data = pandas_df_with_all_types[col].to_numpy()
        schema = _infer_schema(data)
        assert schema == Schema([TensorSpec(type=data.dtype, shape=(-1, ))])
Example #17
# Assumed setup (not shown in the source snippet): load MNIST and reshape the training split
# to match the testX reshape below.
(train_X, train_Y), (test_X, test_Y) = tf.keras.datasets.mnist.load_data()
trainX = train_X.reshape((train_X.shape[0], 28, 28, 1))
testX = test_X.reshape((test_X.shape[0], 28, 28, 1))
trainY = tf.keras.utils.to_categorical(train_Y)
testY = tf.keras.utils.to_categorical(test_Y)

model = tf.keras.models.Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(trainX, trainY, epochs=1, batch_size=32, validation_data=(testX, testY))

input_schema = Schema([
  TensorSpec(np.dtype(np.uint8), (-1, 28, 28, 1)),
])
output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 10))])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)
input_example = np.array([
   [[  0,   0,   0,   0],
    [  0, 134,  25,  56],
    [253, 242, 195,   6],
    [  0,  93,  82,  82]],
   [[  0,  23,  46,   0],
    [ 33,  13,  36, 166],
    [ 76,  75,   0, 255],
    [ 33,  44,  11,  82]]
], dtype=np.uint8)

mlflow.keras.log_model(model, "mnist_cnn", signature=signature, input_example=input_example)
Example #18
def test_tensor_spec():
    a1 = TensorSpec(np.dtype("float64"), (-1, 3, 3), "a")
    a2 = TensorSpec(np.dtype("float"), (-1, 3, 3),
                    "a")  # float defaults to float64
    a3 = TensorSpec(np.dtype("float"), [-1, 3, 3], "a")
    a4 = TensorSpec(np.dtype("int"), (-1, 3, 3), "a")
    assert a1 == a2
    assert a1 == a3
    assert a1 != a4
    b1 = TensorSpec(np.dtype("float64"), (-1, 3, 3), "b")
    assert b1 != a1
    with pytest.raises(TypeError, match="Expected `type` to be instance"):
        TensorSpec("Unsupported", (-1, 3, 3), "a")
    with pytest.raises(TypeError, match="Expected `shape` to be instance"):
        TensorSpec(np.dtype("float64"), np.array([-1, 2, 3]), "b")
    with pytest.raises(
        MlflowException,
        match="MLflow does not support size information in flexible numpy data types",
    ):
        TensorSpec(np.dtype("<U10"), (-1, ), "b")

    a5 = TensorSpec.from_json_dict(**a1.to_dict())
    assert a5 == a1
    assert TensorSpec.from_json_dict(
        **json.loads(json.dumps(a1.to_dict()))) == a1
    a6 = TensorSpec(np.dtype("float64"), (-1, 3, 3))
    a7 = TensorSpec(np.dtype("float64"), (-1, 3, 3), None)
    assert a6 == a7
    assert TensorSpec.from_json_dict(
        **json.loads(json.dumps(a6.to_dict()))) == a6
Example #19
    Parameters,
)

from mlserver_mlflow.metadata import (
    InputSpec,
    _get_content_type,
    _get_shape,
    to_metadata_tensors,
)


@pytest.mark.parametrize(
    "input_spec, expected",
    [
        (
            TensorSpec(name="foo", shape=(2, 2), type=np.dtype("int32")),
            ("INT32", NumpyCodec.ContentType),
        ),
        (
            ColSpec(name="foo", type=DataType.string),
            ("BYTES", StringCodec.ContentType),
        ),
        (
            ColSpec(name="foo", type=DataType.binary),
            ("BYTES", Base64Codec.ContentType),
        ),
    ],
)
def test_get_content_type(input_spec: InputSpec, expected: Tuple[str, str]):
    datatype, content_type = _get_content_type(input_spec)
    assert (datatype, content_type) == expected