def train(
    proj_name: str,
    Model: str,
    dataset_cls: str,
    net_fn: str,
    net_args: Dict,
    dataset_args: Dict,
):
    """Train Function"""
    dataset_module = importlib.import_module(
        f"manythings.data.dta_{dataset_cls}")
    dataset_cls_ = getattr(dataset_module, dataset_cls)
    network_module = importlib.import_module(f"manythings.networks.{net_fn}")
    network_fn_ = getattr(network_module, net_fn)
    model_module = importlib.import_module(f"manythings.models.{Model}")
    model_cls_ = getattr(model_module, Model)

    config = {
        "model": Model,
        "dataset_cls": dataset_cls,
        "net_fn": net_fn,
        "net_args": net_args,
        "dataset_args": dataset_args,
    }

    input_schema = Schema([
        TensorSpec(np.dtype(np.uint8), (-1, 71), "encoder_input"),
        TensorSpec(np.dtype(np.uint8), (-1, 93), "decoder_input"),
    ])
    output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 93))])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    data = dataset_cls_()
    data.load_or_generate()
    data.preprocess()

    with wandb.init(project=proj_name, config=config):
        config = wandb.config
        model = model_cls_(dataset_cls_, network_fn_, net_args, dataset_args)
        callbacks = [
            WandbCallback(
                # training_data=(
                #     [data.encoder_input_data, data.decoder_input_data],
                #     data.decoder_target_data
                # ),
                # log_weights=True,
                # log_gradients=True
            )
        ]
        model.fit(callbacks=callbacks)
        mlflow.keras.save_model(model.network, "saved_models/seq2seq",
                                signature=signature)
def test_dtype(nparray, dtype):
    schema = _infer_schema(nparray)
    assert schema == Schema([TensorSpec(np.dtype(dtype), (-1,))])
    spec = schema.inputs[0]
    recreated_spec = TensorSpec.from_json_dict(**spec.to_dict())
    assert spec == recreated_spec
    enforced_array = _enforce_tensor_spec(nparray, spec)
    assert isinstance(enforced_array, np.ndarray)
def test_schema_creation_with_named_and_unnamed_spec():
    with pytest.raises(MlflowException) as ex:
        Schema([
            TensorSpec(np.dtype("float64"), (-1,), "blah"),
            TensorSpec(np.dtype("float64"), (-1,)),
        ])
    assert "Creating Schema with a combination of named and unnamed columns" in ex.value.message

    with pytest.raises(MlflowException) as ex:
        Schema([ColSpec("double", "blah"), ColSpec("double")])
    assert "Creating Schema with a combination of named and unnamed columns" in ex.value.message
def test_schema_creation():
    # can create schema with named col specs
    Schema([ColSpec("double", "a"), ColSpec("integer", "b")])

    # can create schema with unnamed col specs
    Schema([ColSpec("double"), ColSpec("integer")])

    # can create schema with multiple named tensor specs
    Schema([
        TensorSpec(np.dtype("float64"), (-1,), "a"),
        TensorSpec(np.dtype("uint8"), (-1,), "b"),
    ])

    # can create schema with single unnamed tensor spec
    Schema([TensorSpec(np.dtype("float64"), (-1,))])

    # combination of tensor and col spec is not allowed
    with pytest.raises(MlflowException) as ex:
        Schema([TensorSpec(np.dtype("float64"), (-1,)), ColSpec("double")])
    assert "Please choose one of" in ex.value.message

    # combination of named and unnamed inputs is not allowed
    with pytest.raises(MlflowException) as ex:
        Schema([
            TensorSpec(np.dtype("float64"), (-1,), "blah"),
            TensorSpec(np.dtype("float64"), (-1,)),
        ])
    assert "Creating Schema with a combination of named and unnamed inputs" in ex.value.message

    with pytest.raises(MlflowException) as ex:
        Schema([ColSpec("double", "blah"), ColSpec("double")])
    assert "Creating Schema with a combination of named and unnamed inputs" in ex.value.message

    # multiple unnamed tensor specs is not allowed
    with pytest.raises(MlflowException) as ex:
        Schema([TensorSpec(np.dtype("double"), (-1,)), TensorSpec(np.dtype("double"), (-1,))])
    assert "Creating Schema with multiple unnamed TensorSpecs is not supported" in ex.value.message
def test_model_signature_with_colspec_and_tensorspec():
    signature1 = ModelSignature(inputs=Schema([ColSpec(DataType.double)]))
    signature2 = ModelSignature(inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]))
    assert signature1 != signature2
    assert signature2 != signature1

    signature3 = ModelSignature(
        inputs=Schema([ColSpec(DataType.double)]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
    )
    signature4 = ModelSignature(
        inputs=Schema([ColSpec(DataType.double)]),
        outputs=Schema([ColSpec(DataType.double)]),
    )
    assert signature3 != signature4
    assert signature4 != signature3
def test_model_load_input_example_failures():
    with TempDir(chdr=True) as tmp:
        input_example = np.array([[3, 4, 5]], dtype=np.int32)
        sig = ModelSignature(
            inputs=Schema([
                TensorSpec(type=input_example.dtype, shape=input_example.shape)
            ]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(
            tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))

        loaded_example = loaded_model.load_input_example(local_path)
        assert loaded_example is not None

        with pytest.raises(FileNotFoundError, match="No such file or directory"):
            loaded_model.load_input_example(
                os.path.join(local_path, "folder_which_does_not_exist"))

        path = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"])
        os.remove(path)
        with pytest.raises(FileNotFoundError, match="No such file or directory"):
            loaded_model.load_input_example(local_path)
def on_train_end(self, args, state, control, **kwargs):
    input_schema = Schema([ColSpec(name="text", type="string")])
    # np.float64 replaces the removed np.float alias (both mean float64)
    output_schema = Schema([TensorSpec(np.dtype(np.float64), (-1, -1))])
    signature = ModelSignature(inputs=input_schema, outputs=output_schema)

    pyfunc.log_model(
        # artifact path is _relative_ to run root in mlflow
        artifact_path="bert_classifier_model",
        # Dir with the module files for dependencies
        code_path=[
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "models.py"),
            os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils.py"),
        ],
        python_model=MLFlowBertClassificationModel(),
        artifacts={
            "model": state.best_model_checkpoint,
        },
        conda_env={
            'name': 'classifier-env',
            'channels': ['defaults', 'pytorch', 'pypi'],
            'dependencies': [
                'python=3.8.8',
                'pip',
                'pytorch=1.8.0',
                {
                    'pip': [
                        'transformers==4.4.2',
                        'mlflow==1.15.0',
                        'numpy==1.20.1',
                    ]
                },
            ],
        },
        signature=signature,
        await_registration_for=5,
        registered_model_name=self.registered_name,
    )
def test_schema_inference_on_dictionary(dict_of_ndarrays):
    # test dictionary
    schema = _infer_schema(dict_of_ndarrays)
    assert schema == Schema([
        TensorSpec(tensor.dtype, _get_tensor_shape(tensor), name)
        for name, tensor in dict_of_ndarrays.items()
    ])
    # test exception is raised if non-numpy data in dictionary
    with pytest.raises(TypeError):
        _infer_schema({"x": 1})
    with pytest.raises(TypeError):
        _infer_schema({"x": [1]})
def test_model_load_input_example_no_signature():
    with TempDir(chdr=True) as tmp:
        input_example = np.array([[3, 4, 5]], dtype=np.int32)
        sig = ModelSignature(
            inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example=None)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))

        loaded_example = loaded_model.load_input_example(local_path)
        assert loaded_example is None
def test_schema_inference_on_numpy_array(pandas_df_with_all_types):
    for col in pandas_df_with_all_types:
        data = pandas_df_with_all_types[col].to_numpy()
        schema = _infer_schema(data)
        assert schema == Schema([TensorSpec(type=data.dtype, shape=(-1,))])

    # test boolean
    schema = _infer_schema(np.array([True, False, True], dtype=np.bool_))
    assert schema == Schema([TensorSpec(np.dtype(np.bool_), (-1,))])

    # test bytes
    schema = _infer_schema(np.array([bytes([1])], dtype=np.bytes_))
    assert schema == Schema([TensorSpec(np.dtype("S1"), (-1,))])

    # test (u)ints
    for t in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64]:
        schema = _infer_schema(np.array([1, 2, 3], dtype=t))
        assert schema == Schema([TensorSpec(np.dtype(t), (-1,))])

    # test floats
    for t in [np.float16, np.float32, np.float64]:
        schema = _infer_schema(np.array([1.1, 2.2, 3.3], dtype=t))
        assert schema == Schema([TensorSpec(np.dtype(t), (-1,))])

    if hasattr(np, "float128"):
        schema = _infer_schema(np.array([1.1, 2.2, 3.3], dtype=np.float128))
        assert schema == Schema([TensorSpec(np.dtype(np.float128), (-1,))])
def test_model_load_input_example_scipy():
    with TempDir(chdr=True) as tmp:
        input_example = csc_matrix(np.arange(0, 12, 0.5).reshape(3, 8))
        sig = ModelSignature(
            inputs=Schema([TensorSpec(type=input_example.data.dtype, shape=input_example.shape)]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))

        loaded_example = loaded_model.load_input_example(local_path)
        assert isinstance(loaded_example, csc_matrix)
        assert np.array_equal(input_example.data, loaded_example.data)
def test_schema_creation_with_tensor_and_col_spec():
    with pytest.raises(MlflowException) as ex:
        Schema([TensorSpec(np.dtype("float64"), (-1,)), ColSpec("double")])
    assert "Please choose one of" in ex.value.message
def test_tensor_spec():
    a1 = TensorSpec(np.dtype("float64"), (-1, 3, 3), "a")
    a2 = TensorSpec(np.dtype("float"), (-1, 3, 3), "a")  # float defaults to float64
    a3 = TensorSpec(np.dtype("float"), [-1, 3, 3], "a")
    a4 = TensorSpec(np.dtype("int"), (-1, 3, 3), "a")
    assert a1 == a2
    assert a1 == a3
    assert a1 != a4
    b1 = TensorSpec(np.dtype("float64"), (-1, 3, 3), "b")
    assert b1 != a1

    with pytest.raises(TypeError) as ex1:
        TensorSpec("Unsupported", (-1, 3, 3), "a")
    assert "Expected `type` to be instance" in str(ex1.value)

    with pytest.raises(TypeError) as ex2:
        TensorSpec(np.dtype("float64"), np.array([-1, 2, 3]), "b")
    assert "Expected `shape` to be instance" in str(ex2.value)

    a5 = TensorSpec.from_json_dict(**a1.to_dict())
    assert a5 == a1
    assert TensorSpec.from_json_dict(**json.loads(json.dumps(a1.to_dict()))) == a1

    a6 = TensorSpec(np.dtype("float64"), (-1, 3, 3))
    a7 = TensorSpec(np.dtype("float64"), (-1, 3, 3), None)
    assert a6 == a7
    assert TensorSpec.from_json_dict(**json.loads(json.dumps(a6.to_dict()))) == a6
def _infer_schema(data: Any) -> Schema:
    """
    Infer an MLflow schema from a dataset.

    Data inputted as a numpy array or a dictionary is represented by :py:class:`TensorSpec`.
    All other inputted data types are specified by :py:class:`ColSpec`.

    A `TensorSpec` captures the data shape (default variable axis is 0), the data type
    (numpy.dtype) and an optional name for each individual tensor of the dataset.
    A `ColSpec` captures the data type (defined in :py:class:`DataType`) and an optional
    name for each individual column of the dataset.

    This method will raise an exception if the user data contains incompatible types or is not
    passed in one of the supported formats (containers).

    The input should be one of these:
      - pandas.DataFrame or pandas.Series
      - dictionary of { name -> numpy.ndarray}
      - numpy.ndarray
      - pyspark.sql.DataFrame

    The element types should be mappable to one of :py:class:`mlflow.models.signature.DataType`
    for dataframes and to one of numpy types for tensors.

    :param data: Dataset to infer from.

    :return: Schema
    """
    if isinstance(data, dict):
        res = []
        for name in data.keys():
            ndarray = data[name]
            if not isinstance(ndarray, np.ndarray):
                raise TypeError("Data in the dictionary must be of type numpy.ndarray")
            res.append(TensorSpec(type=ndarray.dtype, shape=_get_tensor_shape(ndarray), name=name))
        schema = Schema(res)
    elif isinstance(data, pd.Series):
        schema = Schema([ColSpec(type=_infer_pandas_column(data))])
    elif isinstance(data, pd.DataFrame):
        schema = Schema(
            [ColSpec(type=_infer_pandas_column(data[col]), name=col) for col in data.columns]
        )
    elif isinstance(data, np.ndarray):
        schema = Schema([TensorSpec(type=data.dtype, shape=_get_tensor_shape(data))])
    elif _is_spark_df(data):
        schema = Schema(
            [
                ColSpec(type=_infer_spark_type(field.dataType), name=field.name)
                for field in data.schema.fields
            ]
        )
    else:
        raise TypeError(
            "Expected one of (pandas.DataFrame, numpy array, "
            "dictionary of (name -> numpy.ndarray), pyspark.sql.DataFrame) "
            "but got '{}'".format(type(data))
        )
    if not schema.is_tensor_spec() and any(
        [t in (DataType.integer, DataType.long) for t in schema.column_types()]
    ):
        warnings.warn(
            "Hint: Inferred schema contains integer column(s). Integer columns in "
            "Python cannot represent missing values. If your input data contains "
            "missing values at inference time, it will be encoded as floats and will "
            "cause a schema enforcement error. The best way to avoid this problem is "
            "to infer the model schema based on a realistic data sample (training "
            "dataset) that includes missing values. Alternatively, you can declare "
            "integer columns as doubles (float64) whenever these columns may have "
            "missing values. See `Handling Integers With Missing Values "
            "<https://www.mlflow.org/docs/latest/models.html#"
            "handling-integers-with-missing-values>`_ for more details.",
            stacklevel=2,
        )
    return schema
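# --- Usage sketch for the function above (not part of the MLflow source; the
# variable names below are illustrative only, and it assumes _infer_schema is
# importable, e.g. from mlflow.types.utils). A dict of numpy arrays yields a
# tensor-based schema, while a pandas DataFrame yields a column-based one.
import numpy as np
import pandas as pd

# dict of name -> ndarray: the first axis becomes the variable dimension (-1)
tensor_schema = _infer_schema({"image": np.zeros((4, 28, 28), dtype=np.uint8)})
# -> Schema([TensorSpec(np.dtype("uint8"), (-1, 28, 28), "image")])
assert tensor_schema.is_tensor_spec()

# DataFrame: float64 maps to DataType.double, int64 to DataType.long
# (the integer column also triggers the missing-values hint warning above)
frame_schema = _infer_schema(pd.DataFrame({"price": [1.0, 2.5], "count": [3, 4]}))
# -> Schema([ColSpec(DataType.double, "price"), ColSpec(DataType.long, "count")])
assert not frame_schema.is_tensor_spec()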
def test_model_signature_with_tensorspec():
    signature1 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10))]),
    )
    signature2 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10))]),
    )
    assert signature1 == signature2

    # Single type mismatch
    signature3 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("int"), (-1, 10))]),
    )
    assert signature3 != signature1

    # Name mismatch
    signature4 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10), "misMatch")]),
    )
    assert signature3 != signature4

    as_json = json.dumps(signature1.to_dict())
    signature5 = ModelSignature.from_dict(json.loads(as_json))
    assert signature1 == signature5

    # Test with name
    signature6 = ModelSignature(
        inputs=Schema([
            TensorSpec(np.dtype("float"), (-1, 28, 28), name="image"),
            TensorSpec(np.dtype("int"), (-1, 10), name="metadata"),
        ]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10), name="outputs")]),
    )
    signature7 = ModelSignature(
        inputs=Schema([
            TensorSpec(np.dtype("float"), (-1, 28, 28), name="image"),
            TensorSpec(np.dtype("int"), (-1, 10), name="metadata"),
        ]),
        outputs=Schema([TensorSpec(np.dtype("float"), (-1, 10), name="outputs")]),
    )
    assert signature6 == signature7
    assert signature1 != signature6

    # Test w/o output
    signature8 = ModelSignature(
        inputs=Schema([TensorSpec(np.dtype("float"), (-1, 28, 28))]), outputs=None)
    as_json = json.dumps(signature8.to_dict())
    signature9 = ModelSignature.from_dict(json.loads(as_json))
    assert signature8 == signature9
def test_schema_inference_on_basic_numpy(pandas_df_with_all_types):
    for col in pandas_df_with_all_types:
        data = pandas_df_with_all_types[col].to_numpy()
        schema = _infer_schema(data)
        assert schema == Schema([TensorSpec(type=data.dtype, shape=(-1,))])
# train_X/train_Y/test_X/test_Y and trainX come from an earlier data-loading
# step that is not shown in this excerpt.
testX = test_X.reshape((test_X.shape[0], 28, 28, 1))
trainY = tf.keras.utils.to_categorical(train_Y)
testY = tf.keras.utils.to_categorical(test_Y)

model = tf.keras.models.Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform',
                 input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(10, activation='softmax'))

opt = SGD(lr=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(trainX, trainY, epochs=1, batch_size=32, validation_data=(testX, testY))

input_schema = Schema([
    TensorSpec(np.dtype(np.uint8), (-1, 28, 28, 1)),
])
output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 10))])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)

input_example = np.array(
    [
        [[0, 0, 0, 0], [0, 134, 25, 56], [253, 242, 195, 6], [0, 93, 82, 82]],
        [[0, 23, 46, 0], [33, 13, 36, 166], [76, 75, 0, 255], [33, 44, 11, 82]],
    ],
    dtype=np.uint8,
)

mlflow.keras.log_model(model, "mnist_cnn", signature=signature,
                       input_example=input_example)
def test_tensor_spec():
    a1 = TensorSpec(np.dtype("float64"), (-1, 3, 3), "a")
    a2 = TensorSpec(np.dtype("float"), (-1, 3, 3), "a")  # float defaults to float64
    a3 = TensorSpec(np.dtype("float"), [-1, 3, 3], "a")
    a4 = TensorSpec(np.dtype("int"), (-1, 3, 3), "a")
    assert a1 == a2
    assert a1 == a3
    assert a1 != a4
    b1 = TensorSpec(np.dtype("float64"), (-1, 3, 3), "b")
    assert b1 != a1

    with pytest.raises(TypeError, match="Expected `type` to be instance"):
        TensorSpec("Unsupported", (-1, 3, 3), "a")
    with pytest.raises(TypeError, match="Expected `shape` to be instance"):
        TensorSpec(np.dtype("float64"), np.array([-1, 2, 3]), "b")
    with pytest.raises(
        MlflowException,
        match="MLflow does not support size information in flexible numpy data types",
    ):
        TensorSpec(np.dtype("<U10"), (-1,), "b")

    a5 = TensorSpec.from_json_dict(**a1.to_dict())
    assert a5 == a1
    assert TensorSpec.from_json_dict(**json.loads(json.dumps(a1.to_dict()))) == a1

    a6 = TensorSpec(np.dtype("float64"), (-1, 3, 3))
    a7 = TensorSpec(np.dtype("float64"), (-1, 3, 3), None)
    assert a6 == a7
    assert TensorSpec.from_json_dict(**json.loads(json.dumps(a6.to_dict()))) == a6
    Parameters,
)
from mlserver_mlflow.metadata import (
    InputSpec,
    _get_content_type,
    _get_shape,
    to_metadata_tensors,
)


@pytest.mark.parametrize(
    "input_spec, expected",
    [
        (
            TensorSpec(name="foo", shape=(2, 2), type=np.dtype("int32")),
            ("INT32", NumpyCodec.ContentType),
        ),
        (
            ColSpec(name="foo", type=DataType.string),
            ("BYTES", StringCodec.ContentType),
        ),
        (
            ColSpec(name="foo", type=DataType.binary),
            ("BYTES", Base64Codec.ContentType),
        ),
    ],
)
def test_get_content_type(input_spec: InputSpec, expected: Tuple[str, str]):
    datatype, content_type = _get_content_type(input_spec)
    assert (datatype, content_type) == expected