Python _dataframe_from_json示例，mlflow.utils.proto_json_utils._dataframe_from_json Python示例

示例#1

0

显示文件

文件： test_model_input_examples.py 项目： ztxjack/mlflow

def test_input_examples(pandas_df_with_all_types):
    sig = infer_signature(pandas_df_with_all_types)
    # test setting example with data frame with all supported data types
    with TempDir() as tmp:
        example = _Example(pandas_df_with_all_types)
        example.save(tmp.path())
        filename = example.info["artifact_path"]
        with open(tmp.path(filename), "r") as f:
            data = json.load(f)
            assert set(data.keys()) == set(("columns", "data"))
        parsed_df = _dataframe_from_json(tmp.path(filename), schema=sig.inputs)
        assert (pandas_df_with_all_types == parsed_df).all().all()
        # the frame read without schema should match except for the binary values
        assert (parsed_df.drop(columns=["binary"]) == _dataframe_from_json(tmp.path(filename))
                .drop(columns=["binary"])).all().all()

    # pass the input as dictionary instead
    with TempDir() as tmp:
        d = {name: pandas_df_with_all_types[name].values
             for name in pandas_df_with_all_types.columns}
        example = _Example(d)
        example.save(tmp.path())
        filename = example.info["artifact_path"]
        parsed_df = _dataframe_from_json(tmp.path(filename), sig.inputs)
        assert (pandas_df_with_all_types == parsed_df).all().all()

    # input passed as numpy array
    sig = infer_signature(pandas_df_with_all_types.values)
    with TempDir() as tmp:
        example = _Example(pandas_df_with_all_types.values)
        example.save(tmp.path())
        filename = example.info["artifact_path"]
        with open(tmp.path(filename), "r") as f:
            data = json.load(f)
            assert set(data.keys()) == set(("data",))
        parsed_ary = _dataframe_from_json(tmp.path(filename), schema=sig.inputs).values
        assert (pandas_df_with_all_types.values == parsed_ary).all().all()

    # pass multidimensional array
    with TempDir() as tmp:
        example = np.array([[[1, 2, 3]]])
        with pytest.raises(TensorsNotSupportedException):
            _Example(example)

    # pass multidimensional array
    with TempDir() as tmp:
        example = np.array([[1, 2, 3]])
        with pytest.raises(TensorsNotSupportedException):
            _Example({"x": example, "y": example})

    # pass dict with scalars
    with TempDir() as tmp:
        example = {"a": 1, "b": "abc"}
        x = _Example(example)
        x.save(tmp.path())
        filename = x.info["artifact_path"]
        parsed_df = _dataframe_from_json(tmp.path(filename))
        assert example == parsed_df.to_dict(orient="records")[0]

示例#2

0

显示文件

文件： test_model.py 项目： anjosma/mlflow_experiments

def test_model_log():
    with TempDir(chdr=True) as tmp:
        experiment_id = mlflow.create_experiment("test")
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"),
                           ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]))
        input_example = {"x": 1, "y": 2}
        with mlflow.start_run(experiment_id=experiment_id) as r:
            Model.log("some/path",
                      TestFlavor,
                      signature=sig,
                      input_example=input_example)

        local_path = _download_artifact_from_uri("runs:/{}/some/path".format(
            r.info.run_id),
                                                 output_path=tmp.path(""))
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == r.info.run_id
        assert loaded_model.artifact_path == "some/path"
        assert loaded_model.flavors == {
            "flavor1": {
                "a": 1,
                "b": 2
            },
            "flavor2": {
                "x": 1,
                "y": 2
            },
        }
        assert loaded_model.signature == sig
        path = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path)
        assert x.to_dict(orient="records")[0] == input_example

示例#3

0

显示文件

文件： test_model.py 项目： wwjiang007/mlflow

def test_model_log_with_input_example_succeeds():
    with TempDir(chdr=True) as tmp:
        sig = ModelSignature(
            inputs=Schema([
                ColSpec("integer", "a"),
                ColSpec("string", "b"),
                ColSpec("boolean", "c"),
                ColSpec("string", "d"),
                ColSpec("datetime", "e"),
            ]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = pd.DataFrame(
            {
                "a": np.int32(1),
                "b": "test string",
                "c": True,
                "d": date.today(),
                "e": np.datetime64("2020-01-01T00:00:00"),
            },
            index=[0],
        )

        local_path, _ = _log_model_with_signature_and_example(
            tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        path = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path, schema=sig.inputs)

        # date column will get deserialized into string
        input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
        assert x.equals(input_example)

示例#4

0

显示文件

文件： utils.py 项目： bkbonde/mlflow

def _read_example(mlflow_model: Model, path: str):
    """
    Read example from a model directory. Returns None if there is no example metadata (i.e. the
    model was saved without example). Raises FileNotFoundError if there is model metadata but the
    example file is missing.

    :param mlflow_model: Model metadata.
    :param path: Path to the model directory.
    :return: Input example or None if the model has no example.
    """
    if mlflow_model.saved_input_example_info is None:
        return None
    example_type = mlflow_model.saved_input_example_info["type"]
    if example_type not in ["dataframe", "ndarray", "sparse_matrix_csc", "sparse_matrix_csr"]:
        raise MlflowException(
            "This version of mlflow can not load example of type {}".format(example_type)
        )
    input_schema = mlflow_model.signature.inputs if mlflow_model.signature is not None else None
    path = os.path.join(path, mlflow_model.saved_input_example_info["artifact_path"])
    if example_type == "ndarray":
        return _read_tensor_input_from_json(path, schema=input_schema)
    elif example_type in ["sparse_matrix_csc", "sparse_matrix_csr"]:
        return _read_sparse_matrix_from_json(path, example_type)
    else:
        return _dataframe_from_json(path, schema=input_schema, precise_float=True)

示例#5

0

显示文件

文件： test_model.py 项目： wwjiang007/mlflow

def test_model_log():
    with TempDir(chdr=True) as tmp:
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"),
                           ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = {"x": 1, "y": 2}
        local_path, r = _log_model_with_signature_and_example(
            tmp, sig, input_example)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == r.info.run_id
        assert loaded_model.artifact_path == "some/path"
        assert loaded_model.flavors == {
            "flavor1": {
                "a": 1,
                "b": 2
            },
            "flavor2": {
                "x": 1,
                "y": 2
            },
        }
        assert loaded_model.signature == sig
        path = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path)
        assert x.to_dict(orient="records")[0] == input_example
        assert not hasattr(loaded_model, "databricks_runtime")

示例#6

0

显示文件

文件： test_model.py 项目： wwjiang007/mlflow

def test_model_log_with_databricks_runtime():
    dbr = "8.3.x-snapshot-gpu-ml-scala2.12"
    with TempDir(chdr=True) as tmp, mock.patch(
            "mlflow.models.model.get_databricks_runtime", return_value=dbr):
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"),
                           ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = {"x": 1, "y": 2}
        local_path, r = _log_model_with_signature_and_example(
            tmp, sig, input_example)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == r.info.run_id
        assert loaded_model.artifact_path == "some/path"
        assert loaded_model.flavors == {
            "flavor1": {
                "a": 1,
                "b": 2
            },
            "flavor2": {
                "x": 1,
                "y": 2
            },
        }
        assert loaded_model.signature == sig
        path = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path)
        assert x.to_dict(orient="records")[0] == input_example
        assert loaded_model.databricks_runtime == dbr

示例#7

0

显示文件

def test_input_examples_with_nan(df_with_nan, dict_of_ndarrays_with_nans):
    # test setting example with data frame with NaN values in it
    sig = infer_signature(df_with_nan)
    with TempDir() as tmp:
        example = _Example(df_with_nan)
        example.save(tmp.path())
        filename = example.info["artifact_path"]
        with open(tmp.path(filename), "r") as f:
            data = json.load(f)
            assert set(data.keys()) == set(("columns", "data"))
        parsed_df = _dataframe_from_json(tmp.path(filename), schema=sig.inputs)
        # by definition of NaN, NaN == NaN is False but NaN != NaN is True
        assert (
            ((df_with_nan == parsed_df) | ((df_with_nan != df_with_nan) & (parsed_df != parsed_df)))
            .all()
            .all()
        )
        # the frame read without schema should match except for the binary values
        no_schema_df = _dataframe_from_json(tmp.path(filename))
        a = parsed_df.drop(columns=["binary"])
        b = no_schema_df.drop(columns=["binary"])
        assert ((a == b) | ((a != a) & (b != b))).all().all()

    # pass multidimensional array
    for col in dict_of_ndarrays_with_nans:
        input_example = dict_of_ndarrays_with_nans[col]
        sig = infer_signature(input_example)
        with TempDir() as tmp:
            example = _Example(input_example)
            example.save(tmp.path())
            filename = example.info["artifact_path"]
            parsed_ary = _read_tensor_input_from_json(tmp.path(filename), schema=sig.inputs)
            assert np.array_equal(parsed_ary, input_example, equal_nan=True)

            # without a schema/dtype specified, the resulting tensor will keep the None type
            no_schema_df = _read_tensor_input_from_json(tmp.path(filename))
            assert np.array_equal(
                no_schema_df, np.where(np.isnan(input_example), None, input_example)
            )

示例#8

0

显示文件

def test_model_info():
    with TempDir(chdr=True) as tmp:
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"),
                           ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = {"x": 1, "y": 2}

        experiment_id = mlflow.create_experiment("test")
        with mlflow.start_run(experiment_id=experiment_id) as run:
            model_info = Model.log("some/path",
                                   TestFlavor,
                                   signature=sig,
                                   input_example=input_example)
        local_path = _download_artifact_from_uri("runs:/{}/some/path".format(
            run.info.run_id),
                                                 output_path=tmp.path(""))

        assert model_info.run_id == run.info.run_id
        assert model_info.artifact_path == "some/path"
        assert model_info.model_uri == "runs:/{}/some/path".format(
            run.info.run_id)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert model_info.utc_time_created == loaded_model.utc_time_created
        assert model_info.model_uuid == loaded_model.model_uuid

        assert model_info.flavors == {
            "flavor1": {
                "a": 1,
                "b": 2
            },
            "flavor2": {
                "x": 1,
                "y": 2
            },
        }

        path = os.path.join(
            local_path, model_info.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path)
        assert x.to_dict(orient="records")[0] == input_example

        assert model_info.signature_dict == sig.to_dict()

        assert Version(model_info.mlflow_version) == Version(
            loaded_model.mlflow_version)

示例#9

0

显示文件

文件： utils.py 项目： anjosma/mlflow_experiments

def _read_example(mlflow_model: Model, path: str):
    """
    Read example from a model directory. Returns None if there is no example metadata (i.e. the
    model was saved without example). Raises IO Exception if there is model metadata but the example
    file is missing.

    :param mlflow_model: Model metadata.
    :param path: Path to the model directory.
    :return: Input example or None if the model has no example.
    """
    if mlflow_model.saved_input_example_info is None:
        return None
    example_type = mlflow_model.saved_input_example_info["type"]
    if example_type != "dataframe":
        raise MlflowException("This version of mlflow can not load example of type {}".format(
            example_type))
    input_schema = mlflow_model.signature.inputs if mlflow_model.signature is not None else None
    path = os.path.join(path, mlflow_model.saved_input_example_info["artifact_path"])
    return _dataframe_from_json(path, schema=input_schema, precise_float=True)

示例#10

0

显示文件

文件： __init__.py 项目： anjosma/mlflow_experiments

def parse_json_input(json_input, orient="split", schema: Schema=None):
    """
    :param json_input: A JSON-formatted string representation of a Pandas DataFrame, or a stream
                       containing such a string representation.
    :param orient: The Pandas DataFrame orientation of the JSON input. This is either 'split'
                   or 'records'.
    :param schema: Optional schema specification to be used during parsing.
    """
    # pylint: disable=broad-except
    try:
        return _dataframe_from_json(json_input, pandas_orient=orient, schema=schema)
    except Exception:
        _handle_serving_error(
            error_message=(
                "Failed to parse input as a Pandas DataFrame. Ensure that the input is"
                " a valid JSON-formatted Pandas DataFrame with the `{orient}` orient"
                " produced using the `pandas.DataFrame.to_json(..., orient='{orient}')`"
                " method.".format(orient=orient)),
            error_code=MALFORMED_REQUEST)

示例#11

0

显示文件

def test_model_log_with_input_example_succeeds():
    with TempDir(chdr=True) as tmp:
        experiment_id = mlflow.create_experiment("test")
        sig = ModelSignature(
            inputs=Schema([
                ColSpec("integer", "a"),
                ColSpec("string", "b"),
                ColSpec("boolean", "c"),
                ColSpec("string", "d"),
                ColSpec("datetime", "e"),
            ]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = pd.DataFrame(
            {
                "a": np.int32(1),
                "b": "test string",
                "c": True,
                "d": date.today(),
                "e": np.datetime64("2020-01-01T00:00:00"),
            },
            index=[0],
        )
        with mlflow.start_run(experiment_id=experiment_id) as r:
            Model.log("some/path",
                      TestFlavor,
                      signature=sig,
                      input_example=input_example)

        local_path = _download_artifact_from_uri("runs:/{}/some/path".format(
            r.info.run_id),
                                                 output_path=tmp.path(""))
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        path = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path, schema=sig.inputs)

        # date column will get deserialized into string
        input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
        assert x.equals(input_example)

示例#12

0

显示文件

def test_dataframe_from_json():
    source = pd.DataFrame(
        {
            "boolean": [True, False, True],
            "string": ["a", "b", "c"],
            "float": np.array([1.2, 2.3, 3.4], dtype=np.float32),
            "double": np.array([1.2, 2.3, 3.4], dtype=np.float64),
            "integer": np.array([3, 4, 5], dtype=np.int32),
            "long": np.array([3, 4, 5], dtype=np.int64),
            "binary": [bytes([1, 2, 3]),
                       bytes([4, 5]),
                       bytes([6])],
            "date_string": ["2018-02-03", "1996-03-02", "2021-03-05"],
        },
        columns=[
            "boolean",
            "string",
            "float",
            "double",
            "integer",
            "long",
            "binary",
            "date_string",
        ],
    )

    jsonable_df = pd.DataFrame(source, copy=True)
    jsonable_df["binary"] = jsonable_df["binary"].map(base64.b64encode)
    schema = Schema([
        ColSpec("boolean", "boolean"),
        ColSpec("string", "string"),
        ColSpec("float", "float"),
        ColSpec("double", "double"),
        ColSpec("integer", "integer"),
        ColSpec("long", "long"),
        ColSpec("binary", "binary"),
        ColSpec("string", "date_string"),
    ])
    parsed = _dataframe_from_json(jsonable_df.to_json(orient="split"),
                                  pandas_orient="split",
                                  schema=schema)
    assert parsed.equals(source)
    parsed = _dataframe_from_json(jsonable_df.to_json(orient="records"),
                                  pandas_orient="records",
                                  schema=schema)
    assert parsed.equals(source)
    # try parsing with tensor schema
    tensor_schema = Schema([
        TensorSpec(np.dtype("bool"), [-1], "boolean"),
        TensorSpec(np.dtype("str"), [-1], "string"),
        TensorSpec(np.dtype("float32"), [-1], "float"),
        TensorSpec(np.dtype("float64"), [-1], "double"),
        TensorSpec(np.dtype("int32"), [-1], "integer"),
        TensorSpec(np.dtype("int64"), [-1], "long"),
        TensorSpec(np.dtype(bytes), [-1], "binary"),
    ])
    parsed = _dataframe_from_json(jsonable_df.to_json(orient="split"),
                                  pandas_orient="split",
                                  schema=tensor_schema)

    # NB: tensor schema does not automatically decode base64 encoded bytes.
    assert parsed.equals(jsonable_df)
    parsed = _dataframe_from_json(jsonable_df.to_json(orient="records"),
                                  pandas_orient="records",
                                  schema=tensor_schema)

    # NB: tensor schema does not automatically decode base64 encoded bytes.
    assert parsed.equals(jsonable_df)

    # Test parse with TesnorSchema with a single tensor
    tensor_schema = Schema([TensorSpec(np.dtype("float32"), [-1, 3])])
    source = pd.DataFrame(
        {
            "a": np.array([1, 2, 3], dtype=np.float32),
            "b": np.array([4.1, 5.2, 6.3], dtype=np.float32),
            "c": np.array([7, 8, 9], dtype=np.float32),
        },
        columns=["a", "b", "c"],
    )
    assert source.equals(
        _dataframe_from_json(source.to_json(orient="split"),
                             pandas_orient="split",
                             schema=tensor_schema))
    assert source.equals(
        _dataframe_from_json(source.to_json(orient="records"),
                             pandas_orient="records",
                             schema=tensor_schema))

示例#13

0

显示文件

def test_input_examples(pandas_df_with_all_types, dict_of_ndarrays):
    sig = infer_signature(pandas_df_with_all_types)
    # test setting example with data frame with all supported data types
    with TempDir() as tmp:
        example = _Example(pandas_df_with_all_types)
        example.save(tmp.path())
        filename = example.info["artifact_path"]
        with open(tmp.path(filename), "r") as f:
            data = json.load(f)
            assert set(data.keys()) == set(("columns", "data"))
        parsed_df = _dataframe_from_json(tmp.path(filename), schema=sig.inputs)
        assert (pandas_df_with_all_types == parsed_df).all().all()
        # the frame read without schema should match except for the binary values
        assert ((parsed_df.drop(columns=["binary"]) == _dataframe_from_json(
            tmp.path(filename)).drop(columns=["binary"])).all().all())

    # NB: Drop columns that cannot be encoded by proto_json_utils.pyNumpyEncoder
    new_df = pandas_df_with_all_types.drop(
        columns=["boolean_ext", "integer_ext", "string_ext"])

    # pass the input as dictionary instead
    with TempDir() as tmp:
        d = {name: new_df[name].values for name in new_df.columns}
        example = _Example(d)
        example.save(tmp.path())
        filename = example.info["artifact_path"]
        parsed_dict = _read_tensor_input_from_json(tmp.path(filename))
        assert d.keys() == parsed_dict.keys()
        # Asserting binary will fail since it is converted to base64 encoded strings.
        # The check above suffices that the binary input is stored.
        del d["binary"]
        for key in d:
            assert np.array_equal(d[key], parsed_dict[key])

    # input passed as numpy array
    new_df = pandas_df_with_all_types.drop(columns=["binary"])
    for col in new_df:
        input_example = new_df[col].to_numpy()
        with TempDir() as tmp:
            example = _Example(input_example)
            example.save(tmp.path())
            filename = example.info["artifact_path"]
            parsed_ary = _read_tensor_input_from_json(tmp.path(filename))
            assert np.array_equal(parsed_ary, input_example)

    # pass multidimensional array
    for col in dict_of_ndarrays:
        input_example = dict_of_ndarrays[col]
        with TempDir() as tmp:
            example = _Example(input_example)
            example.save(tmp.path())
            filename = example.info["artifact_path"]
            parsed_ary = _read_tensor_input_from_json(tmp.path(filename))
            assert np.array_equal(parsed_ary, input_example)

    # pass multidimensional array as a list
    example = np.array([[1, 2, 3]])
    with pytest.raises(TensorsNotSupportedException):
        _Example([example, example])

    # pass dict with scalars
    with TempDir() as tmp:
        example = {"a": 1, "b": "abc"}
        x = _Example(example)
        x.save(tmp.path())
        filename = x.info["artifact_path"]
        parsed_df = _dataframe_from_json(tmp.path(filename))
        assert example == parsed_df.to_dict(orient="records")[0]