示例#1
0
 def _create():
     """Create a ``RunInfo`` populated with randomly generated values.

     Returns:
         A 9-tuple: the ``RunInfo`` followed by the run id, experiment id,
         user id, status, start time, end time, lifecycle stage and artifact
         URI that were passed to its constructor.
     """
     run_id = str(uuid.uuid4())
     experiment_id = str(random_int(10, 2000))
     user_id = random_str(random_int(10, 25))
     status = RunStatus.to_string(random.choice(RunStatus.all_status()))
     start_time = random_int(1, 10)
     # The end time is the start time plus a second random offset.
     end_time = start_time + random_int(1, 10)
     artifact_uri = random_str(random_int(10, 40))

     # Keep the expected values in return order so they can be reused both
     # as constructor keyword arguments and as the tail of the result.
     field_names = (
         "run_id",
         "experiment_id",
         "user_id",
         "status",
         "start_time",
         "end_time",
         "lifecycle_stage",
         "artifact_uri",
     )
     expected = (
         run_id,
         experiment_id,
         user_id,
         status,
         start_time,
         end_time,
         LifecycleStage.ACTIVE,
         artifact_uri,
     )
     run_info = RunInfo(run_uuid=run_id, **dict(zip(field_names, expected)))
     return (run_info,) + expected
示例#2
0
def test_yaml_read_and_write(tmpdir):
    """Round-trip a dict through ``write_yaml``/``read_yaml`` and check ``safe_edit_yaml``.

    Verifies that the data read back equals the data written, that the raw
    file contains no ``!!python`` tags, that non-ASCII text is stored as
    UTF-8, and that an edit applied via ``safe_edit_yaml`` is visible inside
    the ``with`` block but not before or after it.
    """
    root = str(tmpdir)
    file_name = random_file("yaml")
    long_value = long(1) if six.PY2 else 1  # pylint: disable=undefined-variable
    payload = {
        "a": random_int(),
        "B": random_int(),
        "text_value": u"中文",
        "long_value": long_value,
        "int_value": 32,
        "text_value_2": u"hi"
    }
    file_utils.write_yaml(root, file_name, payload)
    loaded = file_utils.read_yaml(root, file_name)
    assert payload == loaded

    # Inspect the raw file text rather than the parsed result.
    with codecs.open(os.path.join(root, file_name), encoding="utf-8") as handle:
        raw_text = handle.read()
    assert "!!python" not in raw_text
    # Check that UTF-8 strings are written properly to the file (rather than as ASCII
    # representations of their byte sequences).
    assert u"中文" in raw_text

    def edit_func(old_dict):
        old_dict["more_text"] = u"西班牙语"
        return old_dict

    assert "more_text" not in file_utils.read_yaml(root, file_name)
    with safe_edit_yaml(root, file_name, edit_func):
        edited = file_utils.read_yaml(root, file_name)
        assert "more_text" in edited
        assert edited["more_text"] == u"西班牙语"
    # Once the with-block exits, the added key is expected to be gone again.
    assert "more_text" not in file_utils.read_yaml(root, file_name)
示例#3
0
 def _create():
     """Create a ``RunData`` with one random metric and ten random params and tags.

     Returns:
         A 4-tuple ``(run_data, metrics, params, tags)`` where the three lists
         are the exact objects passed to the ``RunData`` constructor.
     """
     # Use integer bounds for the timestamp jitter. NOTE(review): random_int
     # presumably forwards to random.randint, which rejects float arguments
     # on Python 3.12+ -- confirm against the helper.
     timestamp = int(time.time()) + random_int(-10000, 10000)
     metrics = [Metric(random_str(10), random_int(0, 1000), timestamp)]
     params = [
         Param(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]
     tags = [
         RunTag(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]
     rd = RunData(metrics=metrics, params=params, tags=tags)
     return rd, metrics, params, tags
def test_parse_json_input_split_oriented():
    """Check that a split-oriented JSON payload parses back to the source frame.

    Builds a 200-row DataFrame with two random integer columns and one random
    string column, serializes it with ``orient="split"``, parses it with
    ``parse_json_input`` and compares the result element-wise.
    """
    size = 200
    data = {
        "col_m": [random_int(0, 1000) for _ in range(size)],
        "col_z": [random_str(4) for _ in range(size)],
        "col_a": [random_int() for _ in range(size)],
    }
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.parse_json_input(p1.to_json(orient="split"), orient="split")
    # Iterating a DataFrame yields its column labels, so ``all(p1 == p2)``
    # only tested that the labels were truthy. Reduce the element-wise
    # comparison explicitly over both axes instead.
    assert (p1 == p2).all().all()
def test_parse_json_input_records_oriented():
    """Check that a records-oriented JSON payload parses back column by column.

    Builds a 20-row DataFrame with random values, serializes it with
    ``orient="records"``, parses it with ``parse_json_input`` and compares
    every column of the result against the source frame.
    """
    size = 20
    col_m = [random_int(0, 1000) for _ in range(size)]
    col_z = [random_str(4) for _ in range(size)]
    col_a = [random_int() for _ in range(size)]
    data = {"col_m": col_m, "col_z": col_z, "col_a": col_a}
    p1 = pd.DataFrame.from_dict(data)
    serialized = p1.to_json(orient="records")
    p2 = pyfunc_scoring_server.parse_json_input(serialized, orient="records")
    # "records" orient may shuffle column ordering. Hence comparing each column Series
    for col in data:
        assert all(p1[col] == p2[col])
示例#6
0
 def _create():
     """Create a ``RunData`` with one random metric and ten random params and tags.

     The metric is built with explicit ``key``, ``value``, ``timestamp`` and
     ``step`` keyword arguments.

     Returns:
         A 4-tuple ``(run_data, metrics, params, tags)`` where the three lists
         are the exact objects passed to the ``RunData`` constructor.
     """
     metrics = [
         Metric(
             key=random_str(10),
             value=random_int(0, 1000),
             # Integer bounds for the timestamp jitter. NOTE(review):
             # random_int presumably forwards to random.randint, which
             # rejects float arguments on Python 3.12+ -- confirm.
             timestamp=int(time.time()) + random_int(-10000, 10000),
             step=random_int(),
         )
     ]
     params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]
     tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]
     rd = RunData(metrics=metrics, params=params, tags=tags)
     return rd, metrics, params, tags
def test_parse_json_input_split_oriented_to_numpy_array():
    """Check ``parse_split_oriented_json_input_to_numpy`` against a hand-built frame.

    The expected value is a DataFrame built from an object-dtype array of the
    same rows with ``infer_objects()`` applied; the comparison is done with
    ``np.testing.assert_array_equal``.
    """
    size = 200
    # OrderedDict keeps the column order fixed so rows can be rebuilt below.
    data = OrderedDict()
    data["col_m"] = [random_int(0, 1000) for _ in range(size)]
    data["col_z"] = [random_str(4) for _ in range(size)]
    data["col_a"] = [random_int() for _ in range(size)]
    p0 = pd.DataFrame.from_dict(data)

    rows = [
        [m, z, a]
        for m, z, a in zip(data['col_m'], data['col_z'], data['col_a'])
    ]
    expected = pd.DataFrame(np.array(rows, dtype=object)).infer_objects()

    split_json = p0.to_json(orient="split")
    actual = pyfunc_scoring_server.parse_split_oriented_json_input_to_numpy(split_json)
    np.testing.assert_array_equal(expected, actual)
示例#8
0
def test_infer_and_parse_json_input():
    """Check ``infer_and_parse_json_input`` for each input form exercised below.

    Covers a JSON list (records orient), a JSON dict (split orient), a
    ``{"instances": ...}`` payload, a JSON-encoded bare string, and text that
    is not JSON at all. The last two are expected to raise
    ``MlflowException`` with specific messages.
    """
    size = 20
    # input is correctly recognized as list, and parsed as pd df with orient 'records'
    data = {
        "col_m": [random_int(0, 1000) for _ in range(size)],
        "col_z": [random_str(4) for _ in range(size)],
        "col_a": [random_int() for _ in range(size)],
    }
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.infer_and_parse_json_input(
        p1.to_json(orient="records"))
    # Iterating a DataFrame yields column labels, so ``all(p1 == p2)`` never
    # looked at the values; reduce the element-wise comparison explicitly.
    assert (p1 == p2).all().all()

    # input is correctly recognized as a dict, and parsed as pd df with orient 'split'
    data = {
        "col_m": [random_int(0, 1000) for _ in range(size)],
        "col_z": [random_str(4) for _ in range(size)],
        "col_a": [random_int() for _ in range(size)],
    }
    p1 = pd.DataFrame.from_dict(data)
    p2 = pyfunc_scoring_server.infer_and_parse_json_input(
        p1.to_json(orient="split"))
    assert (p1 == p2).all().all()

    # input is correctly recognized as tf serving input
    arr = [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[3, 2, 1], [6, 5, 4], [9, 8, 7]],
    ]
    tfserving_input = {"instances": arr}
    result = pyfunc_scoring_server.infer_and_parse_json_input(
        json.dumps(tfserving_input))
    assert result.shape == (2, 3, 3)
    assert (result == np.array(arr)).all()

    # input is unrecognized JSON input
    with pytest.raises(MlflowException) as ex:
        pyfunc_scoring_server.infer_and_parse_json_input(
            json.dumps('"just a string"'))
    # Inspect the raised exception itself; ``str()`` of the pytest
    # ExceptionInfo wrapper is not guaranteed to be the exception message.
    assert (
        "Failed to parse input from JSON. Ensure that input is a valid JSON"
        " list or dictionary." in str(ex.value))

    # input is not json str
    with pytest.raises(MlflowException) as ex:
        pyfunc_scoring_server.infer_and_parse_json_input("(not a json string)")
    assert (
        "Failed to parse input from JSON. Ensure that input is a valid JSON"
        " formatted string." in str(ex.value))
示例#9
0
    def test_mkdir(self):
        """Check that ``mkdir`` creates the child directory and raises on a bad parent."""
        created = "mkdir_test_%d" % random_int()
        file_utils.mkdir(self.test_folder, created)
        # The new directory is expected to be the only entry in the test folder.
        self.assertEqual(os.listdir(self.test_folder), [created])

        bad_parent = "/   bad directory @ name "
        with self.assertRaises(OSError):
            file_utils.mkdir(bad_parent, "ouch")
示例#10
0
 def test_delete_tags(self):
     """Exercise ``FileStore.delete_tag`` on existing, missing and deleted targets.

     Sets two tags on a run, deletes one and checks it is gone, then checks
     that deleting an unknown tag, a tag on an unknown run, or a tag on a
     deleted run raises ``MlflowException``.
     """
     store = FileStore(self.test_root)
     exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
     run_id = self.exp_data[exp_id]['runs'][0]
     for key, value in (("tag0", "value0"), ("tag1", "value1")):
         store.set_tag(run_id, RunTag(key, value))
     tags_before = store.get_run(run_id).data.tags
     assert tags_before["tag0"] == "value0"
     assert tags_before["tag1"] == "value1"

     store.delete_tag(run_id, "tag0")
     tags_after = store.get_run(run_id).data.tags
     assert "tag0" not in tags_after

     # test that you cannot delete tags that don't exist.
     with pytest.raises(MlflowException):
         store.delete_tag(run_id, "fakeTag")
     # test that you cannot delete tags for nonexistent runs
     with pytest.raises(MlflowException):
         store.delete_tag("random_id", "tag0")

     # A fresh store instance is used for the deleted-run checks.
     store = FileStore(self.test_root)
     store.delete_run(run_id)
     # test that you cannot delete tags for deleted runs.
     stage = store.get_run(run_id).info.lifecycle_stage
     assert stage == LifecycleStage.DELETED
     with pytest.raises(MlflowException):
         store.delete_tag(run_id, "tag0")
示例#11
0
    def test_rename_experiment(self):
        """Exercise ``FileStore.rename_experiment`` across error and success paths.

        Checks that renaming to ``None`` or to another experiment's name
        raises, that a valid rename is visible through ``get_experiment``,
        that a deleted experiment cannot be renamed, and that renaming works
        again after the experiment is restored.
        """
        fs = FileStore(self.test_root)
        exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]

        # Error cases
        with self.assertRaises(Exception):
            fs.rename_experiment(exp_id, None)

        # Resolve the clashing name outside the assertRaises block so that
        # only the rename call itself can satisfy the expected exception.
        other_exp_id = next((exp for exp in self.experiments if exp != exp_id), None)
        assert other_exp_id is not None, "need a second experiment to test name clashes"
        taken_name = fs.get_experiment(other_exp_id).name
        with self.assertRaises(Exception):
            # test that names of existing experiments are checked before renaming
            fs.rename_experiment(exp_id, taken_name)

        exp_name = self.exp_data[exp_id]["name"]
        new_name = exp_name + "!!!"
        self.assertNotEqual(exp_name, new_name)
        self.assertEqual(fs.get_experiment(exp_id).name, exp_name)
        fs.rename_experiment(exp_id, new_name)
        self.assertEqual(fs.get_experiment(exp_id).name, new_name)

        # Ensure that we cannot rename deleted experiments.
        fs.delete_experiment(exp_id)
        with pytest.raises(Exception) as e:
            fs.rename_experiment(exp_id, exp_name)
        assert "non-active lifecycle" in str(e.value)
        # The failed rename must leave the stored name untouched.
        self.assertEqual(fs.get_experiment(exp_id).name, new_name)

        # Restore the experiment, and confirm that we can now rename it.
        fs.restore_experiment(exp_id)
        self.assertEqual(fs.get_experiment(exp_id).name, new_name)
        fs.rename_experiment(exp_id, exp_name)
        self.assertEqual(fs.get_experiment(exp_id).name, exp_name)
示例#12
0
    def test_creation_and_hydration(self):
        """Build a ``FileInfo`` and check it survives dict and proto round-trips."""
        path = random_str(random_int(10, 50))
        is_dir = random_int(10, 2500) % 2 == 0
        size_in_bytes = random_int(1, 10000)
        expected = (path, is_dir, size_in_bytes)

        original = FileInfo(path, is_dir, size_in_bytes)
        self._check(original, *expected)

        as_dict = {"path": path, "is_dir": is_dir, "file_size": size_in_bytes}
        self.assertEqual(dict(original), as_dict)

        # Round-trip through the proto representation.
        from_proto = FileInfo.from_proto(original.to_proto())
        self._check(from_proto, *expected)

        # Rebuild from the dictionary form.
        from_dict = FileInfo.from_dictionary(as_dict)
        self._check(from_dict, *expected)
示例#13
0
 def test_create_run_with_parent_id(self):
     """Check that a run created with a parent run id carries the parent-run tag."""
     store = FileStore(self.test_root)
     exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
     parent_id = 'test_parent_run_id'
     run = store.create_run(exp_id, 'user', 'name', 'source_type', 'source_name',
                            'entry_point_name', 0, None, [], parent_id)
     run_tags = store.get_all_tags(run.info.run_uuid)
     # At least one tag must pair the parent-run key with the id passed above.
     assert any(
         tag.key == MLFLOW_PARENT_RUN_ID and tag.value == 'test_parent_run_id'
         for tag in run_tags
     )
示例#14
0
    def test_delete_restore_experiment(self):
        """Delete and then restore an experiment, checking listings and lifecycle stage.

        After each operation the experiment id is looked up in the
        ACTIVE_ONLY, DELETED_ONLY and ALL listings, and the lifecycle stage
        reported by ``get_experiment`` is compared with the expected constant.
        """
        fs = FileStore(self.test_root)
        exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
        exp_name = self.exp_data[exp_id]["name"]

        def listed_ids(view_type):
            # Ids of the experiments returned for the given view type.
            return self._extract_ids(fs.list_experiments(view_type))

        # delete it
        fs.delete_experiment(exp_id)
        self.assertTrue(exp_id not in listed_ids(ViewType.ACTIVE_ONLY))
        self.assertTrue(exp_id in listed_ids(ViewType.DELETED_ONLY))
        self.assertTrue(exp_id in listed_ids(ViewType.ALL))
        deleted_stage = fs.get_experiment(exp_id).lifecycle_stage
        self.assertEqual(deleted_stage, Experiment.DELETED_LIFECYCLE)

        # restore it
        fs.restore_experiment(exp_id)
        by_id = fs.get_experiment(exp_id)
        self.assertEqual(by_id.experiment_id, exp_id)
        self.assertEqual(by_id.name, exp_name)
        by_name = fs.get_experiment_by_name(exp_name)
        self.assertEqual(by_name.experiment_id, exp_id)
        self.assertEqual(by_name.name, exp_name)
        self.assertTrue(exp_id in listed_ids(ViewType.ACTIVE_ONLY))
        self.assertTrue(exp_id not in listed_ids(ViewType.DELETED_ONLY))
        self.assertTrue(exp_id in listed_ids(ViewType.ALL))
        restored_stage = fs.get_experiment(exp_id).lifecycle_stage
        self.assertEqual(restored_stage, Experiment.ACTIVE_LIFECYCLE)
示例#15
0
 def test_create_run_in_deleted_experiment(self):
     """Check that ``create_run`` raises for an experiment that has been deleted."""
     store = FileStore(self.test_root)
     last_index = len(self.experiments) - 1
     exp_id = self.experiments[random_int(0, last_index)]
     # Delete the experiment first, then try to create a run in it.
     store.delete_experiment(exp_id)
     with pytest.raises(Exception):
         store.create_run(exp_id, 'user', 0, [])
示例#16
0
 def _create():
     """Create a ``RunData`` holding 100 random metrics and 10 random params.

     The entities are attached through ``add_param`` / ``add_metric`` rather
     than the constructor.

     Returns:
         A 3-tuple ``(run_data, metrics, params)`` where the two lists hold
         the objects that were added to the ``RunData``.
     """
     # Integer bounds for the timestamp jitter. NOTE(review): random_int
     # presumably forwards to random.randint, which rejects float arguments
     # on Python 3.12+ -- confirm against the helper.
     metrics = [
         Metric(random_str(10), random_int(),
                int(time.time() + random_int(-10000, 10000)))
         for _ in range(100)
     ]
     params = [
         Param(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]
     rd = RunData()
     for p in params:
         rd.add_param(p)
     for m in metrics:
         rd.add_metric(m)
     return rd, metrics, params
示例#17
0
    def test_creation_and_hydration(self):
        """Build an ``Experiment`` and check it survives dict and proto round-trips."""
        exp_id = random_int()
        name = "exp_%d_%d" % (random_int(), random_int())
        location = random_file(".json")
        expected = (exp_id, name, location)

        original = Experiment(exp_id, name, location)
        self._check(original, *expected)

        as_dict = {"experiment_id": exp_id, "name": name, "artifact_location": location}
        self.assertEqual(dict(original), as_dict)

        # Round-trip through the proto representation.
        from_proto = Experiment.from_proto(original.to_proto())
        self._check(from_proto, *expected)

        # Rebuild from the dictionary form.
        from_dict = Experiment.from_dictionary(as_dict)
        self._check(from_dict, *expected)
示例#18
0
def test_mkdir(tmpdir):
    """Check that ``mkdir`` creates the child directory and raises on a bad parent."""
    parent = str(tmpdir)
    created = "mkdir_test_%d" % random_int()
    file_utils.mkdir(parent, created)
    # The new directory is expected to be the only entry in the temp dir.
    assert os.listdir(parent) == [created]

    bad_parent = "/   bad directory @ name "
    with pytest.raises(OSError):
        file_utils.mkdir(bad_parent, "ouch")
示例#19
0
 def test_create_run_in_deleted_experiment(self):
     """Check that ``create_run`` raises for an experiment that has been deleted."""
     store = FileStore(self.test_root)
     last_index = len(self.experiments) - 1
     exp_id = self.experiments[random_int(0, last_index)]
     # Delete the experiment first, then try to create a run in it.
     store.delete_experiment(exp_id)
     run_args = (exp_id, 'user', 'name', 'source_type', 'source_name',
                 'entry_point_name', 0, None, [], None)
     with pytest.raises(Exception):
         store.create_run(*run_args)
示例#20
0
 def test_get_deleted_run(self):
     """
     Getting metrics/tags/params/run info should be allowed on deleted runs.

     Deletes the first run of a randomly chosen experiment and checks that
     ``get_run`` still returns a truthy result for it.
     """
     store = FileStore(self.test_root)
     last_index = len(self.experiments) - 1
     exp_id = self.experiments[random_int(0, last_index)]
     run_id = self.exp_data[exp_id]['runs'][0]
     store.delete_run(run_id)
     fetched = store.get_run(run_id)
     assert fetched
示例#21
0
 def test_create_run_with_parent_id(self):
     """Check that a run created with a parent run id stores it under the parent-run tag."""
     store = FileStore(self.test_root)
     exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
     run = store.create_run(
         exp_id, 'user', 'name', 'source_type', 'source_name',
         'entry_point_name', 0, None, [], 'test_parent_run_id')
     # Re-read the run from the store and look the tag up by its key.
     stored_tags = store.get_run(run.info.run_uuid).data.tags
     assert stored_tags[MLFLOW_PARENT_RUN_ID] == 'test_parent_run_id'
示例#22
0
    def test_creation_and_hydration(self):
        """Build a ``Param`` and check it survives dict and proto round-trips."""
        # The key length is drawn with random_int(10, 25), the value length
        # with random_int(55, 75).
        key_length = random_int(10, 25)
        key = random_str(key_length)
        value_length = random_int(55, 75)
        value = random_str(value_length)

        original = Param(key, value)
        self._check(original, key, value)

        as_dict = {"key": key, "value": value}
        self.assertEqual(dict(original), as_dict)

        # Round-trip through the proto representation.
        from_proto = Param.from_proto(original.to_proto())
        self._check(from_proto, key, value)

        # Rebuild from the dictionary form.
        from_dict = Param.from_dictionary(as_dict)
        self._check(from_dict, key, value)
示例#23
0
 def test_delete_restore_run(self):
     """Check a run's lifecycle stage before deletion, after deletion and after restore."""
     store = FileStore(self.test_root)
     exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
     run_id = self.exp_data[exp_id]['runs'][0]

     def current_stage():
         # Re-read the run each time so the stored state is what gets checked.
         return store.get_run(run_id).info.lifecycle_stage

     # Should not throw.
     assert current_stage() == 'active'
     store.delete_run(run_id)
     assert current_stage() == 'deleted'
     store.restore_run(run_id)
     assert current_stage() == 'active'
示例#24
0
    def test_get_experiment(self):
        """Verify every known experiment and check that unknown ids raise."""
        store = FileStore(self.test_root)
        for known_id in self.experiments:
            self._verify_experiment(store, known_id)

        # test that fake experiments dont exist.
        # look for random experiment ids between 8000, 15000 since created ones are (100, 2000)
        fake_ids = {random_int(8000, 15000) for _ in range(20)}
        for fake_id in fake_ids:
            with self.assertRaises(Exception):
                store.get_experiment(fake_id)
示例#25
0
 def test_yaml_read_and_write(self):
     """Round-trip a dict through ``write_yaml``/``read_yaml`` and inspect the raw file.

     Verifies that the data read back equals the data written, that the raw
     file contains no ``!!python`` tags, and that non-ASCII text is stored
     as UTF-8.
     """
     file_name = random_file("yaml")
     long_value = long(1) if six.PY2 else 1  # pylint: disable=undefined-variable
     payload = {
         "a": random_int(),
         "B": random_int(),
         "text_value": u"中文",
         "long_value": long_value,
         "int_value": 32,
         "text_value_2": u"hi"
     }
     file_utils.write_yaml(self.test_folder, file_name, payload)
     loaded = file_utils.read_yaml(self.test_folder, file_name)
     self.assertEqual(payload, loaded)

     # Inspect the raw file text rather than the parsed result.
     raw_path = file_utils.build_path(self.test_folder, file_name)
     with codecs.open(raw_path, encoding="utf-8") as handle:
         raw_text = handle.read()
     self.assertNotIn("!!python", raw_text)
     # Check that UTF-8 strings are written properly to the file (rather than as ASCII
     # representations of their byte sequences).
     self.assertIn(u"中文", raw_text)
示例#26
0
 def test_hard_delete_run(self):
     """Check that every run accessor raises after ``_hard_delete_run``."""
     store = FileStore(self.test_root)
     exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
     run_id = self.exp_data[exp_id]["runs"][0]
     store._hard_delete_run(run_id)
     # Each accessor is expected to raise MlflowException for the removed run.
     accessors = (
         store.get_run,
         store.get_all_tags,
         store.get_all_metrics,
         store.get_all_params,
     )
     for accessor in accessors:
         with self.assertRaises(MlflowException):
             accessor(run_id)
示例#27
0
    def test_get_experiment(self):
        """Check stored fields of every known experiment and that unknown ids raise."""
        store = FileStore(self.test_root)
        for known_id in self.experiments:
            fetched = store.get_experiment(known_id)
            recorded = self.exp_data[known_id]
            self.assertEqual(fetched.experiment_id, known_id)
            self.assertEqual(fetched.name, recorded["name"])
            self.assertEqual(fetched.artifact_location, recorded["artifact_location"])

        # test that fake experiments dont exist.
        # look for random experiment ids between 8000, 15000 since created ones are (100, 2000)
        fake_ids = {random_int(8000, 15000) for _ in range(20)}
        for fake_id in fake_ids:
            with self.assertRaises(Exception):
                store.get_experiment(fake_id)
示例#28
0
 def _create():
     """Create a ``RunData`` holding 100 random metrics, 10 params and 10 tags.

     The entities are attached through the private ``_add_param``,
     ``_add_metric`` and ``_add_tag`` methods rather than the constructor.

     Returns:
         A 4-tuple ``(run_data, metrics, params, tags)`` where the three
         lists hold the objects that were added to the ``RunData``.
     """
     # Integer bounds for the timestamp jitter. NOTE(review): random_int
     # presumably forwards to random.randint, which rejects float arguments
     # on Python 3.12+ -- confirm against the helper.
     metrics = [
         Metric(random_str(10), random_int(0, 1000),
                int(time.time() + random_int(-10000, 10000)))
         for _ in range(100)
     ]
     params = [
         Param(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]
     tags = [
         RunTag(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]
     rd = RunData()
     for p in params:
         rd._add_param(p)
     for m in metrics:
         rd._add_metric(m)
     for t in tags:
         rd._add_tag(t)
     return rd, metrics, params, tags
示例#29
0
    def test_creation_and_hydration(self):
        """Build an ``Experiment`` with a lifecycle stage and check its round-trips."""
        exp_id = str(random_int())
        name = "exp_%d_%d" % (random_int(), random_int())
        lifecycle_stage = LifecycleStage.ACTIVE
        location = random_file(".json")
        expected = (exp_id, name, location, lifecycle_stage)

        original = Experiment(exp_id, name, location, lifecycle_stage)
        self._check(original, *expected)

        as_dict = {
            "experiment_id": exp_id,
            "name": name,
            "artifact_location": location,
            "lifecycle_stage": lifecycle_stage
        }
        self.assertEqual(dict(original), as_dict)

        # Round-trip through the proto representation.
        from_proto = Experiment.from_proto(original.to_proto())
        self._check(from_proto, *expected)

        # Rebuild from the dictionary form.
        from_dict = Experiment.from_dictionary(as_dict)
        self._check(from_dict, *expected)
示例#30
0
    def test_set_deleted_run(self):
        """
        Setting metrics/tags/params/updating run info should not be allowed on deleted runs.

        Deletes a run, confirms its lifecycle stage is DELETED, then checks
        that ``set_tag``, ``log_metric`` and ``log_param`` each raise
        ``MlflowException`` for it.
        """
        store = FileStore(self.test_root)
        exp_id = self.experiments[random_int(0, len(self.experiments) - 1)]
        run_id = self.exp_data[exp_id]['runs'][0]
        store.delete_run(run_id)

        stage = store.get_run(run_id).info.lifecycle_stage
        assert stage == LifecycleStage.DELETED

        # Entities are built inside each callable so construction still
        # happens within the pytest.raises block, as in a direct call.
        rejected_writes = (
            lambda: store.set_tag(run_id, RunTag('a', 'b')),
            lambda: store.log_metric(run_id, Metric('a', 0.0, timestamp=0)),
            lambda: store.log_param(run_id, Param('a', 'b')),
        )
        for write in rejected_writes:
            with pytest.raises(MlflowException):
                write()