def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    """
    Create the directory and metadata file for a new experiment.

    :param name: Name stored on the new experiment.
    :param experiment_id: ID of the experiment; also used as the directory name
                          created under ``self.root_directory``.
    :param artifact_uri: Artifact location for the experiment. When falsy, a location
                         is built from ``self.artifact_root_uri`` and ``experiment_id``.
    :return: The ``experiment_id`` that was passed in.
    """
    self._check_root_dir()
    experiment_dir = mkdir(self.root_directory, experiment_id)
    if not artifact_uri:
        # No explicit location supplied: fall back to one under the artifact root.
        artifact_uri = build_path(self.artifact_root_uri, experiment_id)
    new_experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
    write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, dict(new_experiment))
    return experiment_id
def test_yaml_read_and_write(tmpdir):
    """
    Round-trip a dict through ``write_yaml``/``read_yaml``, inspect the raw file text,
    and check that ``safe_edit_yaml`` applies an edit only inside its ``with`` block.
    """
    temp_dir = str(tmpdir)
    yaml_file = random_file("yaml")
    long_value = 1
    data = {
        "a": random_int(),
        "B": random_int(),
        "text_value": u"中文",
        "long_value": long_value,
        "int_value": 32,
        "text_value_2": u"hi",
    }

    file_utils.write_yaml(temp_dir, yaml_file, data)
    assert data == file_utils.read_yaml(temp_dir, yaml_file)

    with codecs.open(os.path.join(temp_dir, yaml_file), encoding="utf-8") as handle:
        contents = handle.read()
    assert "!!python" not in contents
    # Check that UTF-8 strings are written properly to the file (rather than as ASCII
    # representations of their byte sequences).
    assert u"中文" in contents

    def add_more_text(old_dict):
        old_dict["more_text"] = u"西班牙语"
        return old_dict

    # The extra key must be absent before the edit, present inside the context,
    # and absent again once the context has exited.
    assert "more_text" not in file_utils.read_yaml(temp_dir, yaml_file)
    with safe_edit_yaml(temp_dir, yaml_file, add_more_text):
        edited = file_utils.read_yaml(temp_dir, yaml_file)
        assert "more_text" in edited
        assert edited["more_text"] == u"西班牙语"
    assert "more_text" not in file_utils.read_yaml(temp_dir, yaml_file)
def test_bad_experiment_id_recorded_for_run(self):
    """
    Rewrite one run's ``meta.yaml`` so it records a different experiment ID, then check
    that fetching that run raises and that it no longer appears in search results,
    while the remaining runs can still be fetched.
    """
    store = FileStore(self.test_root)
    default_exp = store.get_experiment(FileStore.DEFAULT_EXPERIMENT_ID)
    exp_id = default_exp.experiment_id
    runs_before = self._search(store, exp_id)
    known_run_ids = self.exp_data[exp_id]["runs"]
    assert len(runs_before) == len(known_run_ids)

    # change experiment pointer in run
    bad_run_id = str(known_run_ids[0])
    run_path = os.path.join(self.test_root, str(exp_id), bad_run_id)
    run_meta = read_yaml(run_path, "meta.yaml")
    run_meta["experiment_id"] = 1
    write_yaml(run_path, "meta.yaml", run_meta, True)

    with pytest.raises(MlflowException) as e:
        store.get_run(bad_run_id)
        # NOTE(review): this assertion sits after the call expected to raise, so it is
        # never executed when the test passes. Checking the message would have to
        # happen after the ``with`` block, via ``e.value`` -- TODO confirm the expected
        # message text before moving it.
        assert e.message.contains("not found")

    runs_after = self._search(store, exp_id)
    assert len(runs_after) == len(runs_before) - 1

    # Every other run must still be retrievable.
    for run_id in known_run_ids:
        if run_id == bad_run_id:
            continue
        store.get_run(run_id)
def _overwrite_run_info(self, run_info):
    """
    Write ``run_info`` to the metadata file in its run directory, overwriting the file.

    :param run_info: Run info object; its ``experiment_id`` and ``run_uuid`` locate the
                     run directory.
    """
    write_yaml(
        self._get_run_dir(run_info.experiment_id, run_info.run_uuid),
        FileStore.META_DATA_FILE_NAME,
        _make_persisted_run_info_dict(run_info),
        overwrite=True,
    )
def create_run(self, experiment_id, user_id, run_name, source_type,
               source_name, entry_point_name, start_time, source_version, tags):
    """
    Creates a run with the specified attributes.

    The run metadata is written to a new run directory, and sub-directories for
    metrics, params and artifacts are created beneath it.

    :param run_name: Not read by this implementation; the stored name is
                     ``"Run <n>"`` where ``n`` is the number of run UUIDs already
                     listed for the experiment.
    :return: A ``Run`` wrapping the new run info, with ``run_data`` set to ``None``.
    :raises Exception: If ``get_experiment`` returns ``None`` for ``experiment_id``.
    """
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        message = ("Could not create run under experiment with ID %s - no such experiment "
                   "exists.") % experiment_id
        raise Exception(message)

    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    existing_run_count = len(self._list_run_uuids(experiment_id))
    run_info = RunInfo(
        run_uuid=run_uuid,
        experiment_id=experiment_id,
        name="Run %s" % existing_run_count,
        artifact_uri=artifact_uri,
        source_type=source_type,
        source_name=source_name,
        entry_point_name=entry_point_name,
        user_id=user_id,
        status=RunStatus.RUNNING,
        start_time=start_time,
        end_time=None,
        source_version=source_version,
        tags=tags,
    )

    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, dict(run_info))
    for folder_name in (FileStore.METRICS_FOLDER_NAME,
                        FileStore.PARAMS_FOLDER_NAME,
                        FileStore.ARTIFACTS_FOLDER_NAME):
        mkdir(run_dir, folder_name)
    return Run(run_info=run_info, run_data=None)
def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    """
    Create the directory and metadata file for a new experiment.

    :param name: Name stored on the new experiment.
    :param experiment_id: ID of the experiment; its string form is the directory name
                          created under ``self.root_directory``.
    :param artifact_uri: Artifact location for the experiment. When falsy, the
                         experiment's own directory path is converted with
                         ``path_to_local_file_uri`` and used instead.
    :return: The ``experiment_id`` that was passed in.
    """
    self._check_root_dir()
    dir_name = str(experiment_id)
    experiment_dir = mkdir(self.root_directory, dir_name)
    if not artifact_uri:
        local_path = os.path.join(self.root_directory, dir_name)
        artifact_uri = path_to_local_file_uri(local_path)
    new_experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
    write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, dict(new_experiment))
    return experiment_id
def update_run_info(self, run_uuid, run_status, end_time):
    """
    Overwrite a run's persisted metadata with a new status and end time.

    :param run_uuid: Identifier passed to ``get_run`` to load the current run info.
    :param run_status: Status passed to ``copy_with_overrides``.
    :param end_time: End time passed to ``copy_with_overrides``.
    :return: The run info object returned by ``copy_with_overrides``.
    """
    current_info = self.get_run(run_uuid).info
    updated_info = current_info.copy_with_overrides(run_status, end_time)
    write_yaml(
        self._get_run_dir(current_info.experiment_id, current_info.run_uuid),
        FileStore.META_DATA_FILE_NAME,
        dict(updated_info),
        overwrite=True,
    )
    return updated_info
def rename_experiment(self, experiment_id, new_name):
    """
    Rename an active experiment and overwrite its metadata file.

    The lifecycle stage is validated before the experiment object is modified, so a
    rejected rename leaves the object returned by ``_get_experiment`` untouched.

    :param experiment_id: ID of the experiment; also the directory name under
                          ``self.root_directory`` holding its metadata file.
    :param new_name: New name for the experiment.
    :raises MlflowException: If ``_get_experiment`` returns ``None``.
    :raises Exception: If the experiment's lifecycle stage is not
                       ``LifecycleStage.ACTIVE``.
    """
    meta_dir = os.path.join(self.root_directory, experiment_id)
    # if experiment is malformed, will raise error
    experiment = self._get_experiment(experiment_id)
    if experiment is None:
        raise MlflowException("Experiment '%s' does not exist." % experiment_id,
                              databricks_pb2.RESOURCE_DOES_NOT_EXIST)
    # Validate first: only an active experiment may be renamed.
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise Exception("Cannot rename experiment in non-active lifecycle stage."
                        " Current stage: %s" % experiment.lifecycle_stage)
    experiment._set_name(new_name)
    write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, dict(experiment), overwrite=True)
def rename_experiment(self, experiment_id, new_name):
    """
    Rename an active experiment and overwrite its metadata file.

    The lifecycle stage is validated before the experiment object is modified, so a
    rejected rename leaves the object returned by ``_get_experiment`` untouched.

    :param experiment_id: ID of the experiment; its string form is the directory name
                          under ``self.root_directory`` holding its metadata file.
    :param new_name: New name for the experiment.
    :raises Exception: If the experiment's lifecycle stage is not
                       ``Experiment.ACTIVE_LIFECYCLE``.
    """
    meta_dir = os.path.join(self.root_directory, str(experiment_id))
    experiment = self._get_experiment(experiment_id)
    # Validate first: only an active experiment may be renamed.
    if experiment.lifecycle_stage != Experiment.ACTIVE_LIFECYCLE:
        raise Exception(
            "Cannot rename experiment in non-active lifecycle stage."
            " Current stage: %s" % experiment.lifecycle_stage)
    experiment._set_name(new_name)
    write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, dict(experiment), overwrite=True)
def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    """
    Create the directory and metadata file for a new experiment.

    :param name: Name stored on the new experiment.
    :param experiment_id: ID of the experiment; its string form is the directory name
                          created under ``self.root_directory``.
    :param artifact_uri: Artifact location for the experiment. When falsy, the
                         experiment ID is joined onto ``self.artifact_root_uri`` with
                         ``posixpath.join``.
    :return: The ``experiment_id`` that was passed in.
    """
    dir_name = str(experiment_id)
    if not artifact_uri:
        artifact_uri = posixpath.join(self.artifact_root_uri, dir_name)
    self._check_root_dir()
    experiment_dir = mkdir(self.root_directory, dir_name)
    meta = dict(Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE))
    # tags are added to the file system and are not written to this dict on write
    # As such, we should not include them in the meta file.
    meta.pop('tags')
    write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, meta)
    return experiment_id
def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
               entry_point_name, start_time, source_version, tags, parent_run_id):
    """
    Creates a run with the specified attributes.

    The run metadata is written to a new run directory, sub-directories for metrics,
    params and artifacts are created, and tags are then set on the run.

    :param experiment_id: Experiment to create the run under; ``None`` selects
                          ``FileStore.DEFAULT_EXPERIMENT_ID``.
    :param run_name: When truthy, stored as the ``MLFLOW_RUN_NAME`` tag.
    :param tags: Iterable of tags to set on the run. ``None`` is treated as no tags.
    :param parent_run_id: When truthy, stored as the ``MLFLOW_PARENT_RUN_ID`` tag.
    :return: A ``Run`` wrapping the new run info, with ``run_data`` set to ``None``.
    :raises MlflowException: If the experiment does not exist or is not active.
    """
    experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise MlflowException(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id,
            databricks_pb2.RESOURCE_DOES_NOT_EXIST)
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            "Could not create run under non-active experiment with ID "
            "%s." % experiment_id,
            databricks_pb2.INVALID_STATE)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid, experiment_id=experiment_id,
                       name="",
                       artifact_uri=artifact_uri, source_type=source_type,
                       source_name=source_name,
                       entry_point_name=entry_point_name, user_id=user_id,
                       status=RunStatus.RUNNING, start_time=start_time, end_time=None,
                       source_version=source_version, lifecycle_stage=LifecycleStage.ACTIVE)
    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    run_info_dict = _make_persisted_run_info_dict(run_info)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    # Guard against ``tags=None``: the run is already on disk at this point, so a
    # TypeError here would leave it persisted while the caller sees a failure.
    for tag in tags or ():
        self.set_tag(run_uuid, tag)
    if parent_run_id:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
    if run_name:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return Run(run_info=run_info, run_data=None)
def _create_experiment_with_id(self, name, experiment_id, artifact_uri, tags):
    """
    Create the directory and metadata file for a new experiment, then set its tags.

    :param name: Name stored on the new experiment.
    :param experiment_id: ID of the experiment; its string form is the directory name
                          created under ``self.root_directory``.
    :param artifact_uri: Artifact location for the experiment. When falsy, the
                         experiment ID is appended to ``self.artifact_root_uri`` with
                         ``append_to_uri_path``.
    :param tags: Iterable of tags passed one by one to ``set_experiment_tag``, or
                 ``None`` for no tags.
    :return: The ``experiment_id`` that was passed in.
    """
    dir_name = str(experiment_id)
    if not artifact_uri:
        artifact_uri = append_to_uri_path(self.artifact_root_uri, dir_name)
    self._check_root_dir()
    experiment_dir = mkdir(self.root_directory, dir_name)
    meta = dict(Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE))
    # tags are added to the file system and are not written to this dict on write
    # As such, we should not include them in the meta file.
    meta.pop("tags")
    write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, meta)
    for tag in (tags if tags is not None else ()):
        self.set_experiment_tag(experiment_id, tag)
    return experiment_id
def test_yaml_read_and_write(tmpdir):
    """
    Round-trip a dict through ``write_yaml``/``read_yaml`` and inspect the raw file
    text for Python-specific tags and for unescaped UTF-8 content.
    """
    temp_dir = str(tmpdir)
    yaml_file = random_file("yaml")
    if six.PY2:
        long_value = long(1)  # pylint: disable=undefined-variable
    else:
        long_value = 1
    data = {
        "a": random_int(),
        "B": random_int(),
        "text_value": u"中文",
        "long_value": long_value,
        "int_value": 32,
        "text_value_2": u"hi",
    }

    file_utils.write_yaml(temp_dir, yaml_file, data)
    assert data == file_utils.read_yaml(temp_dir, yaml_file)

    with codecs.open(file_utils.build_path(temp_dir, yaml_file), encoding="utf-8") as handle:
        contents = handle.read()
    assert "!!python" not in contents
    # Check that UTF-8 strings are written properly to the file (rather than as ASCII
    # representations of their byte sequences).
    assert u"中文" in contents
def create_run(self, experiment_id, user_id, run_name, source_type,
               source_name, entry_point_name, start_time, source_version, tags):
    """
    Creates a run with the specified attributes.

    The run metadata is written to a new run directory, sub-directories for metrics,
    params and artifacts are created, and tags are then set on the run.

    :param run_name: When truthy, stored as the ``MLFLOW_RUN_NAME`` tag.
    :param tags: Iterable of tags to set on the run. ``None`` is treated as no tags.
    :return: A ``Run`` wrapping the new run info, with ``run_data`` set to ``None``.
    :raises Exception: If the experiment does not exist or is not active.
    """
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise Exception(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id)
    if experiment.lifecycle_stage != Experiment.ACTIVE_LIFECYCLE:
        raise Exception(
            'Could not create run under non-active experiment with ID '
            '%s.' % experiment_id)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid, experiment_id=experiment_id,
                       name="",
                       artifact_uri=artifact_uri, source_type=source_type,
                       source_name=source_name,
                       entry_point_name=entry_point_name, user_id=user_id,
                       status=RunStatus.RUNNING, start_time=start_time, end_time=None,
                       source_version=source_version,
                       lifecycle_stage=RunInfo.ACTIVE_LIFECYCLE)
    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, _make_persisted_run_info_dict(run_info))
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    # Guard against ``tags=None``: the run is already on disk at this point, so a
    # TypeError here would leave it persisted while the caller sees a failure.
    for tag in tags or ():
        self.set_tag(run_uuid, tag)
    if run_name:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return Run(run_info=run_info, run_data=None)
def create_run(self, experiment_id, user_id, start_time, tags):
    """
    Creates a run with the specified attributes.

    The run metadata is written to a new run directory, sub-directories for metrics,
    params and artifacts are created, and tags are then set on the run.

    :param experiment_id: Experiment to create the run under; ``None`` selects
                          ``FileStore.DEFAULT_EXPERIMENT_ID``.
    :param user_id: User ID stored on the run info.
    :param start_time: Start time stored on the run info.
    :param tags: Iterable of tags to set on the run. ``None`` is treated as no tags.
    :return: The result of ``self.get_run`` for the newly created run.
    :raises MlflowException: If the experiment does not exist or is not active.
    """
    experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise MlflowException(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id,
            databricks_pb2.RESOURCE_DOES_NOT_EXIST,
        )
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            "Could not create run under non-active experiment with ID %s." % experiment_id,
            databricks_pb2.INVALID_STATE,
        )
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(
        run_uuid=run_uuid,
        run_id=run_uuid,
        experiment_id=experiment_id,
        artifact_uri=artifact_uri,
        user_id=user_id,
        status=RunStatus.to_string(RunStatus.RUNNING),
        start_time=start_time,
        end_time=None,
        lifecycle_stage=LifecycleStage.ACTIVE,
    )
    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
    mkdir(run_dir)
    run_info_dict = _make_persisted_run_info_dict(run_info)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    # Guard against ``tags=None``: the run is already on disk at this point, so a
    # TypeError here would leave it persisted while the caller sees a failure.
    for tag in tags or ():
        self.set_tag(run_uuid, tag)
    return self.get_run(run_id=run_uuid)
def test_yaml_read_and_write(self):
    """
    Round-trip a dict through ``write_yaml``/``read_yaml`` in ``self.test_folder`` and
    inspect the raw file text for Python-specific tags and unescaped UTF-8 content.
    """
    yaml_file = random_file("yaml")
    if six.PY2:
        long_value = long(1)  # pylint: disable=undefined-variable
    else:
        long_value = 1
    data = {
        "a": random_int(),
        "B": random_int(),
        "text_value": u"中文",
        "long_value": long_value,
        "int_value": 32,
        "text_value_2": u"hi",
    }

    file_utils.write_yaml(self.test_folder, yaml_file, data)
    self.assertEqual(data, file_utils.read_yaml(self.test_folder, yaml_file))

    raw_path = file_utils.build_path(self.test_folder, yaml_file)
    with codecs.open(raw_path, encoding="utf-8") as handle:
        contents = handle.read()
    self.assertNotIn("!!python", contents)
    # Check that UTF-8 strings are written properly to the file (rather than as ASCII
    # representations of their byte sequences).
    self.assertIn(u"中文", contents)
def _create_root(self, root):
    """
    Build a randomly populated file-store directory tree under ``root``.

    Creates three randomly numbered experiments plus ``Experiment.DEFAULT_EXPERIMENT_ID``.
    Each experiment gets a metadata file and two runs; each run gets a metadata file,
    five param files, three metric files of ten values each, and an artifacts folder.
    What was generated is recorded in ``self.exp_data`` and ``self.run_data``.

    :param root: Directory under which the ``test_file_store_<n>`` root is created.
    """
    self.test_root = os.path.join(root, "test_file_store_%d" % random_int())
    os.mkdir(self.test_root)
    self.experiments = [random_int(100, int(1e9)) for _ in range(3)]
    self.exp_data = {}
    self.run_data = {}
    # Include default experiment
    self.experiments.append(Experiment.DEFAULT_EXPERIMENT_ID)

    for exp in self.experiments:
        # create experiment
        exp_dir = os.path.join(self.test_root, str(exp))
        os.makedirs(exp_dir)
        exp_meta = {
            "experiment_id": exp,
            "name": random_str(),
            "artifact_location": exp_dir,
        }
        self.exp_data[exp] = exp_meta
        # The metadata file is written before "runs" is added, so the file on disk
        # does not contain that key.
        write_yaml(exp_dir, FileStore.META_DATA_FILE_NAME, exp_meta)

        # add runs
        run_ids = []
        exp_meta["runs"] = run_ids
        for _run_index in range(2):
            run_uuid = uuid.uuid4().hex
            run_ids.append(run_uuid)
            run_dir = os.path.join(exp_dir, run_uuid)
            os.makedirs(run_dir)
            run_meta = {
                "run_uuid": run_uuid,
                "experiment_id": exp,
                "name": random_str(random_int(10, 40)),
                "source_type": random_int(1, 4),
                "source_name": random_str(random_int(100, 300)),
                "entry_point_name": random_str(random_int(100, 300)),
                "user_id": random_str(random_int(10, 25)),
                "status": random_int(1, 5),
                "start_time": random_int(1, 10),
                "end_time": random_int(20, 30),
                "source_version": random_str(random_int(10, 30)),
                "tags": [],
                "artifact_uri": "%s/%s" % (run_dir, FileStore.ARTIFACTS_FOLDER_NAME),
            }
            # As above: params and metrics are added to the in-memory record only
            # after the metadata file has been written.
            write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_meta)
            self.run_data[run_uuid] = run_meta

            # params
            params_dir = os.path.join(run_dir, FileStore.PARAMS_FOLDER_NAME)
            os.makedirs(params_dir)
            params = {}
            for _param_index in range(5):
                param_name = random_str(random_int(4, 12))
                param_value = random_str(random_int(10, 15))
                with open(os.path.join(params_dir, param_name), 'w') as param_handle:
                    param_handle.write(param_value)
                params[param_name] = param_value
            run_meta["params"] = params

            # metrics
            metrics_dir = os.path.join(run_dir, FileStore.METRICS_FOLDER_NAME)
            os.makedirs(metrics_dir)
            metrics = {}
            for _metric_index in range(3):
                metric_name = random_str(random_int(6, 10))
                timestamp = int(time.time())
                metric_path = os.path.join(metrics_dir, metric_name)
                recorded = []
                for _value_index in range(10):
                    metric_value = random_int(100, 2000)
                    # Each value is stamped strictly later than the previous one.
                    timestamp += random_int(10000, 2000000)
                    recorded.append((timestamp, metric_value))
                    with open(metric_path, 'a') as metric_handle:
                        metric_handle.write("%d %d\n" % (timestamp, metric_value))
                metrics[metric_name] = recorded
            run_meta["metrics"] = metrics

            # artifacts
            os.makedirs(os.path.join(run_dir, FileStore.ARTIFACTS_FOLDER_NAME))
def _create_experiment_with_id(self, name, experiment_id):
    """
    Create the directory and metadata file for a new experiment.

    The directory returned by ``mkdir`` is also passed to ``Experiment`` as its
    third argument.

    :param name: Name stored on the new experiment.
    :param experiment_id: ID of the experiment; its string form is the directory name
                          created under ``self.root_directory``.
    :return: The ``experiment_id`` that was passed in.
    """
    self._check_root_dir()
    experiment_dir = mkdir(self.root_directory, str(experiment_id))
    meta = dict(Experiment(experiment_id, name, experiment_dir))
    write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, meta)
    return experiment_id
def test_yaml_read_and_write(self):
    """Check that a dict written with ``write_yaml`` is read back equal by ``read_yaml``."""
    yaml_file = random_file("yaml")
    data = dict(a=random_int(), B=random_int())
    folder = self.test_folder
    file_utils.write_yaml(folder, yaml_file, data)
    self.assertEqual(data, file_utils.read_yaml(folder, yaml_file))
def __exit__(self, *args):
    """
    Write ``self._original`` back to the YAML file, overwriting its contents.

    Returns ``None``, so an exception raised inside the ``with`` block is not
    suppressed.
    """
    root, file_name = self._root, self._file_name
    write_yaml(root, file_name, self._original, overwrite=True)
def __enter__(self):
    """
    Apply ``self._edit_func`` to a shallow copy of ``self._original`` and write the
    result to the YAML file, overwriting its contents.

    Returns ``None``, so ``with ... as x`` binds ``x`` to ``None``.
    """
    working_copy = self._original.copy()
    edited = self._edit_func(working_copy)
    write_yaml(self._root, self._file_name, edited, overwrite=True)