def test_upload_spec_local_fs_use_cache(mock_pack_spec_in_pex):
    """A second upload with an unchanged spec must hit the cache, not repack."""
    with tempfile.TemporaryDirectory() as tempdir:
        reqs_file = f"{tempdir}/myproject/requirements.txt"
        _write_spec_file(reqs_file, ["cloudpickle==1.4.1"])

        pex_path = os.path.join(tempdir, "package.pex")
        mock_pack_spec_in_pex.return_value = pex_path
        # touch an empty pex so the cached artifact already exists on disk
        with open(pex_path, "w"):
            pass

        first = cluster_pack.upload_spec(reqs_file, pex_path)
        second = cluster_pack.upload_spec(reqs_file, pex_path)

        # only the first upload should have triggered a pack
        mock_pack_spec_in_pex.assert_called_once()
        assert os.path.exists(first)
        assert first == second == pex_path
def test_upload_spec_local_fs():
    """upload_spec packs a requirements file into a pex on the local filesystem."""
    spec_file = os.path.join(
        os.path.dirname(__file__), "resources", "requirements.txt")
    with tempfile.TemporaryDirectory() as tempdir:
        uploaded = cluster_pack.upload_spec(spec_file, f"{tempdir}/package.pex")
        assert os.path.exists(uploaded)
        # metadata file is written next to the pex, keyed by the spec hash
        _check_metadata(
            f"{tempdir}/package.json",
            ["5a5f33b106aad8584345f5a0044a4188ce78b3f4"])
def test_upload_spec_unique_name():
    """When only a target dir is given, the pex name is derived from the project dir."""
    with tempfile.TemporaryDirectory() as tempdir:
        reqs_file = f"{tempdir}/myproject/requirements.txt"
        _write_spec_file(reqs_file, ["cloudpickle==1.4.1"])

        uploaded = cluster_pack.upload_spec(reqs_file, f"{tempdir}")

        assert os.path.exists(uploaded)
        # name pattern: cluster_pack_<parent-dir-of-spec>.pex
        assert uploaded == f"{tempdir}/cluster_pack_myproject.pex"
        _check_metadata(
            f"{tempdir}/cluster_pack_myproject.json",
            ["b8721a3c125d3f7edfa27d7b13236e696f652a16"])
def test_upload_spec_local_fs_changed_reqs(mock_pack_spec_in_pex):
    """Appending a requirement to the spec must invalidate the cache and repack."""
    with tempfile.TemporaryDirectory() as tempdir:
        spec_file = f"{tempdir}/myproject/requirements.txt"
        _write_spec_file(spec_file, ["cloudpickle==1.4.1"])
        pex_file = os.path.join(tempdir, "package.pex")
        mock_pack_spec_in_pex.return_value = pex_file
        # touch an empty pex so the first upload sees an existing artifact
        with open(pex_file, "w"):
            pass
        result_path = cluster_pack.upload_spec(spec_file, pex_file)
        # mutate the spec: the hash changes, so the next upload must repack
        with open(spec_file, "a") as f:
            f.write("skein\n")
        result_path1 = cluster_pack.upload_spec(spec_file, pex_file)
        # BUG FIX: the original line was a bare expression
        # (`mock_pack_spec_in_pex.call_count == 2`) whose result was discarded,
        # so the repack was never actually verified.
        assert mock_pack_spec_in_pex.call_count == 2
        assert os.path.exists(result_path)
        assert os.path.exists(result_path1)
        _check_metadata(
            f"{tempdir}/package.json",
            ["0fd17ced922a2387fa660fb0cb78e1c77fbe3349"])
def test_upload_spec_hdfs(mock_get_user, mock_get_default_fs, mock_pack_spec_in_pex,
                          mock_resolve_fs, mock_dump_archive_metadata,
                          mock_is_archive_up_to_date):
    """An out-of-date archive on HDFS triggers exactly one pack-and-upload."""
    mock_is_archive_up_to_date.return_value = False
    fake_fs = mock.MagicMock()
    fake_fs.exists.return_value = True
    mock_resolve_fs.return_value = fake_fs, ""
    mock_get_default_fs.return_value = "hdfs://"
    mock_get_user.return_value = "testuser"

    spec_file = os.path.join(
        os.path.dirname(__file__), "resources", "requirements.txt")
    target = "hdfs:///user/testuser/envs/myenv.pex"

    uploaded = cluster_pack.upload_spec(spec_file, target)

    mock_pack_spec_in_pex.assert_called_once()
    assert uploaded == target
def run(self, project_uri: str, entry_point: str, params: Dict,
        version: str, backend_config: Dict, tracking_uri: str,
        experiment_id: str) -> SubmittedRun:
    """Submit an MLflow project to a YARN cluster via skein.

    Fetches the project, resolves its environment spec (conda.yaml or
    requirements.txt), packs/uploads it with cluster_pack, then launches
    the entry point through skein_launcher and tags the MLflow run with
    the resulting YARN application id.

    :param project_uri: URI of the MLflow project to run.
    :param entry_point: name of the project entry point to execute.
    :param params: parameters passed to the entry point.
    :param version: project version (e.g. a git ref) to fetch.
    :param backend_config: backend configuration dict; keys present in the
        default backend dict are overridden by these values.
    :param tracking_uri: MLflow tracking URI (unused here; the active
        tracking URI is taken from mlflow.get_tracking_uri()).
    :param experiment_id: MLflow experiment id for the run.
    :return: a YarnSubmittedRun wrapping the skein application.
    :raises ValueError: if the project declares no conda env and has no
        requirements.txt.
    """
    _logger.info('using yarn backend')
    _logger.info(locals())

    work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
    active_run = get_or_create_run(None, project_uri, experiment_id, work_dir,
                                   version, entry_point, params)
    _logger.info(f"run_id={active_run.info.run_id}")
    _logger.info(f"work_dir={work_dir}")

    project = load_project(work_dir)
    storage_dir = backend_config[PROJECT_STORAGE_DIR]
    entry_point_command = project.get_entry_point(entry_point)\
        .compute_command(params, storage_dir)
    _logger.info(f"entry_point_command={entry_point_command}")

    if project.conda_env_path:
        spec_file = project.conda_env_path
    else:
        spec_file = os.path.join(work_dir, "requirements.txt")
        if not os.path.exists(spec_file):
            # BUG FIX: the original raised a bare `ValueError` with no
            # message, leaving the user with no hint about the cause.
            raise ValueError(
                f"Cannot build the execution environment: no conda env "
                f"declared and no requirements.txt found in {work_dir}")

    package_path = cluster_pack.upload_spec(spec_file)
    _logger.info(package_path)

    # ship every regular file at the project root alongside the package
    additional_files = [
        full_path
        for full_path in (os.path.join(work_dir, name)
                          for name in os.listdir(work_dir))
        if os.path.isfile(full_path)
    ]

    entry_point, args = try_split_cmd(entry_point_command)
    _logger.info(f"args {entry_point} {args}")

    if "MLFLOW_YARN_TESTS" in os.environ:
        # we need to have a real tracking server setup to be able to push the run id here
        env = {"MLFLOW_TRACKING_URI": "file:/tmp/mlflow"}
    else:
        env = {
            "MLFLOW_RUN_ID": active_run.info.run_id,
            "MLFLOW_TRACKING_URI": mlflow.get_tracking_uri(),
            "MLFLOW_EXPERIMENT_ID": experiment_id
        }

    _backend_dict = _get_backend_dict(work_dir)
    # update config with what has been passed with --backend-config <json-new-config>
    for key in _backend_dict:
        if key in backend_config:
            _backend_dict[key] = backend_config[key]
    _logger.info(f"backend config: {_backend_dict}")

    app_id = skein_launcher.submit(self._skein_client,
                                   module_name=entry_point,
                                   args=args,
                                   package_path=package_path,
                                   additional_files=additional_files,
                                   env_vars=env,
                                   process_logs=_upload_logs,
                                   **_backend_dict)
    MlflowClient().set_tag(active_run.info.run_id, "skein_application_id", app_id)
    return YarnSubmittedRun(self._skein_client, app_id, active_run.info.run_id)