Example #1
def test_upload_spec_local_fs_use_cache(mock_pack_spec_in_pex):
    """Uploading an unchanged spec twice must hit the cache, packing once."""
    with tempfile.TemporaryDirectory() as tempdir:
        # Requirements file lives in a project sub-directory.
        spec_file = f"{tempdir}/myproject/requirements.txt"
        _write_spec_file(spec_file, ["cloudpickle==1.4.1"])

        # Pre-create an empty pex so the cache check can find it on disk.
        pex_file = os.path.join(tempdir, "package.pex")
        mock_pack_spec_in_pex.return_value = pex_file
        open(pex_file, "w").close()

        first = cluster_pack.upload_spec(spec_file, pex_file)
        second = cluster_pack.upload_spec(spec_file, pex_file)

        # The second call must have been served from cache.
        mock_pack_spec_in_pex.assert_called_once()
        assert os.path.exists(first)
        assert first == second == pex_file
Example #2
def test_upload_spec_local_fs():
    """Upload a real requirements file to local fs; check pex + metadata."""
    resources_dir = os.path.dirname(__file__)
    spec_file = os.path.join(resources_dir, "resources", "requirements.txt")
    with tempfile.TemporaryDirectory() as tempdir:
        uploaded = cluster_pack.upload_spec(
            spec_file, f"{tempdir}/package.pex")
        assert os.path.exists(uploaded)
        # Metadata must record the hash of the uploaded requirements.
        _check_metadata(f"{tempdir}/package.json",
                        ["5a5f33b106aad8584345f5a0044a4188ce78b3f4"])
Example #3
def test_upload_spec_unique_name():
    """When only a directory is given, the pex name derives from the project."""
    with tempfile.TemporaryDirectory() as tempdir:
        spec_file = f"{tempdir}/myproject/requirements.txt"
        _write_spec_file(spec_file, ["cloudpickle==1.4.1"])

        # Pass the bare directory; upload_spec must pick a unique file name.
        uploaded = cluster_pack.upload_spec(spec_file, f"{tempdir}")

        expected_pex = f"{tempdir}/cluster_pack_myproject.pex"
        assert os.path.exists(uploaded)
        assert uploaded == expected_pex
        _check_metadata(f"{tempdir}/cluster_pack_myproject.json",
                        ["b8721a3c125d3f7edfa27d7b13236e696f652a16"])
Example #4
def test_upload_spec_local_fs_changed_reqs(mock_pack_spec_in_pex):
    """A modified spec file must trigger a repack instead of a cache hit."""
    with tempfile.TemporaryDirectory() as tempdir:
        spec_file = f"{tempdir}/myproject/requirements.txt"
        _write_spec_file(spec_file, ["cloudpickle==1.4.1"])

        # Pre-create an empty pex so upload_spec sees an existing package.
        pex_file = os.path.join(tempdir, "package.pex")
        mock_pack_spec_in_pex.return_value = pex_file
        with open(pex_file, "w"):
            pass

        result_path = cluster_pack.upload_spec(spec_file, pex_file)

        # Change the requirements: the cached hash no longer matches.
        with open(spec_file, "a") as f:
            f.write("skein\n")

        result_path1 = cluster_pack.upload_spec(spec_file, pex_file)

        # Fixed: original was a bare comparison with no `assert`,
        # so the repack was never actually verified.
        assert mock_pack_spec_in_pex.call_count == 2
        assert os.path.exists(result_path)
        assert os.path.exists(result_path1)
        _check_metadata(f"{tempdir}/package.json",
                        ["0fd17ced922a2387fa660fb0cb78e1c77fbe3349"])
Example #5
def test_upload_spec_hdfs(mock_get_user, mock_get_default_fs,
                          mock_pack_spec_in_pex, mock_resolve_fs,
                          mock_dump_archive_metadata,
                          mock_is_archive_up_to_date):
    """Upload to an hdfs:// target resolves through the mocked filesystem."""
    hdfs_target = "hdfs:///user/testuser/envs/myenv.pex"

    # Force a (re)pack: the remote archive is reported as stale.
    mock_is_archive_up_to_date.return_value = False
    mock_fs = mock.MagicMock()
    mock_fs.exists.return_value = True
    mock_resolve_fs.return_value = mock_fs, ""
    mock_get_default_fs.return_value = "hdfs://"
    mock_get_user.return_value = "testuser"

    spec_file = os.path.join(os.path.dirname(__file__), "resources",
                             "requirements.txt")
    result_path = cluster_pack.upload_spec(spec_file, hdfs_target)

    mock_pack_spec_in_pex.assert_called_once()
    assert result_path == "hdfs:///user/testuser/envs/myenv.pex"
Example #6
    def run(self, project_uri: str, entry_point: str, params: Dict,
            version: str, backend_config: Dict, tracking_uri: str,
            experiment_id: str) -> SubmittedRun:
        """Run an MLflow project on YARN via skein.

        Fetches the project, packages its environment with cluster_pack,
        submits the entry-point command through skein_launcher, tags the
        MLflow run with the YARN application id, and returns a
        YarnSubmittedRun handle for monitoring.

        Raises:
            ValueError: if the project declares no conda env and has no
                requirements.txt in its working directory.
        """
        _logger.info('using yarn backend')
        _logger.info(locals())
        work_dir = fetch_and_validate_project(project_uri, version,
                                              entry_point, params)
        active_run = get_or_create_run(None, project_uri, experiment_id,
                                       work_dir, version, entry_point, params)
        _logger.info(f"run_id={active_run.info.run_id}")
        _logger.info(f"work_dir={work_dir}")
        project = load_project(work_dir)

        storage_dir = backend_config[PROJECT_STORAGE_DIR]

        entry_point_command = project.get_entry_point(entry_point)\
            .compute_command(params, storage_dir)

        _logger.info(f"entry_point_command={entry_point_command}")

        # Prefer the project's conda env; fall back to requirements.txt.
        if project.conda_env_path:
            spec_file = project.conda_env_path
        else:
            spec_file = os.path.join(work_dir, "requirements.txt")
            if not os.path.exists(spec_file):
                # Fixed: original raised a bare ValueError with no message.
                raise ValueError(
                    f"No conda environment and no requirements.txt found "
                    f"in project working directory {work_dir}")

        package_path = cluster_pack.upload_spec(spec_file)
        _logger.info(package_path)

        # Ship every top-level regular file of the project alongside the pex.
        additional_files = [
            os.path.join(work_dir, file)
            for file in os.listdir(work_dir)
            if os.path.isfile(os.path.join(work_dir, file))
        ]

        entry_point, args = try_split_cmd(entry_point_command)

        _logger.info(f"args {entry_point} {args}")

        if "MLFLOW_YARN_TESTS" in os.environ:
            # we need to have a real tracking server setup to be able to push the run id here
            env = {"MLFLOW_TRACKING_URI": "file:/tmp/mlflow"}
        else:
            env = {
                "MLFLOW_RUN_ID": active_run.info.run_id,
                "MLFLOW_TRACKING_URI": mlflow.get_tracking_uri(),
                "MLFLOW_EXPERIMENT_ID": experiment_id
            }

        _backend_dict = _get_backend_dict(work_dir)
        # update config with what has been passed with --backend-config <json-new-config>
        for key in _backend_dict.keys():
            if key in backend_config:
                _backend_dict[key] = backend_config[key]

        _logger.info(f"backend config: {_backend_dict}")

        app_id = skein_launcher.submit(self._skein_client,
                                       module_name=entry_point,
                                       args=args,
                                       package_path=package_path,
                                       additional_files=additional_files,
                                       env_vars=env,
                                       process_logs=_upload_logs,
                                       **_backend_dict)

        # Tag the MLflow run so the YARN application can be found later.
        MlflowClient().set_tag(active_run.info.run_id, "skein_application_id",
                               app_id)
        return YarnSubmittedRun(self._skein_client, app_id,
                                active_run.info.run_id)