import logging
import os
import uuid
from typing import Dict, Optional, Tuple

import cluster_pack
import skein

# ``mlflow`` below is assumed to be tf_yarn's internal mlflow helper module.
from tf_yarn import mlflow

logger = logging.getLogger(__name__)


def launch_remote_check(file: str) -> Tuple[bool, str]:
    logger.info('Launching remote check')
    zip_hdfs, _ = cluster_pack.upload_env(packer=cluster_pack.PEX_PACKER)
    archive_name = os.path.basename(zip_hdfs)
    with skein.Client() as client:
        files = {
            archive_name: zip_hdfs,
            'check_hadoop_env.py': __file__,
        }
        # If tf_yarn is installed as an editable package, ship its sources as well.
        editable_packages = cluster_pack.get_editable_requirements()
        if 'tf_yarn' in editable_packages:
            tf_yarn_zip = cluster_pack.zip_path(editable_packages['tf_yarn'], False)
            logger.info(f"zip path for editable tf_yarn is {tf_yarn_zip}")
            files.update({'tf_yarn': tf_yarn_zip})
        service = skein.Service(
            script=f'./{archive_name} check_hadoop_env.py --file {file}',
            resources=skein.Resources(2 * 1024, 1),
            env={
                'PEX_ROOT': f'/tmp/{uuid.uuid4()}/',
                'PYTHONPATH': '.:',
            },
            files=files,
            instances=1)
        spec = skein.ApplicationSpec(
            {'HADOOP_ENV_CHECKER': service},
            acls=skein.model.ACLs(enable=True, view_users=['*']),
        )
        app = client.submit_and_connect(spec)
        logger.info('Remote check started')
        # The remote script publishes its verdict in the skein key-value store.
        result = app.kv.wait('result').decode()
        app_id = app.id
        app.shutdown()
        return result == "True", app_id
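# Illustrative usage sketch (not part of the original module): how a caller might run the
# remote Hadoop-environment check and act on its outcome. The HDFS path below is a
# hypothetical example value.
def run_hadoop_env_check_example() -> None:
    success, app_id = launch_remote_check("hdfs:///tmp/hadoop_env_check")
    if success:
        logger.info(f"Hadoop environment check {app_id} passed")
    else:
        logger.error(f"Hadoop environment check {app_id} failed")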
here = os.path.dirname(__file__)  # directory containing the tf_yarn package sources


def _setup_task_env(
    tempdir: str,
    files: Optional[Dict[str, str]] = None,
    env: Optional[Dict[str, str]] = None,
    n_try: int = 0
) -> Tuple[Dict[str, str], Dict[str, str]]:
    # Avoid a shared mutable default dict: state must not leak between calls.
    env = env if env is not None else {}
    task_files = _maybe_zip_task_files(files or {}, tempdir)
    # Always ship the tf_yarn package itself alongside the user-provided files.
    task_files[__package__] = cluster_pack.zip_path(here, False, tempdir)

    # Keep the JVM used by libhdfs small so it fits in the container's memory budget.
    _add_to_env(env, "LIBHDFS_OPTS", "-Xms64m -Xmx512m")

    env["TF_YARN_N_TRY"] = str(n_try)

    task_env = {
        **env,
        # Make Python modules/packages passed via ``files`` importable.
        "PYTHONPATH": ".:" + env.get("PYTHONPATH", ""),
        "PEX_ROOT": os.path.join("/tmp", str(uuid.uuid4()))
    }

    if mlflow.use_mlflow:
        task_env["MLFLOW_RUN_ID"] = mlflow.active_run_id()
        task_env["MLFLOW_TRACKING_URI"] = mlflow.get_tracking_uri()
        task_env["GIT_PYTHON_REFRESH"] = "quiet"

    return task_files, task_env
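# The ``_add_to_env`` helper used above is defined elsewhere in the package and is not part
# of this excerpt. The sketch below only illustrates the assumed behaviour (prepend the
# options when the variable already exists, otherwise set it); it is not the original
# implementation.
def _add_to_env(env: Dict[str, str], env_name: str, opts: str) -> None:
    if env_name in env:
        env[env_name] = f"{opts} {env[env_name]}"
    else:
        env[env_name] = opts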
def _maybe_zip_task_files(files: Dict[str, str], tempdir: str) -> Dict[str, str]:
    task_files = {}
    for target, source in files.items():
        assert target not in task_files
        if os.path.isdir(source):
            # Directories are zipped so they can be shipped as a single YARN resource.
            source = cluster_pack.zip_path(source, False, tempdir)
        task_files[target] = source
    return task_files
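# Illustrative usage sketch (not part of the original source): prepare the files and
# environment for a task, passing a directory that _maybe_zip_task_files will zip on the
# fly and a plain file that is shipped as-is. All paths and names below are hypothetical
# example values.
def example_prepare_task(tempdir: str) -> Tuple[Dict[str, str], Dict[str, str]]:
    return _setup_task_env(
        tempdir,
        files={
            "my_package": "/home/user/my_project/my_package",  # directory: zipped before upload
            "train.py": "/home/user/my_project/train.py",      # plain file: shipped as-is
        },
        env={"PYTHONPATH": "/opt/extra_libs"},
        n_try=0
    )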
def zip_path(py_dir: str, include_base_name: bool = True, tmp_dir: str = _get_tmp_dir()):
    return cluster_pack.zip_path(py_dir, include_base_name, tmp_dir)
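# Illustrative usage sketch (not part of the original source; the path is a hypothetical
# example): zip a local package directory without its base name so its contents sit at the
# root of the archive, and return the path of the generated zip.
def _zip_example() -> str:
    return zip_path("/home/user/my_project/my_package", include_base_name=False)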