示例#1
0
def launch_remote_check(file: str) -> Tuple[bool, str]:
    """Run check_hadoop_env.py inside a skein container on the YARN cluster.

    Uploads the current Python environment as a pex archive, submits a
    one-service skein application that executes the check script against
    *file*, and blocks until the remote side publishes its result in the
    application key-value store.

    Args:
        file: Path passed to ``check_hadoop_env.py --file`` on the cluster.

    Returns:
        Tuple of (check passed, YARN application id).
    """
    import uuid  # needed to build a unique PEX_ROOT per launch

    logger.info('Launching remote check')
    zip_hdfs, _ = cluster_pack.upload_env(packer=cluster_pack.PEX_PACKER)
    archive_name = os.path.basename(zip_hdfs)
    with skein.Client() as client:
        files = {
            archive_name: zip_hdfs,
            'check_hadoop_env.py': __file__,
        }
        editable_packages = cluster_pack.get_editable_requirements()
        if 'tf_yarn' in editable_packages:
            tf_yarn_zip = cluster_pack.zip_path(editable_packages['tf_yarn'],
                                                False)
            logger.info(f"zip path for editable tf_yarn is {tf_yarn_zip}")
            files.update({'tf_yarn': tf_yarn_zip})
        service = skein.Service(
            script=f'./{archive_name} check_hadoop_env.py --file {file}',
            resources=skein.Resources(2 * 1024, 1),
            env={
                # BUG FIX: the original string lacked the f-prefix, so
                # PEX_ROOT was the literal text '/tmp/{uuid.uuid4()}/'
                # instead of a unique per-launch directory.
                'PEX_ROOT': f'/tmp/{uuid.uuid4()}/',
                'PYTHONPATH': '.:',
            },
            files=files,
            instances=1)
        spec = skein.ApplicationSpec(
            {'HADOOP_ENV_CHECKER': service},
            acls=skein.model.ACLs(enable=True, view_users=['*']),
        )
        app = client.submit_and_connect(spec)

        logger.info('Remote check started')
        # The remote script writes "True"/"False" under the 'result' key.
        result = app.kv.wait('result').decode()
        app_id = app.id
        app.shutdown()
        return result == "True", app_id
示例#2
0
def main():
    """Run the experiment once locally, then submit the distributed job to YARN.

    The local run forces model_to_estimator._save_first_checkpoint (l457 in
    https://github.com/tensorflow/estimator/blob/1d55f01d8af871a35ef83fc3354b9feaa671cbe1/tensorflow_estimator/python/estimator/keras.py)
    to write the first checkpoint up front; otherwise all workers race to
    save it at the same time.
    """
    experiment_fn(HDFS_DIR)

    pyenv_zip_path, _ = cluster_pack.upload_env()
    editable_requirements = cluster_pack.get_editable_requirements()

    task_specs = {
        "chief": TaskSpec(memory="2 GiB", vcores=4),
        "worker": TaskSpec(memory="2 GiB", vcores=4, instances=4),
        "ps": TaskSpec(memory="2 GiB", vcores=4, instances=2),
        "evaluator": TaskSpec(memory="2 GiB", vcores=1),
    }
    # Ship the editable packages plus this script and the winequality module.
    shipped_files = {
        **editable_requirements,
        os.path.basename(winequality.__file__): winequality.__file__,
        os.path.basename(__file__): __file__,
    }
    run_on_yarn(pyenv_zip_path,
                get_safe_exp_fn(),
                task_specs=task_specs,
                files=shipped_files)
示例#3
0
def _add_editable_requirements(files: Optional[Dict[str, str]]):
    """Merge editable-install requirements into *files* and return it.

    Entries already present in *files* take precedence; only missing
    directory names are added. Mutates *files* in place (a fresh dict is
    created when ``None`` is given).
    """
    if files is None:
        files = {}
    for name, location in cluster_pack.get_editable_requirements().items():
        # setdefault only inserts when the key is absent, preserving
        # caller-supplied entries.
        files.setdefault(name, location)
    return files
def main():
    """Submit a gloo/Horovod distributed training job to YARN."""
    pyenv_zip_path, _ = cluster_pack.upload_env()
    # Ship the editable packages plus this script and the winequality module.
    shipped_files = {
        **cluster_pack.get_editable_requirements(),
        os.path.basename(winequality.__file__): winequality.__file__,
        os.path.basename(__file__): __file__,
    }
    run_on_yarn(
        pyenv_zip_path,
        get_safe_exp_fn(),
        task_specs={
            "chief": TaskSpec(memory="2 GiB", vcores=4),
            "worker": TaskSpec(memory="2 GiB", vcores=4,
                               instances=(HVD_SIZE - 1)),
            "evaluator": TaskSpec(memory="2 GiB", vcores=1),
        },
        files=shipped_files,
        custom_task_module="tf_yarn.tasks.gloo_allred_task",
    )
示例#5
0
def main():
    """Build a Keras/Estimator experiment and run it distributed on YARN."""
    def build_experiment() -> Experiment:
        # To mitigate issue https://github.com/tensorflow/tensorflow/issues/32159 for tf >= 1.15
        import tensorflow as tf

        def as_tensors(features, label):
            xs = tf.convert_to_tensor(value=list(features.values()),
                                      dtype=tf.float32)
            ys = tf.convert_to_tensor(value=label, dtype=tf.int32)
            return xs, ys

        def train_input_fn():
            return (winequality.get_dataset(WINE_EQUALITY_FILE, split="train")
                    .map(as_tensors)
                    .shuffle(1000)
                    .batch(128)
                    .repeat())

        def eval_input_fn():
            return (winequality.get_dataset(WINE_EQUALITY_FILE, split="test")
                    .map(as_tensors)
                    .shuffle(1000)
                    .batch(128))

        # Three-layer MLP over the 11 wine features.
        model = keras.Sequential([
            keras.layers.Dense(units=300, activation="relu",
                               input_shape=(11, )),
            keras.layers.Dense(units=100, activation="relu"),
            keras.layers.Dense(units=10, activation="softmax"),
        ])
        model.summary()
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer="sgd",
                      metrics=['accuracy'])

        estimator = tf.keras.estimator.model_to_estimator(
            model, config=tf.estimator.RunConfig(model_dir=HDFS_DIR))
        train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=1000)
        eval_spec = tf.estimator.EvalSpec(eval_input_fn,
                                          steps=10,
                                          start_delay_secs=0,
                                          throttle_secs=30)
        return Experiment(estimator, train_spec, eval_spec)

    # Run once locally first: forces model_to_estimator._save_first_checkpoint
    # (l457 in https://github.com/tensorflow/estimator/blob/1d55f01d8af871a35ef83fc3354b9feaa671cbe1/tensorflow_estimator/python/estimator/keras.py)
    # to write the first checkpoint, avoiding the race where every worker
    # tries to save it at the same time.
    build_experiment()

    pyenv_zip_path, _ = cluster_pack.upload_env()
    editable_requirements = cluster_pack.get_editable_requirements()
    run_on_yarn(pyenv_zip_path,
                build_experiment,
                task_specs={
                    "chief": TaskSpec(memory="2 GiB", vcores=4),
                    "worker": TaskSpec(memory="2 GiB", vcores=4, instances=4),
                    "ps": TaskSpec(memory="2 GiB", vcores=4, instances=2),
                    "evaluator": TaskSpec(memory="2 GiB", vcores=1),
                },
                files={
                    **editable_requirements,
                    os.path.basename(winequality.__file__):
                    winequality.__file__,
                })
        optimizer=lambda: hvd.DistributedOptimizer(tf.train.AdamOptimizer()))

    return Experiment(
        estimator,
        tf.estimator.TrainSpec(train_input_fn,
                               max_steps=10,
                               hooks=[hvd.BroadcastGlobalVariablesHook(0)]),
        tf.estimator.EvalSpec(eval_input_fn,
                              steps=10,
                              start_delay_secs=0,
                              throttle_secs=30))


if __name__ == "__main__":
    pyenv_zip_path, _ = cluster_pack.upload_env()
    editable_requirements = cluster_pack.get_editable_requirements()

    run_on_yarn(pyenv_zip_path,
                experiment_fn,
                task_specs={
                    "chief":
                    TaskSpec(memory="2 GiB", vcores=4),
                    "worker":
                    TaskSpec(memory="2 GiB", vcores=4, instances=1),
                    "evaluator":
                    TaskSpec(memory="2 GiB", vcores=1),
                    "tensorboard":
                    TaskSpec(memory="2 GiB", vcores=1, tb_model_dir=HDFS_DIR)
                },
                files={
                    **editable_requirements,
示例#7
0
def main():
    """Build a Horovod-distributed Keras experiment and run it on YARN."""
    def build_experiment() -> KerasExperiment:
        def as_tensors(features, label):
            xs = tf.convert_to_tensor(value=list(features.values()),
                                      dtype=tf.float32)
            ys = tf.convert_to_tensor(value=label, dtype=tf.int32)
            return xs, ys

        def input_data_fn():
            return (winequality.get_dataset(WINE_EQUALITY_FILE, split="train")
                    .map(as_tensors)
                    .shuffle(1000)
                    .batch(128)
                    .repeat())

        def validation_data_fn():
            return (winequality.get_dataset(WINE_EQUALITY_FILE, split="test")
                    .map(as_tensors)
                    .shuffle(1000)
                    .batch(128))

        # Three-layer MLP over the 11 wine features.
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(units=300, activation="relu",
                                  input_shape=(11, )),
            tf.keras.layers.Dense(units=100, activation="relu"),
            tf.keras.layers.Dense(units=10, activation="softmax"),
        ])
        model.summary()
        # Wrap the optimizer BEFORE compile so Horovod averages gradients
        # across workers; the base rate is scaled with the worker count.
        distributed_opt = hvd.DistributedOptimizer(
            tf.keras.optimizers.Adadelta(1.0 * HVD_SIZE))
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer=distributed_opt,
                      metrics=['accuracy'])
        path_to_checkpoint = f"{HDFS_DIR}" + "/checkpoint-{epoch}"
        callbacks = [
            tf.keras.callbacks.ModelCheckpoint(path_to_checkpoint),
            hvd.keras.callbacks.BroadcastGlobalVariablesCallback(0),
        ]
        return KerasExperiment(model=model,
                               model_dir=HDFS_DIR,
                               train_params={"steps_per_epoch": 1000,
                                             "callbacks": callbacks},
                               input_data_fn=input_data_fn,
                               target_data_fn=None,
                               validation_data_fn=validation_data_fn)

    pyenv_zip_path, _ = cluster_pack.upload_env()
    editable_requirements = cluster_pack.get_editable_requirements()
    run_on_yarn(
        pyenv_zip_path,
        build_experiment,
        task_specs={
            "chief": TaskSpec(memory="2 GiB", vcores=4),
            "worker": TaskSpec(memory="2 GiB", vcores=4,
                               instances=(HVD_SIZE - 1)),
            "evaluator": TaskSpec(memory="2 GiB", vcores=1),
        },
        files={
            **editable_requirements,
            os.path.basename(winequality.__file__): winequality.__file__,
        },
        custom_task_module="tf_yarn.tasks.gloo_allred_task")
示例#8
0
def get_editable_requirements_from_current_venv(
    executable: str = sys.executable,
    editable_packages_dir=None):
    """Return the editable requirements of the current virtual env.

    Args:
        executable: Python interpreter whose environment is inspected.
        editable_packages_dir: Directory searched for editable packages;
            ``None`` (the default) means the current working directory,
            resolved at call time.

    Returns:
        Whatever ``cluster_pack.get_editable_requirements`` returns.
    """
    # BUG FIX: the original default was ``os.getcwd()``, which is evaluated
    # once at import time — a later chdir() would leave a stale directory.
    # A None sentinel defers the lookup to each call.
    if editable_packages_dir is None:
        editable_packages_dir = os.getcwd()
    return cluster_pack.get_editable_requirements(executable,
                                                  editable_packages_dir)
示例#9
0
def get_editable_requirements(executable: str = sys.executable):
    """Return the editable requirements detected for *executable*'s
    environment, as reported by cluster_pack.
    """
    requirements = cluster_pack.get_editable_requirements(executable)
    return requirements