示例#1
0
def setup_node():
    """
    Prepare this node and bring it up as either the master or a worker.

    Steps, in order: create the user, run ``spark.setup_conf()``, determine
    whether this node is the master, export the master node's IP address as
    the ``MASTER_IP`` environment variable, run the role-specific setup
    followed by the custom scripts, and finally touch
    ``/tmp/setup_complete``.
    """
    client = config.batch_client

    create_user.create_user(batch_client=client)
    spark.setup_conf()

    # MIXED_MODE is only looked up when the node is not dedicated, so a
    # dedicated node does not need that variable to be defined.
    node_is_dedicated = os.environ['AZ_BATCH_NODE_IS_DEDICATED'] == "true"
    if node_is_dedicated or os.environ['MIXED_MODE'] == "False":
        is_master = pick_master.find_master(client)
    else:
        # This node never becomes the master in this branch; it blocks in
        # wait_until_master_selected.main() before continuing.
        is_master = False
        wait_until_master_selected.main()

    # Look up the master node so its address can be exported.
    pool = config.batch_client.pool.get(config.pool_id)
    master_node_id = pick_master.get_master_node_id(pool)
    master_node = config.batch_client.compute_node.get(
        config.pool_id, master_node_id)
    os.environ["MASTER_IP"] = master_node.ip_address

    if is_master:
        setup_as_master()
    else:
        setup_as_worker()
    # Custom scripts always run with is_worker=True, including on the master.
    scripts.run_custom_scripts(is_master=bool(is_master), is_worker=True)

    # Create the marker file if it does not exist yet (append mode keeps
    # any existing content intact).
    with open("/tmp/setup_complete", 'a'):
        pass
示例#2
0
def setup_host(docker_repo: str, docker_run_options: str):
    """
    Host-side setup, executed on the node itself (NOT inside a container).

    Creates the user, determines the node's master/worker roles, exports
    them together with the master's IP address as ``AZTK_*`` environment
    variables, starts the spark container and then sets up the plugins
    that target the host.

    :param docker_repo: location of the Docker image to use
    :param docker_run_options: additional command-line options to pass to docker run
    """
    client = config.batch_client

    create_user.create_user(batch_client=client)

    # AZTK_MIXED_MODE is only looked up when the node is not dedicated.
    node_is_dedicated = os.environ["AZ_BATCH_NODE_IS_DEDICATED"] == "true"
    if node_is_dedicated or os.environ["AZTK_MIXED_MODE"] == "false":
        is_master = pick_master.find_master(client)
    else:
        # This node never becomes the master in this branch; it blocks in
        # wait_until_master_selected.main() before continuing.
        is_master = False
        wait_until_master_selected.main()

    # Every non-master node is a worker; the master is also a worker only
    # when AZTK_WORKER_ON_MASTER is set to "true".
    worker_on_master = os.environ.get("AZTK_WORKER_ON_MASTER") == "true"
    is_worker = not is_master or worker_on_master

    pool = config.batch_client.pool.get(config.pool_id)
    master_node_id = pick_master.get_master_node_id(pool)
    master_node = config.batch_client.compute_node.get(
        config.pool_id, master_node_id)

    # Export the roles and the master address through the environment.
    os.environ["AZTK_IS_MASTER"] = "true" if is_master else "false"
    os.environ["AZTK_IS_WORKER"] = "true" if is_worker else "false"
    os.environ["AZTK_MASTER_IP"] = master_node.ip_address

    cluster_conf = read_cluster_config()

    # NOTE: the call setup_node_scheduling(client, cluster_conf, is_master)
    # is currently disabled here.

    # TODO pass azure file shares
    gpu_enabled = os.environ.get("AZTK_GPU_ENABLED") == "true"
    spark_container.start_spark_container(
        docker_repo=docker_repo,
        docker_run_options=docker_run_options,
        gpu_enabled=gpu_enabled,
        plugins=cluster_conf.plugins,
    )

    plugins.setup_plugins(
        target=PluginTarget.Host,
        is_master=is_master,
        is_worker=is_worker,
    )