def setup_node():
    """
    Bring this node up as either the cluster master or a worker.

    Steps, in order: create the user through the batch client, run
    ``spark.setup_conf()``, decide the node's role, publish the master's
    IP address in the ``MASTER_IP`` environment variable, run the
    role-specific setup followed by the custom scripts, and finally touch
    ``/tmp/setup_complete``.
    """
    batch = config.batch_client
    create_user.create_user(batch_client=batch)

    spark.setup_conf()

    env = os.environ
    # MIXED_MODE is only read when the node is not dedicated (short-circuit).
    eligible_for_master = (env['AZ_BATCH_NODE_IS_DEDICATED'] == "true"
                           or env['MIXED_MODE'] == "False")
    if eligible_for_master:
        is_master = pick_master.find_master(batch)
    else:
        # This node never becomes master here; block until one is selected.
        is_master = False
        wait_until_master_selected.main()

    # Look up the selected master and expose its address to later steps.
    pool = config.batch_client.pool.get(config.pool_id)
    master_node_id = pick_master.get_master_node_id(pool)
    master_node = config.batch_client.compute_node.get(config.pool_id,
                                                       master_node_id)
    env["MASTER_IP"] = master_node.ip_address

    if is_master:
        setup_as_master()
    else:
        setup_as_worker()
    # Custom scripts always run with is_worker=True, whatever the role.
    scripts.run_custom_scripts(is_master=bool(is_master), is_worker=True)

    # Create the marker file if it is missing (append mode leaves any
    # existing content untouched).
    with open("/tmp/setup_complete", 'a'):
        pass
def setup_host(docker_repo: str, docker_run_options: str):
    """
    Code to be run on the node (NOT in a container).

    Creates the user, decides whether this node is the master and/or a
    worker, exports ``AZTK_IS_MASTER``, ``AZTK_IS_WORKER`` and
    ``AZTK_MASTER_IP`` into the process environment, starts the Spark
    container and then sets up the host-targeted plugins.

    :param docker_repo: location of the Docker image to use
    :param docker_run_options: additional command-line options to pass to docker run
    """
    batch = config.batch_client
    create_user.create_user(batch_client=batch)

    env = os.environ
    # AZTK_MIXED_MODE is only read when the node is not dedicated (short-circuit).
    eligible_for_master = (env["AZ_BATCH_NODE_IS_DEDICATED"] == "true"
                           or env["AZTK_MIXED_MODE"] == "false")
    if eligible_for_master:
        is_master = pick_master.find_master(batch)
    else:
        # This node never becomes master here; block until one is selected.
        is_master = False
        wait_until_master_selected.main()

    # Non-master nodes are always workers; the master is a worker only when
    # AZTK_WORKER_ON_MASTER is "true".
    is_worker = not is_master or env.get("AZTK_WORKER_ON_MASTER") == "true"

    pool = config.batch_client.pool.get(config.pool_id)
    master_node_id = pick_master.get_master_node_id(pool)
    master_node = config.batch_client.compute_node.get(config.pool_id,
                                                       master_node_id)

    # Publish the role flags and master address as "true"/"false" strings.
    env["AZTK_IS_MASTER"] = "true" if is_master else "false"
    env["AZTK_IS_WORKER"] = "true" if is_worker else "false"
    env["AZTK_MASTER_IP"] = master_node.ip_address

    cluster_conf = read_cluster_config()

    # NOTE: setup_node_scheduling(client, cluster_conf, is_master) is
    # currently disabled.
    # TODO pass azure file shares
    gpu_enabled = env.get("AZTK_GPU_ENABLED") == "true"
    cluster_plugins = cluster_conf.plugins
    spark_container.start_spark_container(
        docker_repo=docker_repo,
        docker_run_options=docker_run_options,
        gpu_enabled=gpu_enabled,
        plugins=cluster_plugins,
    )

    plugins.setup_plugins(
        target=PluginTarget.Host,
        is_master=is_master,
        is_worker=is_worker,
    )