def affinitize_task_to_master(batch_client, cluster_id, task):
    pool = batch_client.pool.get(config.pool_id)
    master_node_id = get_master_node_id(pool)
    master_node = batch_client.compute_node.get(pool_id=cluster_id, node_id=master_node_id)
    task.affinity_info = batch_models.AffinityInformation(affinity_id=master_node.affinity_id)
    return task
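# Usage sketch (hypothetical; the task id, command line, and job id below are
# illustrative, not from the original module): pin a task to the master node
# before submitting it to the Batch service.
#
#     task = batch_models.TaskAddParameter(
#         id="sample-task",
#         command_line="/bin/bash -c 'echo hello'")
#     task = affinitize_task_to_master(batch_client, cluster_id, task)
#     batch_client.task.add(job_id=cluster_id, task=task)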
def start_spark_worker():
    wait_for_master()
    exe = os.path.join(spark_home, "sbin", "start-slave.sh")
    master_node_id = pick_master.get_master_node_id(batch_client.pool.get(config.pool_id))
    master_node = get_node(master_node_id)

    cmd = [
        exe,
        "spark://{0}:7077".format(master_node.ip_address),
        "--webui-port",
        str(config.spark_worker_ui_port),
    ]
    print("Connecting to master with '{0}'".format(" ".join(cmd)))
    call(cmd)
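# For reference, the command assembled above is equivalent to running, e.g.
# (IP address and port are illustrative):
#
#     $SPARK_HOME/sbin/start-slave.sh spark://10.0.0.4:7077 --webui-port 8081
#
# i.e. the worker daemon registers itself with the Spark standalone cluster
# manager listening on the master's port 7077.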
def setup_connection():
    """
    Set up the Spark config with which nodes are slaves and which is the master
    """
    master_node_id = pick_master.get_master_node_id(batch_client.pool.get(config.pool_id))
    master_node = get_node(master_node_id)

    master_config_file = os.path.join(spark_conf_folder, "master")
    print("Adding master node ip {0} to config file '{1}'".format(master_node.ip_address, master_config_file))
    # Use a context manager so the file handle is closed even if the write fails.
    with open(master_config_file, "w", encoding="UTF-8") as master_file:
        master_file.write("{0}\n".format(master_node.ip_address))
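# A minimal sketch of the consuming side (hypothetical helper, not part of the
# original module), assuming the same spark_conf_folder layout:
#
#     def read_master_ip():
#         with open(os.path.join(spark_conf_folder, "master"), encoding="UTF-8") as f:
#             return f.read().strip()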
def main():
    # Poll once per second until pick_master has flagged a master node on the pool.
    master = None
    while master is None:
        try:
            from aztk.node_scripts.core import config
            from aztk.node_scripts.install.pick_master import get_master_node_id
            batch_client = config.batch_client
            pool = batch_client.pool.get(config.pool_id)
            master = get_master_node_id(pool)
            time.sleep(1)
        except Exception as e:
            print(e)
            time.sleep(1)
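# Assuming this excerpt is run as a standalone wait script on the node (an
# assumption; the excerpt does not show the module entry point), the
# conventional guard would be:
if __name__ == "__main__":
    main()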
def wait_for_master():
    print("Waiting for master to be ready.")
    master_node_id = pick_master.get_master_node_id(batch_client.pool.get(config.pool_id))

    if master_node_id == config.node_id:
        # This node is the master itself; nothing to wait for.
        return

    while True:
        master_node = get_node(master_node_id)

        if master_node.state in [
                batchmodels.ComputeNodeState.idle,
                batchmodels.ComputeNodeState.running,
        ]:
            break
        else:
            print("{0} Still waiting on master".format(datetime.datetime.now()))
            time.sleep(10)
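# Note: idle and running are the Batch compute-node states in which the master
# can accept work; any other state (creating, starting, rebooting, unusable,
# ...) keeps the loop above polling every 10 seconds.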
def setup_host(docker_repo: str, docker_run_options: str):
    """
    Code to be run on the node (NOT in a container)

    :param docker_repo: location of the Docker image to use
    :param docker_run_options: additional command-line options to pass to docker run
    """
    client = config.batch_client

    create_user.create_user(batch_client=client)

    if os.environ["AZ_BATCH_NODE_IS_DEDICATED"] == "true" or os.environ["AZTK_MIXED_MODE"] == "false":
        is_master = pick_master.find_master(client)
    else:
        # A low-priority node in mixed mode never becomes master; block until a
        # dedicated node has been elected.
        is_master = False
        wait_until_master_selected.main()

    is_worker = not is_master or os.environ.get("AZTK_WORKER_ON_MASTER") == "true"

    master_node_id = pick_master.get_master_node_id(config.batch_client.pool.get(config.pool_id))
    master_node = config.batch_client.compute_node.get(config.pool_id, master_node_id)

    os.environ["AZTK_IS_MASTER"] = "true" if is_master else "false"
    os.environ["AZTK_IS_WORKER"] = "true" if is_worker else "false"
    os.environ["AZTK_MASTER_IP"] = master_node.ip_address

    cluster_conf = read_cluster_config()

    # TODO pass azure file shares
    spark_container.start_spark_container(
        docker_repo=docker_repo,
        docker_run_options=docker_run_options,
        gpu_enabled=os.environ.get("AZTK_GPU_ENABLED") == "true",
        plugins=cluster_conf.plugins,
    )
    plugins.setup_plugins(target=PluginTarget.Host, is_master=is_master, is_worker=is_worker)
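# Hypothetical invocation sketch (image name and flags are illustrative, not
# from the original module): the node bootstrap script would call, e.g.
#
#     setup_host(docker_repo="aztk/spark:latest", docker_run_options="--shm-size=2g")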