def setup_distributed(self, mode, cluster):
    """Set up the TensorFlow multi-worker environment for this runner.

    Publishes the cluster layout through the ``TF_CONFIG`` environment
    variable, lists the cluster hosts in ``no_proxy`` and creates the
    ``MultiWorkerMirroredStrategy`` stored on ``self.strategy``.

    Args:
        mode: Not used by this method.
        cluster (list of str): Addresses of all workers. Each entry is split
            on ``":"`` and the first part is treated as the host; the list
            itself is passed to TensorFlow as the ``worker`` job.
    """
    import os

    self.cluster = cluster
    # ``get`` is a classmethod; there is no need to instantiate the context.
    tc = BarrierTaskContext.get()
    self.rank = self._get_rank(cluster, tc)

    print("cluster is: ", cluster)

    os.environ["TF_CONFIG"] = json.dumps({
        'cluster': {
            'worker': cluster,
        },
        'task': {
            'type': 'worker',
            'index': self.rank,
        },
    })

    # De-duplicate the hosts and sort them so the variable is deterministic.
    ips = {node.split(":")[0] for node in cluster}
    os.environ["no_proxy"] = ",".join(sorted(ips))

    self.strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

    # For use in model.evaluate()
    self.local_model = None
    self.backend = "tf-distributed"
def _run(it):
    """Run the user ``map_fn`` once for the current partition.

    The worker index is the last value produced by ``it``. When
    ``use_barrier`` is set, node placement is read from the barrier task
    context; otherwise no placement is passed and ``num_executors`` is used
    as the worker count.

    Returns:
        list: ``[0]``, a dummy result required by ``mapPartitions``.
    """
    from pyspark import BarrierTaskContext

    for item in it:
        worker_num = item

    if not use_barrier:
        nodes, num_workers = None, num_executors
    else:
        # use BarrierTaskContext to get placement of all nodes
        task_infos = BarrierTaskContext.get().getTaskInfos()
        nodes = [info.address for info in task_infos]
        num_workers = len(nodes)

    # use the placement info to help allocate GPUs
    # note: defaults to CPU if no GPUs present
    if 'num_gpus' in tf_args:
        num_gpus = tf_args.num_gpus
    else:
        num_gpus = 1
    util.single_node_env(num_gpus=num_gpus, worker_index=worker_num, nodes=nodes)

    # run the user map_fn
    node_ctx = TFSparkNode.TFNodeContext()
    node_ctx.defaultFS = defaultFS
    node_ctx.worker_num = worker_num
    node_ctx.executor_id = worker_num
    node_ctx.num_workers = num_workers
    map_fn(tf_args, node_ctx)

    # return a dummy iterator (since we have to use mapPartitions)
    return [0]
def _run(it):
    """Run the user ``map_fn`` once for the current barrier task.

    ``it`` yields the executor index, in ``[0, num_executors - 1]``; the last
    value produced is used as the worker index.

    Returns:
        list: ``[0]``, a dummy result required by ``mapPartitions``.
    """
    from pyspark import BarrierTaskContext

    for item in it:
        worker_num = item  # executor index

    # Barrier execution mode is a scheduling model newly introduced in
    # Spark 2.4 so that distributed deep learning can be embedded in Spark.
    # A barrier stage assigns an id to every task, which makes it easier for
    # tasks to interact with each other.
    # use BarrierTaskContext to get placement of all nodes
    barrier_ctx = BarrierTaskContext.get()
    nodes = [info.address for info in barrier_ctx.getTaskInfos()]

    # use the placement info to help allocate GPUs
    # Number of GPUs per task requested by the user; defaults to 1 when unset.
    if 'num_gpus' in tf_args:
        num_gpus = tf_args.num_gpus
    else:
        num_gpus = 1

    # Set environment variables and, if free GPUs are available, assign
    # them to this task.
    util.single_node_env(num_gpus=num_gpus, worker_index=worker_num, nodes=nodes)

    # run the user map_fn
    node_ctx = TFSparkNode.TFNodeContext()
    node_ctx.defaultFS = defaultFS
    node_ctx.worker_num = worker_num
    node_ctx.executor_id = worker_num  # executor index
    node_ctx.num_workers = len(nodes)  # number of executors
    map_fn(tf_args, node_ctx)

    # return a dummy iterator (since we have to use mapPartitions)
    return [0]
def f(iterator):
    """Yield the barrier partition id, or -1 if the barrier context lookup raises."""
    try:
        barrier_ctx = BarrierTaskContext.get()
    except Exception:
        # Any failure to obtain the barrier context is reported as -1.
        yield -1
        return
    yield barrier_ctx.partitionId()
def _start_ray_services(iter):
    """Start the Ray master on partition 0 and a raylet on every other partition.

    All tasks meet at a barrier between the two steps, so raylets are only
    launched after partition 0 has returned from starting the master.

    Yields:
        The process info returned by ``self._start_ray_node`` for this task.
    """
    tc = BarrierTaskContext.get()
    partition_id = tc.partitionId()

    # The task infos are expected to be ordered by partition id (per the
    # upstream comments), so the first entry belongs to the master.
    task_addrs = [info.address for info in tc.getTaskInfos()]
    print(task_addrs)
    master_ip = task_addrs[0].split(":")[0]
    print("current address {}".format(task_addrs[partition_id]))
    print("master address {}".format(master_ip))
    redis_address = "{}:{}".format(master_ip, self.redis_port)

    is_master = partition_id == 0
    process_info = None
    if is_master:
        print("partition id is : {}".format(partition_id))
        master_command = self._gen_master_command()
        process_info = self._start_ray_node(command=master_command,
                                            tag="ray-master")
        process_info.master_addr = redis_address

    tc.barrier()

    if not is_master:
        print("partition id is : {}".format(partition_id))
        raylet_command = RayServiceFuncGenerator._get_raylet_command(
            redis_address=redis_address,
            ray_exec=self.ray_exec,
            password=self.password,
            ray_node_cpu_cores=self.ray_node_cpu_cores,
            labels=self.labels,
            object_store_memory=self.object_store_memory,
            extra_params=self.extra_params)
        process_info = self._start_ray_node(command=raylet_command,
                                            tag="raylet")

    yield process_info
def _start_ray_services(iter):
    """Start the Ray master on partition 0 and a raylet on every other partition.

    The master is started with ``wait_after=self.waiting_time_sec`` and the
    raylets with ``wait_before=self.waiting_time_sec``. The barrier call sits
    after the ``yield``, so it runs only once the consumer resumes this
    generator.

    Yields:
        The process info returned by ``self._start_ray_node`` for this task.
    """
    tc = BarrierTaskContext.get()
    partition_id = tc.partitionId()

    # The task infos are expected to be ordered by partition id (per the
    # upstream comments), so the first entry belongs to the master.
    task_addrs = [info.address for info in tc.getTaskInfos()]
    print(task_addrs)
    master_ip = task_addrs[0].split(":")[0]
    print("current address {}".format(task_addrs[partition_id]))
    print("master address {}".format(master_ip))
    redis_address = "{}:{}".format(master_ip, self.redis_port)

    print("partition id is : {}".format(partition_id))
    if partition_id != 0:
        raylet_command = self._get_raylet_command(redis_address=redis_address)
        process_info = self._start_ray_node(command=raylet_command,
                                            tag="raylet",
                                            wait_before=self.waiting_time_sec)
    else:
        process_info = self._start_ray_node(command=self._gen_master_command(),
                                            tag="ray-master",
                                            wait_after=self.waiting_time_sec)
        process_info.master_addr = redis_address

    yield process_info
    tc.barrier()
def context(iterator):
    """Yield ``(task partition id, barrier partition id, pid)`` for this task.

    The barrier partition id is reported as -1 when looking it up raises.
    """
    task_partition = TaskContext.get().partitionId()
    try:
        barrier_ctx = BarrierTaskContext.get()
        barrier_partition = barrier_ctx.partitionId()
    except Exception:
        barrier_partition = -1
    pid = os.getpid()
    yield (task_partition, barrier_partition, pid)
def info_fn(iter):
    """Yield the host part of every barrier task's address, then hit the barrier.

    The barrier call follows the ``yield``, so it runs only once the consumer
    resumes this generator.
    """
    tc = BarrierTaskContext.get()
    task_infos = tc.getTaskInfos()
    # Keep only what precedes the first ":" of each address.
    hosts = [info.address.partition(":")[0] for info in task_infos]
    yield hosts
    tc.barrier()
def _train_booster(pandas_df_iter):
    """Train a booster on one RDD partition using the Rabit ring protocol.

    Partition 0 produces the Rabit arguments, which are shared with all tasks
    through ``allGather``. Each task then converts its partition data to
    DMatrix form and calls ``worker_train`` inside a ``RabitContext``.

    Yields:
        On partition 0 only, a one-row ``pandas.DataFrame`` with the booster
        ``config`` and the JSON-serialized ``booster``.
    """
    from pyspark import BarrierTaskContext

    context = BarrierTaskContext.get()
    context.barrier()

    if use_gpu:
        if is_local:
            booster_params["gpu_id"] = context.partitionId()
        else:
            booster_params["gpu_id"] = _get_gpu_id(context)

    # Only partition 0 contributes a non-empty message to the gather.
    if context.partitionId() == 0:
        local_message = str(_get_rabit_args(context, num_workers))
    else:
        local_message = ""
    messages = context.allGather(message=str(local_message))
    rabit_args = _get_args_from_message_list(messages)

    evals_result = {}
    with RabitContext(rabit_args, context):
        converted = _convert_partition_data_to_dmatrix(
            pandas_df_iter,
            has_weight,
            has_validation,
            has_base_margin,
            dmatrix_kwargs=dmatrix_kwargs,
        )
        if has_validation:
            train_matrix, valid_matrix = converted
            # TODO: Question: do we need to add dtrain to dval list ?
            eval_sets = [(train_matrix, "training"), (valid_matrix, "validation")]
        else:
            train_matrix, eval_sets = converted, []

        booster = worker_train(
            params=booster_params,
            dtrain=train_matrix,
            evals=eval_sets,
            evals_result=evals_result,
            **train_call_kwargs_params,
        )
        context.barrier()

    if context.partitionId() == 0:
        serialized = {
            "config": [booster.save_config()],
            "booster": [booster.save_raw("json").decode("utf-8")],
        }
        yield pd.DataFrame(data=serialized)
def wrapped_train_fn(_):
    """Configure this barrier task for TensorFlow and run the training program.

    Sets ``CUDA_VISIBLE_DEVICES`` and ``TF_CONFIG`` for the task, then calls
    ``run_tensorflow_program``.

    Returns:
        list: ``[result]`` on partition 0 and ``[None]`` on every other
        partition.
    """
    import json
    import logging
    import os
    import socket
    from contextlib import closing
    from pyspark import BarrierTaskContext

    # Sets the CUDA_VISIBLE_DEVICES env var so only
    # the appropriate GPUS are used
    def set_gpus(context):
        gpus_owned = MirroredStrategyRunner._get_gpus_owned(
            context.resources(), gpu_resource_name)
        # Every task gets the base share; the first ``extra`` partitions
        # get one more.
        base, extra = divmod(num_slots, num_tasks)
        my_num_gpus = base + (context.partitionId() < extra)
        chosen = random.sample(gpus_owned, my_num_gpus)
        gpu_addresses = [str(gpu) for gpu in chosen]
        logging.info(f'Using GPU addresses: {gpu_addresses}')
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(gpu_addresses)

    # Sets the TF_CONFIG env var so TF servers
    # can communicate with each other
    def set_tf_config(context):
        hosts = [
            info.address.split(':')[0] for info in context.getTaskInfos()
        ]
        my_addr = hosts[context.partitionId()]
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        with closing(probe) as my_sock:
            # Binding to port 0 lets the OS choose the port.
            my_sock.bind(('', 0))
            my_port = my_sock.getsockname()[1]
            my_endpoint = "{}:{}".format(my_addr, my_port)
            worker_endpoints = context.allGather(my_endpoint)
        tf_config = {
            'cluster': {'worker': worker_endpoints},
            'task': {
                'type': 'worker',
                'index': context.partitionId(),
            },
        }
        os.environ['TF_CONFIG'] = json.dumps(tf_config)

    context = BarrierTaskContext.get()
    if not use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
    else:
        set_gpus(context)
    set_tf_config(context)

    result = run_tensorflow_program(train_fn, use_custom_strategy, **kwargs)
    return [result] if context.partitionId() == 0 else [None]
def train_fn():
    """Return the GPU count visible to every barrier task.

    The local count is the number of comma-separated entries in
    ``CUDA_VISIBLE_DEVICES`` (0 when the variable is empty); the counts of all
    tasks are exchanged with ``allGather``.
    """
    from pyspark import BarrierTaskContext

    context = BarrierTaskContext.get()
    visible_devices = os.environ['CUDA_VISIBLE_DEVICES']
    num_gpus = len(visible_devices.split(',')) if visible_devices else 0
    gathered = context.allGather(str(num_gpus))
    return [int(count) for count in gathered]
def _start_ray_services(iter):
    """Start the Ray master on the master node and raylets in all other tasks.

    A task whose node IP equals ``master_ip`` tries to become the master under
    a file lock; the first one to get there starts it and creates a flag file.
    After a barrier, every task that did not start the master starts a raylet
    under a second file lock.

    Yields:
        The process info returned by ``self._start_ray_node`` for this task.
    """
    from pyspark import BarrierTaskContext
    from zoo.util.utils import get_node_ip

    tc = BarrierTaskContext.get()
    current_ip = get_node_ip()
    print("current address {}".format(current_ip))
    print("master address {}".format(master_ip))
    redis_address = "{}:{}".format(master_ip, self.redis_port)

    process_info = None
    base_path = tempfile.gettempdir()
    ray_master_flag_path = os.path.join(base_path, self.ray_master_flag)

    if current_ip == master_ip:
        # Start the ray master.
        # It is possible that multiple executors are on one node. In this case,
        # the first executor that gets the lock would be the master and it would
        # create a flag to indicate the master has initialized.
        # The flag file is removed when ray start processes finish so that this
        # won't affect other programs.
        ray_master_lock_path = os.path.join(base_path, self.ray_master_lock)
        with filelock.FileLock(ray_master_lock_path):
            master_already_started = os.path.exists(ray_master_flag_path)
            if not master_already_started:
                print("partition id is : {}".format(tc.partitionId()))
                process_info = self._start_ray_node(
                    command=self._gen_master_command(),
                    tag="ray-master")
                process_info.master_addr = redis_address
                os.mknod(ray_master_flag_path)

    tc.barrier()

    if not process_info:
        # Start raylets.
        # Add a lock to avoid starting multiple raylets on one node at the same time.
        # See this issue: https://github.com/ray-project/ray/issues/10154
        raylet_lock_path = os.path.join(base_path, self.raylet_lock)
        with filelock.FileLock(raylet_lock_path):
            print("partition id is : {}".format(tc.partitionId()))
            raylet_command = RayServiceFuncGenerator._get_raylet_command(
                redis_address=redis_address,
                ray_exec=self.ray_exec,
                password=self.password,
                ray_node_cpu_cores=self.ray_node_cpu_cores,
                labels=self.labels,
                object_store_memory=self.object_store_memory,
                extra_params=self.extra_params)
            process_info = self._start_ray_node(command=raylet_command,
                                                tag="raylet")
            kill_redundant_log_monitors(redis_address=redis_address)

    # Remove the master flag file if it is present.
    if os.path.exists(ray_master_flag_path):
        os.remove(ray_master_flag_path)

    yield process_info
def test_barrier_infos(self):
    """
    Verify that BarrierTaskContext.getTaskInfos() returns a list of all task infos in the
    barrier stage.
    """
    rdd = self.sc.parallelize(range(10), 4)

    def f(iterator):
        yield sum(iterator)

    taskInfos = (
        rdd.barrier()
        .mapPartitions(f)
        .map(lambda x: BarrierTaskContext.get().getTaskInfos())
        .collect()
    )
    # assertEqual reports the actual length on failure, unlike assertTrue.
    self.assertEqual(len(taskInfos), 4)
    self.assertEqual(len(taskInfos[0]), 4)
def find_ip_and_port(pre_iter):
    """Return this barrier task's host paired with an OS-assigned TCP port.

    The host is taken from this task's entry in the barrier task infos. The
    port is obtained by binding a socket to port 0, and the socket is held
    open across the barrier.

    Args:
        pre_iter: The partition iterator; not used.

    Returns:
        list of str: A single ``"host:port"`` string. The socket is closed on
        return, so the port is not reserved afterwards.
    """
    # ``get`` is a classmethod; there is no need to instantiate the context.
    tc = BarrierTaskContext.get()
    address = tc.getTaskInfos()[tc.partitionId()].address.split(":")[0]
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
        # Binding to port 0 lets the OS choose the port.
        s.bind(("", 0))
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tc.barrier()
        free_ip_port = f"{address}:{s.getsockname()[1]}"
    return [free_ip_port]
def _start_ray_services(iter):
    """Start the Ray master on partition 0 and a raylet on every other partition.

    All tasks meet at a barrier after the master step. Raylets are then
    started under a file lock in the system temporary directory.

    Yields:
        The process info returned by ``self._start_ray_node`` for this task.
    """
    from pyspark import BarrierTaskContext

    tc = BarrierTaskContext.get()
    partition_id = tc.partitionId()

    # The task infos are expected to be ordered by partition id (per the
    # upstream comments), so the first entry belongs to the master.
    task_addrs = [info.address for info in tc.getTaskInfos()]
    print(task_addrs)
    master_ip = task_addrs[0].split(":")[0]
    print("current address {}".format(task_addrs[partition_id]))
    print("master address {}".format(master_ip))
    redis_address = "{}:{}".format(master_ip, self.redis_port)

    is_master = partition_id == 0
    process_info = None
    if is_master:
        print("partition id is : {}".format(partition_id))
        process_info = self._start_ray_node(
            command=self._gen_master_command(),
            tag="ray-master")
        process_info.master_addr = redis_address

    tc.barrier()

    if not is_master:
        import tempfile
        import filelock

        lock_path = os.path.join(tempfile.gettempdir(),
                                 "ray_on_spark_start.lock")
        with filelock.FileLock(lock_path):
            print("partition id is : {}".format(partition_id))
            raylet_command = RayServiceFuncGenerator._get_raylet_command(
                redis_address=redis_address,
                ray_exec=self.ray_exec,
                password=self.password,
                ray_node_cpu_cores=self.ray_node_cpu_cores,
                labels=self.labels,
                object_store_memory=self.object_store_memory,
                extra_params=self.extra_params)
            process_info = self._start_ray_node(command=raylet_command,
                                                tag="raylet")
            kill_redundant_log_monitors(redis_address=redis_address)

    yield process_info
def context_barrier(x):
    """Sleep a random 2-10 s (even values), wait at the barrier, return the time."""
    tc = BarrierTaskContext.get()
    delay_seconds = random.randint(1, 5) * 2
    time.sleep(delay_seconds)
    tc.barrier()
    released_at = time.time()
    return released_at
def context_barrier(x):
    """Sleep a random 1-10 s, wait at the barrier, return the time afterwards."""
    tc = BarrierTaskContext.get()
    delay_seconds = random.randint(1, 10)
    time.sleep(delay_seconds)
    tc.barrier()
    released_at = time.time()
    return released_at
def context_barrier(x):
    """Sleep a random 1-10 s, wait at the barrier, return ``(time, pid)``."""
    tc = BarrierTaskContext.get()
    delay_seconds = random.randint(1, 10)
    time.sleep(delay_seconds)
    tc.barrier()
    released_at = time.time()
    return (released_at, os.getpid())
def context_barrier(x):
    """Sleep a random 1-10 s, then gather every task's partition id as ints."""
    tc = BarrierTaskContext.get()
    delay_seconds = random.randint(1, 10)
    time.sleep(delay_seconds)
    own_id = str(tc.partitionId())
    gathered = tc.allGather(own_id)
    return [int(value) for value in gathered]
def find_ip_and_port(pre_iter):
    """Return the result of ``find_free_port`` for this barrier task.

    Args:
        pre_iter: The partition iterator; not used.

    Returns:
        list: A single-element list holding the value returned by
        ``find_free_port``.
    """
    # ``get`` is a classmethod; there is no need to instantiate the context.
    tc = BarrierTaskContext.get()
    free_port = find_free_port(tc)
    return [free_port]