def run_mnist_2gpu(self, target_file_name, eager_mode=True):
    """Start local trainers for ``target_file_name`` and wait for them.

    Returns immediately (with ``None``) when ``fluid.core`` reports that it
    was not compiled with CUDA, or that the CUDA device count is zero.
    Otherwise the cluster/pod pair is built from ``get_gpus('0,1')``, the
    trainers are started, and ``watch_local_trainers`` is polled every
    3 seconds until it returns a falsy value.

    Args:
        target_file_name: passed to ``start_local_trainers`` as
            ``training_script``.
        eager_mode: forwarded unchanged to ``start_local_trainers``.
    """
    # Guard clauses: skip entirely without a CUDA build and a CUDA device.
    if not fluid.core.is_compiled_with_cuda():
        return
    if fluid.core.get_cuda_device_count() == 0:
        return

    cluster, pod = get_cluster_from_args(get_gpus('0,1'))
    trainer_procs = start_local_trainers(
        cluster,
        pod,
        eager_mode=eager_mode,
        training_script=target_file_name,
        training_script_args=[])

    # NOTE(review): run_mnist_2cpu passes cluster.trainers_nranks() as the
    # second argument of watch_local_trainers, while this method passes
    # cluster.trainers_endpoints() -- confirm which one the helper expects.
    while watch_local_trainers(trainer_procs, cluster.trainers_endpoints()):
        time.sleep(3)
    print("Local procs complete, POD info:{}".format(pod))
def run_mnist_2cpu(self, target_file_name):
    """Start local CPU trainers for ``target_file_name`` and wait for them.

    The trainers are started with ``start_local_trainers_cpu`` on the
    cluster's trainer endpoints, then ``watch_local_trainers`` is polled
    every 3 seconds until it returns a falsy value.

    Args:
        target_file_name: passed to ``start_local_trainers_cpu`` as
            ``training_script``.
    """
    # Temporary: the cluster is built from [0, 1] so that trainers_nranks()
    # can be queried below; its endpoints and the pod are used as well.
    cluster, pod = get_cluster_from_args([0, 1])

    endpoints = cluster.trainers_endpoints()
    cpu_procs = start_local_trainers_cpu(
        endpoints,
        training_script=target_file_name,
        training_script_args=[])

    # trainers_nranks() is re-evaluated on every poll, as in the original
    # loop.
    while watch_local_trainers(cpu_procs, cluster.trainers_nranks()):
        time.sleep(3)
    print("Local procs complete, POD info:{}".format(pod))