import os
import time
from multiprocessing import Process


def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('env_name', type=str)
    # parser.add_argument('--env_name', type=str, default='CartPole-v0')
    parser.add_argument('--exp_name', type=str, default='vpg')
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--discount', type=float, default=1.0)
    parser.add_argument('--n_iter', '-n', type=int, default=100)
    parser.add_argument('--batch_size', '-b', type=int, default=1000)
    parser.add_argument('--ep_len', '-ep', type=float, default=-1.)
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-3)
    parser.add_argument('--reward_to_go', '-rtg', action='store_true')
    # parser.add_argument('--reward_to_go', '-rtg', type=bool, default=True)
    parser.add_argument('--dont_normalize_advantages', '-dna', action='store_true')
    parser.add_argument('--nn_baseline', '-bl', action='store_true')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--n_experiments', '-e', type=int, default=1)
    parser.add_argument('--n_layers', '-l', type=int, default=2)
    parser.add_argument('--size', '-s', type=int, default=64)
    parser.add_argument('--dir', '-d', type=str, default='test')
    args = parser.parse_args()

    if not os.path.exists(args.dir):
        os.makedirs(args.dir)
    logdir = args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join(args.dir, logdir)
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    max_path_length = args.ep_len if args.ep_len > 0 else None

    processes = []
    for e in range(args.n_experiments):
        seed = args.seed + 10 * e
        print('Running experiment with seed %d' % seed)

        # train_PG is assumed to be defined elsewhere in this file
        def train_func():
            train_PG(exp_name=args.exp_name,
                     env_name=args.env_name,
                     n_iter=args.n_iter,
                     gamma=args.discount,
                     min_timesteps_per_batch=args.batch_size,
                     max_path_length=max_path_length,
                     learning_rate=args.learning_rate,
                     reward_to_go=args.reward_to_go,
                     animate=args.render,
                     logdir=os.path.join(logdir, '%d' % seed),
                     normalize_advantages=not args.dont_normalize_advantages,
                     nn_baseline=args.nn_baseline,
                     seed=seed,
                     n_layers=args.n_layers,
                     size=args.size)

        p = Process(target=train_func, args=tuple())
        p.start()
        processes.append(p)
        # if you uncomment the line below, the loop will block
        # until this process finishes
        # p.join()

    for p in processes:
        p.join()
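# Note (not part of the original example): train_func above is defined inside
# the loop, which only works with the 'fork' start method (the Linux default).
# Under 'spawn' (the default on Windows, and on macOS since Python 3.8) the
# Process target must be picklable, i.e. module-level. A minimal spawn-safe
# sketch, passing the settings explicitly instead of capturing them in a closure:
def train_func_spawn_safe(params):
    # params is a plain dict of keyword arguments, which pickles cleanly
    train_PG(**params)

# inside main(): p = Process(target=train_func_spawn_safe, args=(params,))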
import os
import torch
import torch.distributed as dist
from torch.multiprocessing import Process


def run(rank, size):
    """ Distributed function to be implemented later. """
    pass


def init_process(rank, size, fn, backend='gloo'):
    """ Initialize the distributed environment. """
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend, rank=rank, world_size=size)
    fn(rank, size)


if __name__ == "__main__":
    print(torch.distributed.is_available())
    print(torch.distributed.is_nccl_available())
    size = 2
    processes = []
    for rank in range(size):
        p = Process(target=init_process, args=(rank, size, run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
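# A minimal body for the run stub above (a sketch, following the standard
# all-reduce pattern): every rank contributes a tensor and all ranks receive
# the element-wise sum.
def run(rank, size):
    tensor = torch.ones(1) * rank                  # rank-specific payload
    dist.all_reduce(tensor, op=dist.ReduceOp.SUM)  # in-place sum across ranks
    print(f'Rank {rank} has data {tensor[0]}')     # prints 0 + 1 + ... + (size - 1)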
if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--world", default=1, type=int) parser.add_argument("--rank", default=0, type=int) parser.add_argument("--gpu", type=str) parser.add_argument("--master", default="127.0.0.1", type=str) parser.add_argument("--port", default="14007", type=str) parser.add_argument("--dataset", default="ImageTarDataset", type=str) parser.add_argument("--batch", default=256, type=int) parser.add_argument("--datapath", type=str) parser.add_argument("--data_dir", type=str) args = parser.parse_args() world_size = args.world node_id = args.rank node_size = int(args.gpu) batch_size = args.batch processes = [] for local_rank in range(node_size): p = Process(target=init_processes, args=(args.master, args.port, args.dataset, args.datapath, batch_size, (node_id * node_size) + local_rank, node_size * world_size, run)) p.start() processes.append(p) for p in processes: p.join()
sac_trainer.policy_net.share_memory()   # model
sac_trainer.log_alpha.share_memory_()   # variable
ShareParameters(sac_trainer.soft_q_optimizer1)
ShareParameters(sac_trainer.soft_q_optimizer2)
ShareParameters(sac_trainer.policy_optimizer)
ShareParameters(sac_trainer.alpha_optimizer)

rewards_queue = mp.Queue()  # used to get rewards from all processes and plot the curve

num_workers = 2  # or: mp.cpu_count()
processes = []
rewards = []

for i in range(num_workers):
    process = Process(target=worker,
                      args=(i, sac_trainer, ENV, rewards_queue, replay_buffer,
                            max_episodes, max_steps, batch_size, explore_steps,
                            update_itr, AUTO_ENTROPY, DETERMINISTIC, hidden_dim,
                            model_path))  # the args contain shared and not shared
    process.daemon = True  # all processes are closed when the main one stops
    processes.append(process)

[p.start() for p in processes]

while True:  # keep getting the episode reward from the queue
    r = rewards_queue.get()
    if r is not None:
        rewards.append(r)
    else:
        break
    if len(rewards) % 20 == 0 and len(rewards) > 0:
        plot(rewards)

[p.join() for p in processes]  # finished at the same time
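# Worker-side sketch (hypothetical, not from the original source) of the queue
# protocol the loop above expects: each worker puts episode rewards on
# rewards_queue and finally a None sentinel. The consumer breaks on the first
# None; since the workers are daemons, they are killed when the main process exits.
def worker(worker_id, trainer, env_name, rewards_queue, *rest):
    for episode in range(max_episodes):                  # max_episodes from the outer scope
        episode_reward = run_episode(trainer, env_name)  # hypothetical rollout helper
        rewards_queue.put(episode_reward)
    rewards_queue.put(None)                              # sentinel: training is done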
    'L': False,
    'SO': False,
    'OPT': 'Adam',
    'gpu_ids': [1, 1, 1, 1, 1, 1, 1, 1],
    'env_ids': [0, 1, 2, 3, 4, 5, 6, 8],
    'mr': 1,
    'mn': 'tdw_ppo_a3c_model_relation',
    'lmn': 'tdw_ppo_a3c_model_relation',
    'loss': 'relation_TDW_reward.p'
}

if __name__ == '__main__':
    torch.backends.cudnn.benchmark = False
    processes = []
    loadarguments()
    torch.manual_seed(args['seed'])
    torch.cuda.manual_seed(args['seed'])
    mp.set_start_method('spawn')
    # train(args, optimizer, 0, shared_model)
    for rank in range(0, args['W']):
        p = Process(target=train,
                    args=(args, optimizer, rank, shared_model, step_loss))
        p.start()
        processes.append(p)
        time.sleep(10)
    for p in processes:
        p.join()
    time.sleep(10)
        if p_queue.empty():
            sleep(.1)
        else:
            s_id, params = p_queue.get()
            r_queue.put((s_id, r_gen.rollout(params)))


################################################################################
#                     Define queues and start workers                          #
################################################################################
p_queue = Queue()
r_queue = Queue()
e_queue = Queue()

for p_index in range(num_workers):
    Process(target=slave_routine,
            args=(p_queue, r_queue, e_queue, p_index)).start()


################################################################################
#                                Evaluation                                    #
################################################################################
def evaluate(solutions, results, rollouts=100):
    """ Give current controller evaluation.

    Evaluation is minus the cumulated reward averaged over rollout runs.

    :args solutions: CMA set of solutions
    :args results: corresponding results
    :args rollouts: number of rollouts

    :returns: minus averaged cumulated reward
    """
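# Master-side sketch (assumed, not shown in the excerpt) of this queue
# protocol: push each CMA candidate to p_queue with an id, then collect
# (id, result) pairs from r_queue until every rollout has come back.
for s_id, params in enumerate(solutions):
    p_queue.put((s_id, params))

results = [None] * len(solutions)
for _ in range(len(solutions)):
    s_id, result = r_queue.get()  # blocks until a worker finishes a rollout
    results[s_id] = result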
def register(freq, func, args):
    print(f'Registered {func} with freq: {freq}')
    p = Process(target=periodic_executor, args=(freq, func, args))
    p.daemon = True
    p.start()
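# periodic_executor is not shown above; a minimal sketch of what it plausibly
# does, assuming freq is the period in seconds:
import time

def periodic_executor(freq, func, args):
    while True:
        func(*args)       # invoke the registered callable
        time.sleep(freq)  # wait one period before the next invocation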
    logger.info(f'Rank {rank} Terminated')
    logger.info(f'Rank {rank} Total Time:')
    logger.info(total_train_time)


def init_processes(rank, size, fn, backend='gloo'):
    global DEVICE, logger
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend, rank=rank, world_size=size)
    # Configure multiple GPUs: pin each rank to its own device
    if not debug_mode_enabled and isinstance(gpu, list):
        DEVICE = "cuda:{}".format(gpu[rank])
        torch.cuda.set_device(gpu[rank])
    logger = hlsgd_logging.init_logger(args, rank)
    fn(rank, size)


if __name__ == "__main__":
    processes = []
    for rank in range(world_size):
        p = Process(target=init_processes, args=(rank, world_size, run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
def train_ai2thor(model, args, rank=0, b=None):
    seed = args.seed + 10000 * rank
    torch.manual_seed(seed)
    np.random.seed(seed)
    # torch.cuda.set_device(rank)
    device = torch.device(f'cuda:{rank}')
    os.environ['DISPLAY'] = f':{rank}'
    model = model.to(device)
    model.share_memory()

    # Experience buffer
    storage = PPOBuffer(model.obs_shape, args.steps, args.num_workers,
                        args.state_size, args.gamma, device=device)
    storage.share_memory()

    # torch.multiprocessing.set_start_method('spawn')
    # start multiple processes
    ready_to_works = [Event() for _ in range(args.num_workers)]
    exit_flag = Value('i', 0)
    queue = SimpleQueue()

    processes = []
    task_config_file = "config_files/multiMugTaskTrain.json"
    # start workers
    for worker_id in range(args.num_workers):
        p = Process(target=worker,
                    args=(worker_id, model, storage, ready_to_works[worker_id],
                          queue, exit_flag, task_config_file))
        p.start()
        processes.append(p)

    # start trainer
    train_params = {
        "epochs": args.epochs,
        "steps": args.steps,
        "world_size": args.world_size,
        "num_workers": args.num_workers
    }
    ppo_params = {
        "clip_param": args.clip_param,
        "train_iters": args.train_iters,
        "mini_batch_size": args.mini_batch_size,
        "value_loss_coef": args.value_loss_coef,
        "entropy_coef": args.entropy_coef,
        "rnn_steps": args.rnn_steps,
        "lr": args.lr,
        "max_kl": args.max_kl
    }

    distributed = False
    if args.world_size > 1:
        distributed = True
        # Initialize the process group; distributed backend type
        dist_backend = 'nccl'
        # URL used to set up distributed training
        dist_url = "tcp://127.0.0.1:23456"
        print("Initialize Process Group... pid:", os.getpid())
        dist.init_process_group(backend=dist_backend, init_method=dist_url,
                                rank=rank, world_size=args.world_size)
        # Wrap the model in DistributedDataParallel
        model = DistributedDataParallel(model, device_ids=[rank], output_device=rank)

    learner(model, storage, train_params, ppo_params, ready_to_works, queue,
            exit_flag, rank, distributed, b)

    for p in processes:
        p.join()
        print("process ", p.pid, " joined")
    ac_kwargs['action_space'] = env.action_space
    ac_kwargs['state_size'] = args.state_size
    env.close()

    # Main model
    print("Initialize Model...")
    # Construct Model
    ac_model = ActorCritic(obs_shape=obs_dim, **ac_kwargs)
    if args.model_path:
        ac_model.load_state_dict(torch.load(args.model_path))

    # Count variables
    var_counts = tuple(count_vars(m) for m in
                       [ac_model.policy, ac_model.value_function, ac_model.feature_base])
    print('\nNumber of parameters: \t pi: %d, \t v: %d \tbase: %d\n' % var_counts)

    if args.world_size > 1:
        processes = []
        b = Barrier(args.world_size)
        for rank in range(args.world_size):
            p = Process(target=train_ai2thor, args=(ac_model, args, rank, b))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
            print("process ", p.pid, " joined")
    else:
        train_ai2thor(ac_model, args)
    print("main exits")
parser.add_argument('--seed', type=int, default=1,
                    help='seed used for initialization')
parser.add_argument('--master_address', type=str, default='127.0.0.1',
                    help='address for master')
parser.add_argument("--x", default=50, type=int)
parser.add_argument("--y", default=99, type=int)
parser.add_argument("--t", default=3, type=int)
parser.add_argument('--source_folder', default="/Users/alessandrozonta/Desktop/",
                    help="root where to store data")
args = parser.parse_args()
utils.create_exp_dir(args.save)

size = args.world_size
if size > 1:
    args.distributed = True
    processes = []
    for rank in range(size):
        args.local_rank = rank
        p = Process(target=init_processes, args=(rank, size, main, args))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
else:
    # for debugging
    print('starting in debug mode')
    args.distributed = True
    init_processes(0, size, main, args)
parser.add_argument('--seed', type=int, default=1,
                    help='seed used for initialization')
args = parser.parse_args()
args.save = args.root + '/eval-' + args.save
utils.create_exp_dir(args.save)

size = args.num_process_per_node
if size > 1:
    args.distributed = True
    processes = []
    for rank in range(size):
        args.local_rank = rank
        global_rank = rank + args.node_rank * args.num_process_per_node
        global_size = args.num_proc_node * args.num_process_per_node
        args.global_rank = global_rank
        print('Node rank %d, local proc %d, global proc %d'
              % (args.node_rank, rank, global_rank))
        p = Process(target=init_processes,
                    args=(global_rank, global_size, main, args))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
else:
    # for debugging
    print('starting in debug mode')
    args.distributed = True
    init_processes(0, size, main, args)
from multiprocessing import Process


def local_process(target, args):
    return Process(target=target, args=args)
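# Example usage of the local_process helper (with a hypothetical work function):
def work(x, y):
    print(x + y)

if __name__ == '__main__':
    p = local_process(work, (1, 2))  # build the process without starting it
    p.start()
    p.join()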