def __init__(self, data, batch_size, num_steps=1, sample_coverage=50,
             save_dir=None, num_workers=0, log=True):
    assert data.edge_index is not None
    assert 'node_norm' not in data
    assert 'edge_norm' not in data

    self.N = N = data.num_nodes
    self.E = data.num_edges

    self.adj = SparseTensor(row=data.edge_index[0], col=data.edge_index[1],
                            value=data.edge_attr, sparse_sizes=(N, N))

    self.data = copy.copy(data)
    self.data.edge_index = None
    self.data.edge_attr = None

    self.batch_size = batch_size
    self.num_steps = num_steps
    self.sample_coverage = sample_coverage
    self.num_workers = num_workers
    self.log = log
    self.__count__ = 0

    if self.num_workers > 0:
        self.__sample_queue__ = Queue()
        self.__sample_workers__ = []
        for _ in range(self.num_workers):
            worker = Process(target=self.__put_sample__,
                             args=(self.__sample_queue__, ))
            worker.daemon = True
            worker.start()
            self.__sample_workers__.append(worker)

    path = osp.join(save_dir or '', self.__filename__)
    if save_dir is not None and osp.exists(path):  # pragma: no cover
        self.node_norm, self.edge_norm = torch.load(path)
    else:
        self.node_norm, self.edge_norm = self.__compute_norm__()
        if save_dir is not None:  # pragma: no cover
            torch.save((self.node_norm, self.edge_norm), path)

    if self.num_workers > 0:
        self.__data_queue__ = Queue()
        self.__data_workers__ = []
        for _ in range(self.num_workers):
            worker = Process(target=self.__put_data__,
                             args=(self.__data_queue__, ))
            worker.daemon = True
            worker.start()
            self.__data_workers__.append(worker)

def main():
    # reproducible
    # env.seed(RANDOMSEED)
    np.random.seed(RANDOMSEED)
    torch.manual_seed(RANDOMSEED)

    env = gym.make(ENV_NAME)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    ppo = PPO(state_dim, action_dim, hidden_dim=HIDDEN_DIM)

    if args.train:
        ppo.actor.share_memory()
        ppo.critic.share_memory()
        ShareParameters(ppo.actor_optimizer)
        ShareParameters(ppo.critic_optimizer)

        rewards_queue = mp.Queue()  # used to collect episode rewards from all processes and plot the curve
        processes = []
        rewards = []

        for i in range(NUM_WORKERS):
            process = Process(target=worker, args=(i, ppo, rewards_queue))  # the args contain shared and not shared objects
            process.daemon = True  # all processes are closed when the main process stops
            processes.append(process)
        [p.start() for p in processes]

        while True:  # keep getting the episode reward from the queue
            r = rewards_queue.get()
            if r is not None:
                if len(rewards) == 0:
                    rewards.append(r)
                else:
                    rewards.append(rewards[-1] * 0.9 + r * 0.1)  # exponential moving average
            else:
                break
            if len(rewards) % 20 == 0 and len(rewards) > 0:
                plot(rewards)

        [p.join() for p in processes]  # wait for all processes to finish
        ppo.save_model(MODEL_PATH)

    if args.test:
        ppo.load_model(MODEL_PATH)
        while True:
            s = env.reset()
            eps_r = 0
            for i in range(EP_LEN):
                env.render()
                s, r, done, _ = env.step(ppo.choose_action(s, True))
                eps_r += r
                if done:
                    break
            print('Episode reward: {} | Episode length: {}'.format(eps_r, i))

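# `ShareParameters` is called above but not defined in this snippet. The sketch
# below is a common A3C-style helper for sharing Adam optimizer state across
# processes; it is an assumption consistent with such examples, not necessarily
# this author's exact implementation (it assumes a torch Adam optimizer).
import torch

def ShareParameters(adamoptim):
    # Move each parameter's Adam statistics into shared memory so that
    # worker processes update a single copy of the optimizer state.
    for group in adamoptim.param_groups:
        for p in group['params']:
            state = adamoptim.state[p]
            state['step'] = 0
            state['exp_avg'] = torch.zeros_like(p.data)
            state['exp_avg_sq'] = torch.zeros_like(p.data)
            state['exp_avg'].share_memory_()
            state['exp_avg_sq'].share_memory_()
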
def init_jobs(queue, batch_size, num_features):
    # Multiple processes could also be created here, as a list of Process objects.
    task = Process(target=prefetch_data, args=(queue, batch_size, num_features))
    task.daemon = True
    task.start()
    return task

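# A minimal sketch of the `prefetch_data` target used by `init_jobs` above; its
# body is not shown in this snippet, so the random batch generation below is an
# assumption for illustration only.
import numpy as np

def prefetch_data(queue, batch_size, num_features):
    # Producer loop: keep the queue topped up with ready-made batches so the
    # training loop never blocks on data preparation.
    while True:
        batch = np.random.randn(batch_size, num_features).astype(np.float32)  # placeholder data
        queue.put(batch)  # blocks when the queue is full, providing backpressure
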
def run(self):
    # mp.set_start_method('spawn', force=True)
    with Manager() as manager:
        d = manager.dict()
        self.d = d
        d['train_progress'] = []
        d['best_epoch'] = None
        d['best_epoch_summary'] = None
        d['model'] = None
        d['labels'] = None

        pqueue = mp.Queue()
        out_pqueue = mp.Queue()

        model = self.trainer_kwargs['model']
        # model = copy.deepcopy(model)
        model.share_memory()
        self.trainer_kwargs['model'] = model
        self.trainer_kwargs['num_workers'] = 0

        p = Process(target=main_q, args=(pqueue, out_pqueue, d))
        p.daemon = True
        p.start()
        # pool.apply_async(main_q, args=(pqueue, out_pqueue, d, ))
        # pool.apply_async(main_train, args=(d, self.num_epochs, self.trainer_args, self.trainer_kwargs, self.database_items))
        # pool.starmap(main_q, [(pqueue, out_pqueue, d), ])

        pqueue.put(None)
        pqueue.put(self.num_epochs)
        pqueue.put(self.trainer_args)
        pqueue.put(self.trainer_kwargs)
        pqueue.put(self.database_items)

        p.join()
        # pool.close()
        # pool.join()
        print('Process results: ', len(d.keys()))

        self.d = get_queue_dict(out_pqueue, item_names=[
            'best_epoch',
            'best_epoch_summary',
            'model',
            'labels',
        ])
        best_epoch = self.d['best_epoch']
        best_epoch_summary = self.d['best_epoch_summary']
        # load_state_dict returns a result object, not the model, so do not rebind `model`
        model.load_state_dict(self.d['model'])
        labels = self.d['labels']
        self.d = {
            "train_progress": d['train_progress'],
        }

        # best_epoch, best_epoch_summary = self.trainer.train(epochs=self.num_epochs)
        self.complete_func(self.host, {
            "best_epoch": best_epoch,
            "best_epoch_summary": best_epoch_summary,
            "model": model,
            "labels": labels,
        })

def init_parallel_jobs(cfg, queue, fn, ped_data=None, emp_data=None):
    tasks = Process(target=prefetch_data, args=(cfg, queue, fn, ped_data, emp_data))
    # for task in tasks:
    #     task.daemon = True
    #     task.start()
    tasks.daemon = True
    tasks.start()
    return tasks

def daemon_process_run(read_conn, write_conn, lock, testcase=0):
    '''
    Two test cases:
        0. generate data from a background process
        1. read data from PCIe in a background process
    '''
    # testcase 0: generate data from a background process
    if testcase == 0:
        read_proc = Process(target=gen_data, args=(shared_arr, npts, nCh, lock))
    # testcase 1: read data from a background process
    elif testcase == 1:
        read_proc = Process(target=get_data, args=(shared_arr, npts, read_conn))
        write_proc = Process(target=write_data, args=(shared_arr, write_conn))

    read_proc.daemon = True
    if testcase == 1:
        write_proc.daemon = True

    read_proc.start()
    if testcase == 1:
        write_proc.start()

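# Hedged usage sketch for `daemon_process_run` above. The snippet relies on
# module-level globals (`shared_arr`, `npts`, `nCh`) that are not shown here,
# so this driver only illustrates plausible wiring; names and sizes are
# assumptions, not taken from the source.
import time
from multiprocessing import Pipe, Lock

if __name__ == '__main__':
    read_conn, write_conn = Pipe()  # one end per background process
    lock = Lock()                   # guards the shared array in testcase 0
    daemon_process_run(read_conn, write_conn, lock, testcase=1)
    time.sleep(10)  # keep the main process alive; daemonic children are killed when it exits
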
def specific_policy_learn(epi, environment_params, environment_wrappers,
                          environment_wrapper_arguments, no_reset=True):
    """
    Learn the task-specific policy with multiple processes, rather than the
    single-process version in the EPI class.
    """
    epi.load_model('predictor_and_embedding')
    epi.load_model('epi_policy')
    epi.task_specific_policy.share_memory()

    rewards_queue = mp.Queue()  # collects episode rewards from all processes for plotting
    eval_rewards_queue = mp.Queue()  # collects offline-evaluated rewards from all processes
    success_queue = mp.Queue()  # collects success events from all processes
    eval_success_queue = mp.Queue()

    processes = []
    rewards = []
    success = []
    eval_rewards = []
    eval_success = []

    for i in range(NUM_WORKERS):
        if TASK_POLICY_ALG == 'ppo':
            process = Process(target=ppo_worker,
                              args=(i, epi, environment_params, environment_wrappers,
                                    environment_wrapper_arguments, eval_rewards_queue,
                                    eval_success_queue, batch_size, no_reset))  # the args contain shared and not shared objects
        elif TASK_POLICY_ALG == 'td3':
            process = Process(target=td3_worker,
                              args=(i, epi, environment_params, environment_wrappers,
                                    environment_wrapper_arguments, rewards_queue,
                                    eval_rewards_queue, success_queue, eval_success_queue,
                                    replay_buffer, batch_size, explore_steps, noise_decay,
                                    update_itr, explore_noise_scale, eval_noise_scale,
                                    reward_scale, DETERMINISTIC, hidden_dim, no_reset))
        else:
            raise NotImplementedError
        process.daemon = True  # all processes are closed when the main process stops
        processes.append(process)
    [p.start() for p in processes]

    while True:  # keep getting the episode reward from the queue
        eval_r = eval_rewards_queue.get()
        eval_succ = eval_success_queue.get()
        eval_rewards.append(eval_r)
        eval_success.append(eval_succ)
        if len(eval_rewards) % 20 == 0 and len(eval_rewards) > 0:
            np.save(PREFIX + 'eval_rewards', eval_rewards)
            np.save(PREFIX + 'eval_success', eval_success)

    # NOTE: the loop above has no break, so this join is never reached
    [p.join() for p in processes]

def __iter__(self):
    print('Starting processes')
    random.seed(0)
    random.shuffle(self.filepaths)
    filepaths = deque()
    for path in self.filepaths:
        filepaths.append(path)

    self.buffr_processes = []
    args = (self.filepaths, self.buffer, self.partial)
    for i in range(10):
        process = Process(target=fill_buffer, args=args)
        process.daemon = True
        process.start()
        self.buffr_processes.append(process)

    args = (self.buffer, self.batch_queue, self.batch_size)
    self.batch_process = Process(target=fill_batch, args=args)
    self.batch_process.daemon = True
    self.batch_process.start()
    return self

def __init__(self, loader):
    self.loader = loader
    self.data_source = loader.data_source
    self.args = loader.args
    self.num_workers = 8
    self.batch_size = loader.batch_size
    self.tokenizer = loader.tokenizer
    self.max_len = loader.max_len
    self.mode = loader.mode
    self._batch_count_in_queue = 0
    self._data = self.get_data()
    self.workers = []
    if self.mode in {'train', 'eval'}:
        self.input_queue = Queue(-1)
        self.output_queue = Queue(-1)
        for _ in range(self.num_workers):
            worker = Process(target=self._data_loop)
            self.workers.append(worker)
        self.__prefetch()
        for worker in self.workers:
            worker.daemon = True
            worker.start()

def crop_face(args):
    for k, v in default_args.items():
        setattr(args, k, v)
    assert osp.exists(args.data_dir), "The input dir does not exist"
    root_folder_name = args.data_dir.split('/')[-1]
    src_folder = args.data_dir
    dst_folder = args.data_dir.replace(root_folder_name, root_folder_name + '_OPPOFaces')
    lz.mkdir_p(dst_folder, delete=False)

    ds = TestData(src_folder)
    loader = torch.utils.data.DataLoader(ds, batch_size=args.batch_size,
                                         num_workers=args.num_workers,
                                         shuffle=False, pin_memory=True,
                                         drop_last=False)

    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12 (pose) + 40 (shape) + 10 (expression)
    model_dict = model.state_dict()
    # because the model was trained on multiple GPUs, the 'module.' prefix must be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib model for face detection and landmarks used for face cropping
    queue = Queue()
    lock = Lock()
    consumers = []
    for i in range(args.num_consumers):
        p = Process(target=consumer, args=(queue, lock))
        p.daemon = True
        consumers.append(p)
    for c in consumers:
        c.start()

    # 3. forward
    ttl_nimgs = 0
    ttl_imgs = []
    data_meter = lz.AverageMeter()
    model_meter = lz.AverageMeter()
    post_meter = lz.AverageMeter()
    lz.timer.since_last_check('start crop face')
    for ind, data in enumerate(loader):
        data_meter.update(lz.timer.since_last_check(verbose=False))
        if (data['finish'] == 1).all().item():
            logging.info('finish')
            break
        if ind % 10 == 0:
            logging.info(
                f'proc batch {ind}, data time: {data_meter.avg:.2f}, '
                f'model: {model_meter.avg:.2f}, post: {post_meter.avg:.2f}')
        mask = data['finish'] == 0
        input = data['img'][mask]
        input_np = input.numpy()
        roi_box = data['roi_box'][mask].numpy()
        imgfn = np.asarray(data['imgfn'])[mask.numpy().astype(bool)]
        dst_imgfn = [img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces')
                     for img_fp in imgfn]
        ttl_imgs.extend(dst_imgfn)
        ttl_nimgs += mask.sum().item()
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().astype(np.float32)
        model_meter.update(lz.timer.since_last_check(verbose=False))
        queue.put((imgfn, param, roi_box, dst_imgfn))
        # pts68 = [predict_68pts(param[i], roi_box[i]) for i in range(param.shape[0])]
        # pts68_proc = [predict_68pts(param[i], [0, 0, STD_SIZE, STD_SIZE]) for i in range(param.shape[0])]
        # for img_fp, pts68_, pts68_proc_, img_, dst in zip(imgfn, pts68, pts68_proc, input_np, dst_imgfn):
        #     ## this may need optimization to async read/write
        #     img_ori = cvb.read_img(img_fp)
        #     pts5 = to_landmark5(pts68_[:2, :].transpose())
        #     warped = preprocess(img_ori, landmark=pts5)
        #     # plt_imshow(warped, inp_mode='bgr'); plt.show()
        #     lz.mkdir_p(osp.dirname(dst), delete=False)
        #     cvb.write_img(warped, dst)
        #
        #     ## this may cause a black margin
        #     # pts5 = to_landmark5(pts68_proc_[:2, :].transpose())
        #     # warped = preprocess(to_img(img_), landmark=pts5)
        #     # # plt_imshow(warped, inp_mode='bgr'); plt.show()
        #     # dst = img_fp.replace(root_folder_name, root_folder_name + '_OPPOFaces')
        #     # cvb.write_img(warped, dst)
        #     if args.dump_res:
        #         img_ori = cvb.read_img(img_fp)
        #         pts_res = [pts68_]
        #         dst = img_fp.replace(root_folder_name, root_folder_name + '_kpts.demo')
        #         lz.mkdir_p(osp.dirname(dst), delete=False)
        #         draw_landmarks(img_ori, pts_res,
        #                        wfp=dst,
        #                        show_flg=args.show_flg)
        post_meter.update(lz.timer.since_last_check(verbose=False))

    lz.msgpack_dump(ttl_imgs, dst_folder + '/' + 'all_imgs.pk')
    del model, input
    torch.cuda.empty_cache()
    while not queue.empty():
        time.sleep(1)

def main():
    # reproducible
    # env.seed(RANDOMSEED)
    np.random.seed(RANDOMSEED)
    torch.manual_seed(RANDOMSEED)

    env = NormalizedActions(gym.make(ENV_NAME).unwrapped)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    ppo = PPO(state_dim, action_dim, hidden_dim=256)
    try:
        ppo.load_model(MODEL_PATH)
    except Exception as e:
        print(f'Pretrained models not found in {MODEL_PATH}.\n'
              'Buckle up, it is going to be a long hot night')
        print("error {}".format(e))

    if args.train:
        ppo.actor.share_memory()
        ppo.actor_old.share_memory()
        ppo.critic.share_memory()
        ShareParameters(ppo.actor_optimizer)
        ShareParameters(ppo.critic_optimizer)

        rewards_queue = mp.Queue()  # used to collect episode rewards from all processes and plot the curve
        processes = []
        rewards = []

        for i in range(NUM_WORKERS):
            process = Process(target=worker, args=(i, ppo, rewards_queue))  # the args contain shared and not shared objects
            process.daemon = True  # all processes are closed when the main process stops
            processes.append(process)
        [p.start() for p in processes]

        while True:  # keep getting the episode reward from the queue
            r = rewards_queue.get()
            if r is not None:
                if len(rewards) == 0:
                    rewards.append(r)
                else:
                    rewards.append(rewards[-1] * 0.9 + r * 0.1)  # exponential moving average
            else:
                break
            if len(rewards) % 20 == 0 and len(rewards) > 0:
                plot(rewards)

        [p.join() for p in processes]  # wait for all processes to finish
        ppo.save_model(MODEL_PATH)

    if args.test:
        ppo.load_model(MODEL_PATH)
        while True:
            s = env.reset()
            for i in range(EP_LEN):
                env.render()
                s, r, done, _ = env.step(ppo.choose_action(s))
                if done:
                    break

def register(freq, func, args):
    print(f'Registered {func} with freq: {freq}')
    p = Process(target=periodic_executor, args=(freq, func, args))
    p.daemon = True
    p.start()

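# A minimal sketch of the `periodic_executor` target used by `register` above;
# it is not defined in this snippet, so the loop below is an assumption about
# its intent (call `func(*args)` every `freq` seconds until the daemon dies).
import time

def periodic_executor(freq, func, args):
    while True:
        func(*args)       # run the registered callable
        time.sleep(freq)  # then wait `freq` seconds before the next run
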
def async_save(model_states: OrderedDict, filename) -> Process:
    model_states = copy_cpu_state_dict(model_states)
    p = Process(target=_save, args=(model_states, filename), daemon=True)
    p.daemon = True  # redundant: daemon=True was already passed to the constructor
    p.start()
    return p

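# Hedged sketches of the two helpers `async_save` relies on; neither is defined
# in this snippet, so the bodies below are assumptions. Copying tensors to CPU
# before forking matters: a detached CPU clone freezes the weights at save time
# and avoids handing CUDA tensors to a child process. Because the child is
# daemonic, callers who need a guaranteed save should .join() the returned
# handle before exiting.
from collections import OrderedDict
import torch

def copy_cpu_state_dict(state_dict: OrderedDict) -> OrderedDict:
    # Detach and move every tensor to CPU so the child process gets a stable snapshot.
    return OrderedDict((k, v.detach().cpu().clone()) for k, v in state_dict.items())

def _save(model_states: OrderedDict, filename):
    torch.save(model_states, filename)
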
def train_eval(name, model, dataset, optimizer, scheduler, lr=1e-1, weight_decay=5e-4,
               bs=128, n_epochs=300, start_epoch=None, print_freq=1000, val_freq=10000,
               checkpoint_folder=None, version=-1, use_jit=True, use_amp=False,
               opt_level='O1', **kwargs):
    assert dataset in ('cifar10', 'cifar100')
    if use_amp:
        import apex

    net = model(num_classes=10 if dataset == 'cifar10' else 100, default_init=False)
    net = net.to(device[0])

    opt_sig = signature(optimizer)
    opt_kwargs = dict([(k, kwargs[k]) for k in kwargs.keys() if k in opt_sig.parameters.keys()])
    optimizer = optimizer(net.trainable, lr=lr, weight_decay=weight_decay, **opt_kwargs)
    if scheduler is not None:
        sch_sig = signature(scheduler)
        sch_kwargs = dict([(k, kwargs[k]) for k in kwargs.keys() if k in sch_sig.parameters.keys()])
        scheduler = scheduler(optimizer, **sch_kwargs)

    dataset_ = torchvision.datasets.CIFAR10 if dataset == 'cifar10' else torchvision.datasets.CIFAR100
    train_data = dataset_(root='./data', train=True, download=True, transform=transform_train)
    train_loader = T.utils.data.DataLoader(train_data, batch_size=bs, shuffle=True, num_workers=5)

    if checkpoint_folder is None:
        mon = nnt.Monitor(name, print_freq=print_freq,
                          num_iters=int(np.ceil(len(train_data) / bs)), use_tensorboard=True)
        mon.backup(backup_files)
        mon.dump_rep('network', net)
        mon.dump_rep('optimizer', optimizer)
        states = {
            'model_state_dict': net.state_dict(),
            'opt_state_dict': optimizer.state_dict()
        }
        if scheduler is not None:
            mon.dump_rep('scheduler', scheduler)
            states['scheduler_state_dict'] = scheduler.state_dict()
    else:
        mon = nnt.Monitor(current_folder=checkpoint_folder, print_freq=print_freq,
                          num_iters=len(train_data) // bs, use_tensorboard=True)
        states = mon.load('training.pt', method='torch', version=version)
        net.load_state_dict(states['model_state_dict'])
        optimizer.load_state_dict(states['opt_state_dict'])
        if scheduler:
            scheduler.load_state_dict(states['scheduler_state_dict'])
        if use_amp and 'amp' in states.keys():
            apex.amp.load_state_dict(states['amp'])
        if start_epoch:
            start_epoch = start_epoch - 1
            mon.epoch = start_epoch
        print('Resume from epoch %d...' % mon.epoch)

    if not no_wait_eval:
        eval_data = dataset_(root='./data', train=False, download=True, transform=transform_test)
        eval_loader = T.utils.data.DataLoader(eval_data, batch_size=bs, shuffle=False, num_workers=2)

    if nnt.cuda_available:
        train_loader = nnt.DataPrefetcher(train_loader, device=device[0])
        if not no_wait_eval:
            eval_loader = nnt.DataPrefetcher(eval_loader, device=device[0])

    if use_jit:
        img = T.rand(1, 3, 32, 32).to(device[0])
        net.train(True)
        net_train = T.jit.trace(net, img)
        net.eval()
        net_eval = T.jit.trace(net, img)

    if use_amp:
        if use_jit:
            net_train, optimizer = apex.amp.initialize(net_train, optimizer, opt_level=opt_level)
            net_eval = apex.amp.initialize(net_eval, opt_level=opt_level)
        else:
            net, optimizer = apex.amp.initialize(net, optimizer, opt_level=opt_level)
        if 'amp' not in states.keys():
            states['amp'] = apex.amp.state_dict()

    if use_jit:
        net_train = T.nn.DataParallel(net_train, device_ids=device)
        net_eval = T.nn.DataParallel(net_eval, device_ids=device)
    else:
        net = T.nn.DataParallel(net, device_ids=device)

    def learn(images, labels, reduction='mean'):
        net.train(True)
        optimizer.zero_grad()
        loss, accuracy = get_loss(net_train if use_jit else net, images, labels, reduction=reduction)
        if not (T.isnan(loss) or T.isinf(loss)):
            if use_amp:
                with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
        else:
            raise ValueError('NaN encountered!')
        mon.plot('train-loss', nnt.utils.to_numpy(loss), smooth=.99)
        mon.plot('train-accuracy', nnt.utils.to_numpy(accuracy), smooth=.99)
        del loss, accuracy

    if no_wait_eval:
        q = Queue()
        eval_proc = Process(target=eval_queue,
                            args=(q, mon.current_folder, dataset, bs, use_jit, use_amp, opt_level))
        eval_proc.daemon = True
        eval_proc.start()

    start_epoch = mon.epoch if start_epoch is None else start_epoch
    print('Training...')
    with T.jit.optimized_execution(use_jit):
        for _ in mon.iter_epoch(range(start_epoch, n_epochs)):
            for idx, lr_ in enumerate(scheduler.get_last_lr()):
                mon.plot('lr-%d' % idx, lr_, filter_outliers=False)

            for batch in mon.iter_batch(train_loader):
                batch = nnt.utils.batch_to_device(batch, device[0])
                learn(*batch)
                if val_freq and mon.iter % val_freq == 0:
                    if no_wait_eval:
                        lock.acquire_write()
                        mon.dump('tmp.pt', states, method='torch')
                        lock.release_write()
                        q.put((mon.epoch, mon.iter))
                        q.put(None)
                    else:
                        net.eval()
                        with T.set_grad_enabled(False):
                            losses, accuracies = [], []
                            for itt, batch in enumerate(eval_loader):
                                batch = nnt.utils.batch_to_device(batch, device[0])
                                loss, acc = get_loss(net_eval if use_jit else net, *batch)
                                losses.append(nnt.utils.to_numpy(loss))
                                accuracies.append(nnt.utils.to_numpy(acc))
                        mon.plot('test-loss', np.mean(losses))
                        mon.plot('test-accuracy', np.mean(accuracies))
                    mon.dump('training.pt', states, method='torch', keep=10)
            if scheduler is not None:
                scheduler.step()

    if no_wait_eval:
        q.put('DONE')
        eval_proc.join()
    print('Training finished!')

def launch_actor(id_actor, args, redis_servor):
    print("id actor = ", id_actor)
    env_actor = Env(args)
    start_time_actor = time.time()

    if args.continue_experiment:
        print("We are restarting a stopped experiment with a model trained for "
              + str(args.step_actors_already_done) + " steps")
        initial_T_actor = int((args.step_actors_already_done - args.memory_capacity) / args.nb_actor)
        print("initial T actor equals ", initial_T_actor)
        step_to_start_sleep = int(args.step_actors_already_done / args.nb_actor)
    else:
        initial_T_actor = 0
        step_to_start_sleep = int(args.learn_start / args.nb_actor)

    T_actor = initial_T_actor
    index_actor_in_memory = 0
    timestep = 0
    actor_buffer = []
    mem_actor = ReplayRedisMemory(args, redis_servor)
    actor = Actor(args, env_actor.action_space(), redis_servor)
    done_actor = True
    tab_state = []
    tab_action = []
    tab_reward = []
    tab_nonterminal = []

    # We want to warn the user when the agent reaches 100 hours of gameplay while
    # continuously improving its score. On those games the agent is superhuman
    # (and learning could perhaps be stopped).
    if not args.disable_SABER_mode:
        # SABER mode: episode length can be effectively infinite (100 hours)
        step_100_hours = int(args.max_episode_length / args.action_repeat) - 1

    if id_actor == 0:
        reward_buffer = RewardBuffer(args.evaluation_episodes, args.action_repeat)

    while T_actor <= (args.T_max / args.nb_actor):
        if done_actor:
            if not args.disable_SABER_mode and timestep >= step_100_hours:
                print("Agent reaches 100 hours of gameplay while continuously improving its score! "
                      "Agent is superhuman (happened only on Atlantis, Defender and Asteroids). "
                      "Learning could be stopped now...")
            if id_actor == 0 and T_actor > initial_T_actor:
                reward_buffer.update_score_episode_buffer(timestep)
            timestep = 0
            state_buffer_actor = env_actor.reset()
            done_actor = False

        if T_actor % args.replay_frequency == 0:
            actor.reset_noise()  # Draw a new set of noisy weights

        if T_actor < args.learn_start / args.nb_actor:
            # Take random actions before learning starts
            action = random.randint(0, env_actor.action_space() - 1)
        else:
            # Choose an action greedily (with noisy weights)
            action = actor.act(state_buffer_actor)

        next_state_buffer_actor, reward, done_actor = env_actor.step(action)  # Step
        if args.render and id_actor == 0:
            env_actor.render()
        if id_actor == 0:
            reward_buffer.update_current_reward_buffer(timestep, reward)
        if args.reward_clip > 0:
            reward = max(min(reward, args.reward_clip), -args.reward_clip)  # Clip rewards

        actor_buffer.append([timestep, state_buffer_actor[-1], action, reward, done_actor])
        if len(tab_state) == 0:
            for current_state in state_buffer_actor:
                tab_state.append(current_state)
        else:
            tab_state.append(state_buffer_actor[-1])
        tab_action.append(action)
        tab_reward.append(reward)
        tab_nonterminal.append(not done_actor)

        if T_actor % args.log_interval == 0:
            log(f"T = {T_actor} / {args.T_max}")
            duration_actor = time.time() - start_time_actor
            print(f"Time between 2 log_interval for actor {id_actor} ({duration_actor:.3f} sec)")
            start_time_actor = time.time()

        if T_actor % args.weight_synchro_frequency == 0:
            actor.load_weight_from_redis()

        # Send the actor buffer to the redis memory with properly initialized priorities
        if len(actor_buffer) >= args.length_actor_buffer:
            if (not mem_actor.transitions.actor_full) and (
                    (index_actor_in_memory + len(actor_buffer)) >= mem_actor.transitions.actor_capacity):
                redis_servor.set(cst.IS_FULL_ACTOR_STR + str(id_actor), 1)
                mem_actor.transitions.actor_full = True

            priorities_buffer = actor.compute_priorities(
                tab_state, tab_action, tab_reward, tab_nonterminal, mem_actor.priority_exponent)

            # We don't have the next states for the last n_step states in the buffer,
            # so we just set their priorities to the max priority (this concerns about
            # 3 / args.length_actor_buffer of the experiences, so it is fairly negligible...)
            max_priority = np.float64(redis_servor.get(cst.MAX_PRIORITY_STR))
            last_priorities = np.ones(mem_actor.n) * max_priority
            all_priorities = np.concatenate((priorities_buffer, last_priorities))

            p = Process(
                target=mem_actor.transitions.append_actor_buffer,
                args=(actor_buffer, index_actor_in_memory, id_actor, all_priorities, T_actor),
            )
            p.daemon = True
            p.start()
            index_actor_in_memory = (index_actor_in_memory + len(actor_buffer)) % args.actor_capacity

            # Make actors sleep to wait for the learner if synchronization is on!
            if args.synchronize_actors_with_learner and (T_actor >= step_to_start_sleep):
                # Actors are always faster than the learner
                T_learner = int(redis_servor.get(cst.STEP_LEARNER_STR))
                while T_learner + 2 * args.weight_synchro_frequency <= T_actor * args.nb_actor:
                    # We had a bug at the end because the learner didn't record in the
                    # redis memory that it had reached 50M steps, so the actor slept forever...
                    time.sleep(cst.TIME_TO_SLEEP)
                    T_learner = int(redis_servor.get(cst.STEP_LEARNER_STR))

            actor_buffer = []
            tab_state = []
            tab_action = []
            tab_reward = []
            tab_nonterminal = []

        # Update target network
        if T_actor % args.target_update == 0:
            actor.update_target_net()

        # Plot and dump to csv every evaluation_interval steps (no actual evaluation
        # is done; we just keep track of the score during training)
        if (T_actor % (args.evaluation_interval / args.nb_actor) == 0
                and id_actor == 0
                and T_actor >= (initial_T_actor + args.evaluation_interval / 2)):
            dump_in_csv_and_plot_reward(redis_servor, args, T_actor, reward_buffer, actor)

        state_buffer_actor = next_state_buffer_actor
        timestep += 1
        T_actor += 1

def bwas_cpp(args, env: Environment, states: List[State], results_file: str):
    assert (args.env.upper() in [
        'CUBE3', 'CUBE4', 'PUZZLE15', 'PUZZLE24', 'PUZZLE35', 'PUZZLE48', 'LIGHTSOUT7'
    ])

    # Make c++ socket
    socket_name: str = "%s_cpp_socket" % results_file.split(".")[0]
    try:
        os.unlink(socket_name)
    except OSError:
        if os.path.exists(socket_name):
            raise
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.bind(socket_name)

    # Get state dimension
    if args.env.upper() == 'CUBE3':
        state_dim: int = 54
    elif args.env.upper() == 'PUZZLE15':
        state_dim: int = 16
    elif args.env.upper() == 'PUZZLE24':
        state_dim: int = 25
    elif args.env.upper() == 'PUZZLE35':
        state_dim: int = 36
    elif args.env.upper() == 'PUZZLE48':
        state_dim: int = 49
    elif args.env.upper() == 'LIGHTSOUT7':
        state_dim: int = 49
    else:
        raise ValueError("Unknown c++ environment: %s" % args.env)

    # start heuristic proc
    num_parallel: int = len(os.environ['CUDA_VISIBLE_DEVICES'].split(","))
    device, devices, on_gpu = nnet_utils.get_device()
    heur_fn_i_q, heur_fn_o_qs, heur_procs = nnet_utils.start_heur_fn_runners(
        num_parallel, args.model_dir, device, on_gpu, env,
        all_zeros=False, clip_zero=True, batch_size=args.nnet_batch_size)
    nnet_utils.heuristic_fn_par(states, env, heur_fn_i_q, heur_fn_o_qs)  # initialize

    heur_proc = Process(target=cpp_listener,
                        args=(sock, args, env, state_dim, heur_fn_i_q, heur_fn_o_qs))
    heur_proc.daemon = True
    heur_proc.start()

    time.sleep(2)  # give the socket time to initialize

    solns: List[List[int]] = []
    paths: List[List[State]] = []
    times: List = []
    num_nodes_gen: List[int] = []
    for state_idx, state in enumerate(states):
        # Get string rep of state
        if args.env.upper() == "CUBE3":
            state_str: str = " ".join([str(x) for x in state.colors])
        elif args.env.upper() in ["PUZZLE15", "PUZZLE24", "PUZZLE35", "PUZZLE48"]:
            state_str: str = " ".join([str(x) for x in state.tiles])
        elif args.env.upper() in ["LIGHTSOUT7"]:
            state_str: str = " ".join([str(x) for x in state.tiles])
        else:
            raise ValueError("Unknown c++ environment: %s" % args.env)

        popen = Popen(['./cpp/parallel_weighted_astar', state_str, str(args.weight),
                       str(args.batch_size), socket_name, args.env, "0"],
                      stdout=PIPE, stderr=PIPE, bufsize=1, universal_newlines=True)
        lines = []
        for stdout_line in iter(popen.stdout.readline, ""):
            stdout_line = stdout_line.strip('\n')
            lines.append(stdout_line)
            if args.verbose:
                sys.stdout.write("%s\n" % stdout_line)
                sys.stdout.flush()

        moves = [int(x) for x in lines[-5].split(" ")[:-1]]
        soln = moves[::-1]
        num_nodes_gen_idx = int(lines[-3])
        solve_time = float(lines[-1])

        # record solution information
        path: List[State] = [state]
        next_state: State = state
        transition_costs: List[float] = []
        for move in soln:
            next_states, tcs = env.next_state([next_state], move)
            next_state = next_states[0]
            tc = tcs[0]
            path.append(next_state)
            transition_costs.append(tc)

        solns.append(soln)
        paths.append(path)
        times.append(solve_time)
        num_nodes_gen.append(num_nodes_gen_idx)

        path_cost: float = sum(transition_costs)

        # check soln
        assert search_utils.is_valid_soln(state, soln, env)

        # print to screen
        print("State: %i, SolnCost: %.2f, # Moves: %i, "
              "# Nodes Gen: %s, Time: %.2f" %
              (state_idx, path_cost, len(soln),
               format(num_nodes_gen_idx, ","), solve_time))

    os.unlink(socket_name)
    nnet_utils.stop_heuristic_fn_runners(heur_procs, heur_fn_i_q)

    return solns, paths, times, num_nodes_gen

def wrapper(*args, **kwargs):
    func = Process(target=f, args=args, kwargs=kwargs)
    func.daemon = False
    func.start()
    return func

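# `wrapper` closes over `f`, so it is presumably the inner function of a
# process-launching decorator. A hedged sketch of that enclosing decorator and
# its use follows; the name `run_in_process` is an assumption, not from the
# source. (This relies on fork-style process start; with the spawn start
# method the decorated target is not picklable by reference.)
import functools
from multiprocessing import Process

def run_in_process(f):
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        proc = Process(target=f, args=args, kwargs=kwargs)
        proc.daemon = False  # non-daemonic: the main process waits for it on exit
        proc.start()
        return proc  # caller may .join() the returned handle

    return wrapper

@run_in_process
def long_task(n):
    print(sum(range(n)))

if __name__ == '__main__':
    handle = long_task(10_000_000)
    handle.join()
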
assert f1[-1] == f2[-1]
last = f1[-1]
f = np.vstack((f1[:-1], f2[:-1]))
f = normalize(f, axis=0)
f = np.vstack((f, [last]))
# if not osp.exists(osp.dirname(dstfn)):
mkdir_p(osp.dirname(dstfn), delete=False, verbose=False)
save_mat(dstfn, f)

queue = Queue(60)
lock = Lock()
consumers = []
for i in range(12):
    p = Process(target=consumer, args=(queue, lock))
    p.daemon = True
    consumers.append(p)
for c in consumers:
    c.start()

comb_from_ = comb_from[0]
assert osp.exists(f'{fea_root}/{comb_from_}')
for fn in glob.glob(f'{fea_root}/{comb_from_}/facescrub/**/*.bin', recursive=True):
    fn2 = fn.replace(comb_from[0], comb_from[1])
    assert osp.exists(fn2), fn2
    fn3 = None
    # fn3 = fn.replace(comb_from[0], comb_from[2])
    dstfn = fn.replace(comb_from[0], dst_name)
    queue.put((fn, fn2, fn3, dstfn))

for ind, imgfn in enumerate(imgfns):
    if ind % 99 == 0:
        print(ind, len(imgfns))

td3_trainer.target_policy_net.share_memory()
ShareParameters(td3_trainer.q_optimizer1)
ShareParameters(td3_trainer.q_optimizer2)
ShareParameters(td3_trainer.policy_optimizer)

rewards_queue = mp.Queue()  # used to collect episode rewards from all processes and plot the curve
num_workers = 4  # or: mp.cpu_count()
processes = []
rewards = []

for i in range(num_workers):
    process = Process(target=worker,
                      args=(i, td3_trainer, rewards_queue, replay_buffer, max_episodes,
                            max_steps, batch_size, explore_steps, update_itr,
                            explore_noise_scale, eval_noise_scale, reward_scale,
                            DETERMINISTIC, hidden_dim, model_path))  # the args contain shared and not shared objects
    process.daemon = True  # all processes are closed when the main process stops
    processes.append(process)
[p.start() for p in processes]

while True:  # keep getting the episode reward from the queue
    r = rewards_queue.get()
    if r is not None:
        rewards.append(r)
    else:
        break
    if len(rewards) % 50 == 0 and len(rewards) > 0:
        plot(rewards)
        np.savetxt('rewards_thre_random02.txt', np.array(rewards)[:, np.newaxis], fmt='%.4f')

def main():
    args = get_args()
    log_dir = create_log_dir(args)
    if not args.test:
        writer = SummaryWriter(log_dir)
    else:
        writer = None

    SEED = 721
    if args.ram_obs or args.env == "slimevolley_v0":
        obs_type = 'ram'
    else:
        obs_type = 'rgb_image'

    env = make_env(args.env, SEED, obs_type=obs_type)
    state_spaces = env.observation_spaces
    action_spaces = env.action_spaces
    print('state_spaces: ', state_spaces, ', action_spaces: ', action_spaces)

    learner_args = {'device': args.device}
    env.reset()
    print(env.agents)
    agents = env.agents

    if args.train_both:
        fixed_agents = []
    else:
        fixed_agents = ['first_0']  # SlimeVolley: the opponent is the first agent; the second agent is the learnable one

    if obs_type == 'ram':
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'MLP',
                                 fixed_agents, learner_args, **hyperparams).to(args.device)
    else:
        # model = PPODiscrete(state_space, action_space, 'CNN', learner_args, **hyperparams).to(device)
        model = MultiPPODiscrete(agents, state_spaces, action_spaces, 'CNN',
                                 fixed_agents, learner_args, **hyperparams).to(args.device)

    load_model(model, args)

    for individual_model in model.agents.values():
        individual_model.policy.share_memory()
        individual_model.policy_old.share_memory()
        individual_model.value.share_memory()
        ShareParameters(individual_model.optimizer)

    path = 'model/' + args.env
    os.makedirs(path, exist_ok=True)
    if args.fictitious:
        path = path + '/fictitious_'

    processes = []
    for p in range(args.num_envs):
        process = Process(target=parallel_rollout,
                          args=(p, args.env, model, writer, max_eps, max_timesteps,
                                selfplay_interval, args.render, path,
                                args.against_baseline, args.selfplay,
                                args.fictitious, SEED))  # the args contain shared and not shared objects
        process.daemon = True  # all processes are closed when the main process stops
        processes.append(process)
    [p.start() for p in processes]
    [p.join() for p in processes]  # wait for all processes to finish

    env.close()