def main():
    num_envs = NUM_ENV
    memory = mp.Queue(maxsize=NUM_ENV)
    cond = mp.Condition()

    # make agents and share memory
    actor_agent = ActorAgent()
    learner_agent = LearnerAgent()

    # sync the actor's model with the learner's
    actor_agent.update_actor_model(learner_agent.model)

    # make envs
    envs = [Environment(gym.make('CartPole-v1'), i) for i in range(num_envs)]

    # learner process (only learns)
    learn_proc = mp.Process(target=learner,
                            args=(cond, memory, actor_agent, learner_agent))

    # runner processes (only collect experience, never learn)
    runners = []
    for env in envs:
        run_proc = mp.Process(target=runner,
                              args=(env, cond, memory, actor_agent))
        runners.append(run_proc)
        run_proc.start()

    learn_proc.start()

    for proc in runners:
        proc.join()
    learn_proc.join()
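# Not part of the original snippet: a minimal, hypothetical sketch of the
# `runner` and `learner` targets that main() spawns, inferred only from the
# argument lists above (get_action/train are assumed APIs). Putting into the
# queue while holding the condition's lock means the learner cannot acquire
# the lock to notify until every contributing runner is already waiting, so
# no wakeup is lost.
def runner(env, cond, memory, actor_agent):
    state = env.reset()
    while True:
        action = actor_agent.get_action(state)               # assumed API
        next_state, reward, done, _ = env.step(action)
        with cond:
            memory.put((state, action, reward, next_state, done))
            cond.wait()   # block until the learner finishes an update
        state = env.reset() if done else next_state


def learner(cond, memory, actor_agent, learner_agent):
    while True:
        # blocks until every runner has contributed one transition
        batch = [memory.get() for _ in range(NUM_ENV)]
        learner_agent.train(batch)                           # assumed API
        actor_agent.update_actor_model(learner_agent.model)
        with cond:
            cond.notify_all()   # release the waiting runners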
def mp_trainer(np, model, grad_buffer, optimizer, it_num=0):
    # `np` is the number of trainer processes (not numpy)
    if np is None:
        print("cannot get the number of processes!")
        sys.exit(-1)

    # np trainers plus one optimizer synchronize on the same barrier
    barrier = mp.Barrier(np + 1)
    condition = mp.Condition()

    p_opt_args = (np, it_num, barrier, optimizer, condition, model, grad_buffer)
    p_opt = mp.Process(target=optimizer_process, args=p_opt_args)
    p_opt.start()

    processes = [p_opt]

    # episode score in shared memory, visible to all trainers
    shared_score = torch.FloatTensor([0])
    shared_score.share_memory_()

    for rank in range(np):
        p_trainer_args = (rank, it_num, barrier, optimizer, condition,
                          model, grad_buffer, shared_score, np)
        p_trainer = mp.Process(target=trainer_process, args=p_trainer_args)
        p_trainer.start()
        processes.append(p_trainer)

    for p in processes:
        p.join()
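# Not part of the original snippet: a hypothetical sketch of the double-
# barrier handshake that mp.Barrier(np + 1) suggests. The real
# trainer_process/optimizer_process bodies are not shown; compute_loss,
# write_grads and apply_grads are assumed helpers.
def trainer_process(rank, it_num, barrier, optimizer, condition,
                    model, grad_buffer, shared_score, num_procs):
    for it in range(it_num):
        loss = compute_loss(model)                  # assumed helper
        loss.backward()
        write_grads(grad_buffer, model, rank)       # assumed helper
        barrier.wait()   # phase 1: all gradients written
        barrier.wait()   # phase 2: optimizer finished the update


def optimizer_process(num_procs, it_num, barrier, optimizer, condition,
                      model, grad_buffer):
    for it in range(it_num):
        barrier.wait()   # phase 1: wait for every trainer's gradients
        apply_grads(optimizer, model, grad_buffer)  # assumed helper
        barrier.wait()   # phase 2: release trainers into the next iteration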
def __init__(self, keywords, num_keywords=-1, batch_size=5, num_frames=5,
             num_files=50, num_file_offset=0, is_training=False,
             is_test=False, test_file_name="", total_epoch=10,
             is_multithread=False):
    self.current_epoch = 0
    self.current_iter = 0
    self.prepared_batch_iter = 0
    self.total_epoch = total_epoch
    self.is_multithread = is_multithread
    self.batch_size = batch_size
    self.num_frames = num_frames
    self.keywords = keywords
    self.num_files = num_files
    self.num_file_offset = num_file_offset
    self.is_training = is_training
    self.tf = self.transform_frame()
    self.is_test = is_test

    # class directories, filtered by keyword or truncated to num_keywords
    self.dirs_to_check = os.listdir('./training')
    if len(self.keywords) != 0:
        self.dirs_to_check = [d for d in self.dirs_to_check
                              if d in self.keywords]
    elif num_keywords != -1:
        self.dirs_to_check = self.dirs_to_check[:num_keywords]
    self.num_class = len(self.dirs_to_check)

    self.entire_file_list = []
    if not self.is_test:
        # collect (file_path, class_index) pairs for each class directory
        for dir_index, dir_name in enumerate(self.dirs_to_check):
            file_names = os.listdir('./training/' + dir_name)
            for file_name in file_names[self.num_file_offset:
                                        self.num_file_offset + self.num_files]:
                self.entire_file_list.append(
                    ('./training/' + dir_name + '/' + file_name, dir_index))
        if self.is_training:
            shuffle(self.entire_file_list)
    else:
        self.entire_file_list.append(test_file_name)
    print(self.dirs_to_check)

    if is_multithread:
        self.cond = mp.Condition()
        self.worker = mp.Process(target=self.prepare_mini_batch)
        # batches that are ready to be served :)
        # hold at most 5 prepared batches at a time
        self.batch_queue = mp.Queue(maxsize=5)
        # start the batch-preparation worker
        self.worker.start()
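# Not part of the original snippet: a hypothetical sketch of the
# prepare_mini_batch worker target set up above. The real method is not
# shown; load_clip is an assumed helper, and the epoch loop is a guess
# based on total_epoch. The bounded queue provides backpressure.
def prepare_mini_batch(self):
    for epoch in range(self.total_epoch):
        for start in range(0, len(self.entire_file_list), self.batch_size):
            files = self.entire_file_list[start:start + self.batch_size]
            batch = [self.load_clip(path, label)      # assumed helper
                     for path, label in files]
            # put() blocks once 5 batches are waiting, so the worker
            # never runs far ahead of the consumer
            self.batch_queue.put(batch)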
def __init__(self, context: ModelTrainerContext, model_dir: Path,
             feature_extractor: str, cache: FeatureCache, linear_only: bool,
             search_id: SearchId, trainer_index: int):
    super().__init__(context, model_dir, feature_extractor, cache, True)

    self._search_id = search_id
    self._trainer_index = trainer_index

    self._param_grid = [{'C': C_VALUES, 'kernel': ['linear']}]
    if not linear_only:
        self._param_grid.append(
            {'C': C_VALUES, 'gamma': GAMMA_VALUES, 'kernel': ['rbf']})

    self._train_condition = mp.Condition()
    self._train_id = None
    self._train_results = {}
def __init__(self, path, classes=None, shuffle=False,
             input_trans=transforms.Compose([transforms.ToTensor()]),
             target_trans=None, ext=['.png', '.jpg', '.jpeg'], **kwargs):
    self.path = Path(path)
    self.classes = classes
    self.input_trans = input_trans
    self.target_trans = target_trans
    self.samples = []
    self.cache = {}
    # condition backed by a re-entrant lock, so a process that already
    # holds it can safely re-acquire it
    self.cv = mp.Condition(mp.RLock())
    self.scan(ext)
    # fixed index order, permuted once up front when shuffle is requested
    self.shuffled = (torch.randperm(len(self.samples)).tolist()
                     if shuffle else list(range(len(self.samples))))
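# Not part of the original snippet: a hypothetical sketch of a cache lookup
# guarded by the RLock-backed condition created above. __getitem__ and
# load() are not shown in the original; both are assumptions here.
def __getitem__(self, index):
    index = self.shuffled[index]
    # the condition's re-entrant lock guards the shared cache dict
    with self.cv:
        if index not in self.cache:
            self.cache[index] = self.load(index)  # assumed helper
        inp, target = self.cache[index]
    if self.input_trans is not None:
        inp = self.input_trans(inp)
    if self.target_trans is not None:
        target = self.target_trans(target)
    return inp, target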
def __init__(self, context: ModelTrainerContext, feature_extractor: str,
             cache: FeatureCache, probability: bool, linear_only: bool,
             search_id: SearchId, trainer_index: int):
    super().__init__(context, feature_extractor, cache, probability)

    self._linear_only = linear_only
    self._search_id = search_id
    self._trainer_index = trainer_index

    self._param_grid = []
    self._param_grid_lock = threading.Lock()

    self._train_condition = mp.Condition()
    self._train_id = None
    self._train_results = {}

    self._param_grid_event = threading.Event()
    # only the coordinator node (index 0) assigns the hyperparameter grid
    if self.context.node_index == 0:
        threading.Thread(target=self._assign_param_grid,
                         name='set-param-grid').start()
def work(param_server, replay_buffer, scheduler, parser_args):
    worker = Agent(param_server=param_server,
                   replay_buffer=replay_buffer,
                   scheduler=scheduler,
                   parser_args=parser_args)
    worker.run()


def run_server(param_server):
    param_server.run()


if __name__ == '__main__':
    # two conditions built on one shared lock: workers and the server
    # synchronize on the same mutex but can be woken independently
    lock = mp.Lock()
    worker_cv = mp.Condition(lock)
    server_cv = mp.Condition(lock)

    p_args = arg_parser.parse_args()
    shared_param_server = ParameterServer(parser_args=p_args,
                                          worker_cv=worker_cv,
                                          server_cv=server_cv)
    shared_replay_buffer = SharedReplayBuffer(parser_args=p_args,
                                              cv=worker_cv)
    shared_scheduler = SacQ(parser_args=p_args)

    if p_args.num_workers > 1:
        processes = [
            mp.Process(target=work,
                       args=(shared_param_server, shared_replay_buffer,
                             shared_scheduler, p_args))
            for _ in range(p_args.num_workers)
        ]
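# Not part of the original snippet: a hypothetical sketch of how
# ParameterServer.run() might use the two conditions that share one lock.
# The real class is not shown; `pending`, push_update and the gradient
# details are assumptions for illustration only.
class ParameterServer:
    def __init__(self, worker_cv, server_cv, **kwargs):
        self.worker_cv = worker_cv
        self.server_cv = server_cv
        self.pending = mp.Value('i', 0)  # update count shared across processes

    def push_update(self):
        # called from worker processes after writing gradients
        with self.server_cv:
            self.pending.value += 1
            self.server_cv.notify()

    def run(self):
        while True:
            with self.server_cv:
                self.server_cv.wait_for(lambda: self.pending.value > 0)
                self.pending.value = 0
                # ... apply the queued gradient updates here ...
                # both conditions wrap the same lock, so the server can
                # signal the workers while still holding it
                self.worker_cv.notify_all()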
train_device = 0
test_device = 1
if args.one_gpu:
    test_device = 0

# create the output directory if args.outfile includes one
out_dir = os.path.dirname(args.outfile)
if out_dir and not os.path.exists(out_dir):
    os.makedirs(out_dir)

num_classes = args.num_classes
test_freq = 1
total_classes = args.total_classes
num_iters = args.num_iters

# condition variable and shared counters for train/test synchronization
cond_var = mp.Condition()
train_counter = mp.Value("i", 0)
test_counter = mp.Value("i", 0)
dataQueue = mp.Queue()
all_done = mp.Event()
data_mgr = mp.Manager()

if os.path.exists("data_generator/cifar_mean_image.npy"):
    mean_image = np.load("data_generator/cifar_mean_image.npy")
else:
    mean_image = None

K = args.num_exemplars  # total number of exemplars

model = IncrNet(args, device=train_device, cifar=True)
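# Not part of the original snippet: a hypothetical sketch of how cond_var,
# train_counter and test_counter could keep the training and testing
# processes in lockstep (with test_freq = 1 as above, one evaluation per
# increment). train_one_increment and evaluate are assumed helpers.
def train_loop(model, cond_var, train_counter, test_counter):
    for _ in range(num_iters):
        train_one_increment(model)        # assumed helper
        with cond_var:
            train_counter.value += 1
            cond_var.notify_all()
            # pause until the tester has evaluated this checkpoint
            cond_var.wait_for(
                lambda: test_counter.value >= train_counter.value)


def test_loop(model, cond_var, train_counter, test_counter):
    while not all_done.is_set():
        with cond_var:
            cond_var.wait_for(
                lambda: train_counter.value > test_counter.value)
            evaluate(model)               # assumed helper
            test_counter.value += 1
            cond_var.notify_all()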
    learn_proc.start()

    # runner processes (only collect experience, never learn);
    # `pool` is assumed to be an mp.Pool created earlier in this variant
    pool.map(runner, envs)

    learn_proc.join()


if __name__ == '__main__':
    env = gym.make('CartPole-v1')

    # hyperparameters
    INPUT = env.observation_space.shape[0]
    OUTPUT = env.action_space.n
    DISCOUNT = 0.99
    NUM_STEP = 5
    NUM_ENV = 8
    NUM_WORKER = NUM_ENV
    EPSILON = 1e-5
    ALPHA = 0.99
    LEARNING_RATE = 7e-4
    env.close()

    # make agent and share its model's memory across processes
    agent = A2CAgent()
    agent.model.share_memory()
    cond = mp.Condition()
    memory = mp.Queue(maxsize=NUM_ENV)

    main()