def run(config, seed, device, logdir):
    """Train and evaluate a (Conv)VAE for `train.num_epoch` epochs.

    Args:
        config: dict-like experiment configuration (keys: 'nn.type', 'lr',
            'train.num_epoch', ...).
        seed: global RNG seed.
        device: torch device the model is built on.
        logdir: pathlib.Path directory for checkpoints and pickled logs.

    Raises:
        ValueError: if ``config['nn.type']`` is not 'VAE' or 'ConvVAE'.
    """
    set_global_seeds(seed)
    train_loader, test_loader = make_dataset(config)
    if config['nn.type'] == 'VAE':
        model = VAE(config, device)
    elif config['nn.type'] == 'ConvVAE':
        model = ConvVAE(config, device)
    else:
        # Fail fast: previously an unrecognized type left `model` unbound
        # and surfaced later as a confusing NameError.
        raise ValueError(f"unknown nn.type: {config['nn.type']!r}, expected 'VAE' or 'ConvVAE'")
    optimizer = optim.Adam(model.parameters(), lr=config['lr'])
    engine = Engine(config,
                    model=model,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader)
    train_logs = []
    eval_logs = []
    for epoch in range(config['train.num_epoch']):
        # One full training pass followed by one evaluation pass per epoch.
        train_logger = engine.train(epoch, logdir=logdir)
        train_logs.append(train_logger.logs)
        eval_logger = engine.eval(epoch, logdir=logdir)
        eval_logs.append(eval_logger.logs)
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')
    return None
def run(config, seed, device, logdir):
    """Optimize an agent's parameters with CMA-ES, distributing fitness
    evaluation across a process pool, and periodically checkpoint the best
    solution found so far.
    """
    set_global_seeds(seed)
    print('Initializing...')
    agent = Agent(config, make_env(config, seed), device)
    es = CMAES([config['train.mu0']] * agent.num_params,
               config['train.std0'],
               {'popsize': config['train.popsize'], 'seed': seed})
    train_logs = []
    checkpoint_count = 0
    pool_kwargs = dict(max_workers=config['train.popsize'],
                       initializer=initializer,
                       initargs=(config, seed, device))
    with ProcessPoolExecutor(**pool_kwargs) as executor:
        print('Finish initialization. Training starts...')
        for gen in range(config['train.generations']):
            tic = time.perf_counter()
            candidates = es.ask()
            results = list(executor.map(fitness, candidates, chunksize=2))
            Rs, Hs = zip(*results)
            # CMA-ES minimizes, so feed it negated returns.
            es.tell(candidates, [-R for R in Rs])
            logger = Logger()
            logger('generation', gen + 1)
            logger('num_seconds', round(time.perf_counter() - tic, 1))
            logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('fbest', es.result.fbest)
            train_logs.append(logger.logs)
            if gen == 0 or (gen + 1) % config['log.freq'] == 0:
                logger.dump(keys=None, index=0, indent=0, border='-' * 50)
            # Evenly spaced checkpoint schedule over the generation budget.
            threshold = int(config['train.generations'] * (checkpoint_count / (config['checkpoint.num'] - 1)))
            if (gen + 1) >= threshold:
                agent.from_vec(tensorify(es.result.xbest, 'cpu'))
                agent.checkpoint(logdir, gen + 1)
                checkpoint_count += 1
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    return None
def run(config, seed, device, logdir):
    """Wire up environments, agent, replay buffer and engine for
    DDPG/TD3 training, then run and pickle the resulting logs.
    """
    set_global_seeds(seed)
    train_env = make_env(config, seed, 'train')
    eval_env = make_env(config, seed, 'eval')
    explorer = RandomAgent(config, train_env, device)
    # TD3 and DDPG agents expose the same constructor signature.
    agent_cls = TD3Agent if config['agent.use_td3'] else DDPGAgent
    agent = agent_cls(config, train_env, device)
    buffer = ReplayBuffer(train_env, config['replay.capacity'], device)
    engine = Engine(config,
                    agent=agent,
                    random_agent=explorer,
                    env=train_env,
                    eval_env=eval_env,
                    runner=EpisodeRunner(),
                    replay=buffer,
                    logdir=logdir)
    train_logs, eval_logs = engine.train()
    for name, logs in (('train_logs', train_logs), ('eval_logs', eval_logs)):
        pickle_dump(obj=logs, f=logdir / name, ext='.pkl')
    return None
def run(config, seed, device):
    """Train an on-policy agent until `train.timestep` environment steps,
    with periodic log dumps and checkpoints.

    Note: unlike the sibling `run` functions, this one derives `logdir`
    from the config instead of taking it as a parameter.
    """
    set_global_seeds(seed)
    # Per-run log directory: <log.dir>/<ID>/<seed>
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
    env = make_env(config, seed)
    env = VecMonitor(env)
    # Optional observation/reward normalization wrappers, gated by config.
    if config['env.standardize_obs']:
        env = VecStandardizeObservation(env, clip=5.)
    if config['env.standardize_reward']:
        env = VecStandardizeReward(env, clip=10., gamma=config['agent.gamma'])
    agent = Agent(config, env, device)
    runner = EpisodeRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)
    train_logs = []
    for i in count():
        # Stop once the agent has consumed the full timestep budget.
        if agent.total_timestep >= config['train.timestep']:
            break
        train_logger = engine.train(i)
        train_logs.append(train_logger.logs)
        if i == 0 or (i + 1) % config['log.freq'] == 0:
            train_logger.dump(keys=None, index=0, indent=0, border='-' * 50)
        if i == 0 or (i + 1) % config['checkpoint.freq'] == 0:
            agent.checkpoint(logdir, i + 1)
    # Final checkpoint after the loop ends; `i` is the last loop index.
    # NOTE(review): this may duplicate the in-loop checkpoint when the last
    # iteration already hit checkpoint.freq — confirm this is intended.
    agent.checkpoint(logdir, i + 1)
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    return None
def run(config, seed, device, logdir):
    """Launch learner / actor / evaluator processes sharing one agent.

    The agent's parameters are placed in shared memory so the actor and
    evaluator processes see the learner's updates; a bounded queue carries
    trajectories from actors to the learner.

    Args:
        config: dict-like experiment configuration.
        seed: global RNG seed (also forwarded to actors/evaluator).
        device: torch device for the shared agent.
        logdir: pathlib.Path directory for logs/checkpoints.
    """
    set_global_seeds(seed)
    queue = mp.Queue(maxsize=100)  # bounded so actors block instead of growing memory
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, device)
    agent.share_memory()  # parameters visible across processes
    runner = EpisodeRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)
    learner_process = mp.Process(target=learner, args=(config, logdir, agent, engine, queue))
    actor_processes = [mp.Process(target=actor, args=(config, seed, make_env, agent, runner, queue))
                       for _ in range(config['agent.num_actors'])]
    evaluator_process = mp.Process(target=evaluator, args=(config, logdir, seed, make_env, agent))
    learner_process.start()
    print('Learner started !')
    # Plain loops, not list comprehensions: .start()/.join() are pure side
    # effects and the comprehension built a throwaway list of Nones.
    for p in actor_processes:
        p.start()
    print('Actors started !')
    evaluator_process.start()
    print('Evaluator started !')
    # Join in reverse dependency order: evaluator first, then actors, then learner.
    evaluator_process.join()
    for p in actor_processes:
        p.join()
    learner_process.join()
    return None
def __call__(self, config, seed, device):
    """Entry point: seed all RNGs, derive the run's log directory, and
    run the evolution-strategies master until completion.
    """
    set_global_seeds(seed)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
    master = ESMaster(config, ESWorker, logdir=logdir)
    master()
    return None
def run(config, seed, device, logdir):
    """Optimize an agent's parameters with OpenAI-ES, distributing fitness
    evaluation over a multiprocessing pool, and periodically checkpoint the
    best solution found so far.
    """
    set_global_seeds(seed)
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK
    print('Initializing...')
    # The agent is only used to size the parameter vector and to hold the
    # best solution at checkpoint time; workers evaluate candidates themselves.
    agent = Agent(config, make_env(config, seed, 'eval'), device)
    es = OpenAIES(
        [config['train.mu0']] * agent.num_params, config['train.std0'], {
            'popsize': config['train.popsize'],
            'seed': seed,
            'sigma_scheduler_args': config['train.sigma_scheduler_args'],
            'lr': config['train.lr'],
            'lr_decay': config['train.lr_decay'],
            'min_lr': config['train.min_lr'],
            'antithetic': config['train.antithetic'],
            'rank_transform': config['train.rank_transform']
        })
    train_logs = []
    checkpoint_count = 0
    # Pool sized so each worker handles one chunk; assumes popsize is a
    # multiple of worker_chunksize — TODO confirm.
    with Pool(processes=config['train.popsize'] // config['train.worker_chunksize']) as pool:
        print('Finish initialization. Training starts...')
        for generation in range(config['train.generations']):
            t0 = time.perf_counter()
            solutions = es.ask()
            data = [(config, seed, device, solution) for solution in solutions]
            # CloudpickleWrapper makes the fitness closure picklable for the pool.
            out = pool.map(CloudpickleWrapper(fitness), data, chunksize=config['train.worker_chunksize'])
            Rs, Hs = zip(*out)
            # ES minimizes, so returns are negated.
            es.tell(solutions, [-R for R in Rs])
            logger = Logger()
            logger('generation', generation + 1)
            logger('num_seconds', round(time.perf_counter() - t0, 1))
            logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('fbest', es.result.fbest)
            train_logs.append(logger.logs)
            if generation == 0 or (generation + 1) % config['log.freq'] == 0:
                logger.dump(keys=None, index=0, indent=0, border='-' * 50)
            # Evenly spaced checkpoints across the generation budget.
            # NOTE(review): divides by checkpoint.num - 1, so checkpoint.num
            # must be >= 2 or this raises ZeroDivisionError — confirm configs.
            if (generation + 1) >= int(config['train.generations'] * (checkpoint_count / (config['checkpoint.num'] - 1))):
                agent.from_vec(tensorify(es.result.xbest, 'cpu'))
                agent.checkpoint(logdir, generation + 1)
                checkpoint_count += 1
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    return None
def run(config, seed, device, logdir):
    """Assemble monitored train/eval environments, agent and replay buffer,
    run the engine's training loop, and pickle the resulting logs.
    """
    set_global_seeds(seed)
    # Training env additionally carries per-step info via VecStepInfo.
    train_env = VecStepInfo(VecMonitor(make_env(config, seed)))
    eval_env = VecMonitor(make_env(config, seed))
    agent = Agent(config, train_env, device)
    buffer = ReplayBuffer(train_env, config['replay.capacity'], device)
    engine = Engine(config,
                    agent=agent,
                    env=train_env,
                    eval_env=eval_env,
                    replay=buffer,
                    logdir=logdir)
    train_logs, eval_logs = engine.train()
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')
    return None
def __call__(self, config, seed, device):
    """Train and evaluate a VAE for `train.num_epoch` epochs, logging each
    epoch's outputs under <log.dir>/<ID>/<seed>.
    """
    set_global_seeds(seed)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
    loaders = self.make_dataset(config)
    model = VAE(config=config, device=device)
    # The engine reads loaders and optimizer directly off the model object.
    model.train_loader, model.test_loader = loaders
    model.optimizer = optim.Adam(model.parameters(), lr=1e-3)
    engine = Engine(agent=model, runner=None, config=config)
    for epoch in range(config['train.num_epoch']):
        engine.log_train(engine.train(n=epoch), logdir=logdir, epoch=epoch)
        engine.log_eval(engine.eval(n=epoch), logdir=logdir, epoch=epoch)
    return None
def run(config, seed, device, logdir):
    """Train an agent with a step-based runner until `train.timestep` steps,
    dumping logs periodically and checkpointing on a timestep schedule.
    """
    set_global_seeds(seed)
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, device)
    runner = StepRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)
    train_logs = []
    checkpoint_count = 0
    for i in count():
        # Stop once the full timestep budget is consumed.
        if agent.total_timestep >= config['train.timestep']:
            break
        train_logger = engine.train(i)
        train_logs.append(train_logger.logs)
        if i == 0 or (i + 1) % config['log.freq'] == 0:
            train_logger.dump(keys=None, index=0, indent=0, border='-' * 50)
        # Checkpoints spread evenly over the timestep budget; the first
        # threshold is 0, so iteration 0 always checkpoints.
        # NOTE(review): divides by checkpoint.num - 1, so checkpoint.num
        # must be >= 2 or this raises ZeroDivisionError — confirm configs.
        if agent.total_timestep >= int(config['train.timestep'] * (checkpoint_count / (config['checkpoint.num'] - 1))):
            agent.checkpoint(logdir, i + 1)
            checkpoint_count += 1
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    return None
def __call__(self, config, seed, device):
    """Set up vectorized (optionally wrapped/standardized) environments and
    an A2C-style agent, then train until an iteration or timestep budget is
    reached, logging and optionally evaluating on an independent env.
    """
    set_global_seeds(seed)
    # Per-run log directory: <log.dir>/<ID>/<seed>
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
    if config['env.time_aware_obs']:
        kwargs = {'extra_wrapper': [TimeAwareObservation]}
    else:
        kwargs = {}
    env = make_vec_env(SerialVecEnv, make_gym_env, config['env.id'],
                       config['train.N'], seed, monitor=True, **kwargs)
    # Separate eval env only when independent evaluation is requested.
    if config['eval.independent']:
        eval_env = make_vec_env(SerialVecEnv, make_gym_env, config['env.id'],
                                config['eval.N'], seed)
    if config['env.clip_action']:
        env = VecClipAction(env)
        if config['eval.independent']:
            eval_env = VecClipAction(eval_env)
    if config['env.standardize']:  # running averages of observation and reward
        env = VecStandardize(venv=env,
                             use_obs=True,
                             use_reward=False,  # A2C specific
                             clip_obs=10.,
                             clip_reward=10.,
                             gamma=0.99,
                             eps=1e-8)
    env_spec = EnvSpec(env)
    agent = Agent(config, env_spec, device)
    runner = RollingSegmentRunner(config, agent, env)
    if config['eval.independent']:
        engine = Engine(agent, runner, config, eval_env=eval_env)
    else:
        engine = Engine(agent, runner, config)
    train_logs = []
    eval_logs = []
    for i in count():
        # Two alternative stopping criteria, selected by which key exists.
        if 'train.iter' in config and i >= config['train.iter']:  # enough iterations
            break
        elif 'train.timestep' in config and agent.total_T >= config['train.timestep']:  # enough timesteps
            break
        train_output = engine.train(i)
        if i == 0 or (i + 1) % config['log.interval'] == 0:
            train_log = engine.log_train(train_output)
            train_logs.append(train_log)
            # NOTE(review): evaluation runs only on logging iterations here —
            # confirm eval is meant to share log.interval's cadence.
            if config['eval.independent']:
                with torch.no_grad():  # disable grad, save memory
                    eval_output = engine.eval(n=i)
                eval_log = engine.log_eval(eval_output)
                eval_logs.append(eval_log)
    pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
    pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')
    return None