def train(env_id, num_timesteps, seed, policy, lrschedule, num_cpu):
    def make_env(rank):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(
                env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            gym.logger.setLevel(logging.WARN)
            return env
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'radlstm':
        policy_fn = RadLstmPolicy
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1),
          lrschedule=lrschedule)
    env.close()
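A function like the one above is typically driven from a small command-line entry point. The sketch below shows one plausible way to invoke it; the argparse flag names and defaults are illustrative assumptions, not part of the original code.

def main():
    # Hypothetical CLI wrapper (assumed, for illustration only).
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--policy', choices=['cnn', 'lstm', 'radlstm'], default='cnn')
    parser.add_argument('--lrschedule', choices=['constant', 'linear'], default='constant')
    parser.add_argument('--million_frames', type=int, default=40)
    parser.add_argument('--num_cpu', type=int, default=16)
    args = parser.parse_args()
    train(args.env, num_timesteps=int(1e6 * args.million_frames), seed=args.seed,
          policy=args.policy, lrschedule=args.lrschedule, num_cpu=args.num_cpu)

if __name__ == '__main__':
    main()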
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    # NUM_CPU is a module-level constant; here it sets the frame-stack depth.
    env = VecFrameStack(make_atari_env(env_id, num_env, seed), NUM_CPU)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1),
          lrschedule=lrschedule)
    env.close()
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    # Avoid shadowing the built-in `dict`; pass the wrapper options explicitly.
    wrapper_kwargs = {'clip_rewards': False}
    env = VecFrameStack(
        make_atari_env(env_id, num_env, seed, wrapper_kwargs=wrapper_kwargs), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1),
          lrschedule=lrschedule)
    env.close()
def train(map_name, num_timesteps, batch_steps, seed, network, ar, lr, lrschedule,
          screen_size, minimap_size, step_mul, num_cpu, optimizer, ent_coef,
          vl_coef, max_grad_norm):
    maps.get(map_name)  # Assert the map exists.

    log_path = './experiments/%s/' % (time.strftime("%m%d_%H%M_") + map_name)
    make_path(log_path)
    make_path("%sreplay" % log_path)

    def make_env(rank):
        def _thunk():
            # Note: feature sizes are hard-coded to 64 here; screen_size and
            # minimap_size are only recorded in the config log below.
            agent_interface = features.parse_agent_interface_format(
                feature_screen=64, feature_minimap=64)
            env = sc2_env.SC2Env(
                map_name=map_name,
                step_mul=step_mul,
                agent_interface_format=agent_interface,
                # screen_size_px=(screen_size, screen_size),
                # minimap_size_px=(minimap_size, minimap_size),
                visualize=False)
            return env
        return _thunk

    set_global_seeds(seed)

    log_file = open("%sconfig.log" % log_path, "a+")
    log_file.write("Map Name: %s\n" % map_name)
    log_file.write("Optimizer: %s\n" % optimizer)
    log_file.write("Network: %s\n" % network)
    log_file.write("Learning Rate: %f\n" % lr)
    log_file.write("Entropy Coefficient: %f\n" % ent_coef)
    log_file.write("Value Function Coefficient: %f\n" % vl_coef)
    log_file.write("Maximum Gradient Norm: %f\n" % max_grad_norm)
    log_file.write("Screen Size: %d\n" % screen_size)
    log_file.write("Minimap Size: %d\n" % minimap_size)
    log_file.write("Batch Steps: %d\n" % batch_steps)
    log_file.close()

    learn(network, log_path, make_env, total_timesteps=num_timesteps,
          nsteps=batch_steps, ent_coef=ent_coef, max_grad_norm=max_grad_norm,
          optimizer=optimizer, vl_coef=vl_coef, ar=ar, lr=lr, num_cpu=num_cpu)
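The function above calls a `make_path` helper that is not defined in this snippet. A minimal sketch of such a helper, assuming it simply creates the directory (including parents) when it does not already exist:

import os

def make_path(path):
    # Hypothetical helper: create the log directory if it is missing.
    os.makedirs(path, exist_ok=True)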
def train(env_id, num_timesteps, num_cpu):
    # SEED and CNN are expected to be defined at module level.
    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(SEED + rank)
            gym.logger.setLevel(logging.WARN)
            env = wrap_deepmind(env)
            # Wrap the env one more time to track the total episode reward.
            env = Monitor(env, rank)
            return env
        return _thunk

    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    learn(CNN, env, SEED, total_timesteps=int(num_timesteps * 1.1))
    env.close()
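The `Monitor(env, rank)` wrapper used above is a custom reward tracker rather than the baselines `bench.Monitor`. A minimal sketch of what such a wrapper could look like, assuming it only accumulates the per-episode reward and reports it when the episode ends; the class body is an assumption for illustration:

import gym

class Monitor(gym.Wrapper):
    # Hypothetical wrapper: sums rewards over an episode and attaches the
    # total to `info` when the episode terminates.
    def __init__(self, env, rank):
        super().__init__(env)
        self.rank = rank
        self.episode_reward = 0.0

    def reset(self, **kwargs):
        self.episode_reward = 0.0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.episode_reward += reward
        if done:
            info['episode_reward'] = self.episode_reward
        return obs, reward, done, info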
def train(env_id, num_timesteps, seed, policy, lrschedule, num_env, args):
    if policy == 'i2a':
        policy_fn = I2A
    elif policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    # Note: the environment is hard-coded to MsPacman; env_id is ignored.
    env = VecFrameStack(
        make_atari_env('MsPacmanNoFrameskip-v0', num_env, seed), 4)
    learn(policy_fn, env, seed, total_timesteps=int(num_timesteps * 1.1),
          lrschedule=lrschedule, args=args)
    env.close()
def main(env_id, num_timesteps, seed, policy, nstack, nsteps, lrschedule, optimizer,
         num_cpu, model_file, use_static_wrapper, use_encoded_imagination,
         use_decoded_imagination):
    num_timesteps //= 4
    assert not (use_encoded_imagination and use_decoded_imagination)

    def make_env(rank):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)
            if use_static_wrapper:
                env = StaticWrapper(env)
            if policy == 'cnn' or use_encoded_imagination:
                env = RenderWrapper(env, 400, 600)
                env = DownsampleWrapper(env, 4)
            if use_encoded_imagination or use_decoded_imagination:
                env = FrameStack(env, 3)
            if use_encoded_imagination:
                env = EncodedImaginationWrapper(env, model_file, num_cpu)
            if use_decoded_imagination:
                env = DecodedImaginationWrapper(env, model_file, num_cpu)
            gym.logger.setLevel(logging.WARN)
            return env
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])
    if policy == 'fc':
        policy_fn = FcPolicy
    if policy == 'cnn':
        policy_fn = CnnPolicy
    learn(policy_fn, env, seed, nsteps=nsteps, nstack=nstack,
          total_timesteps=num_timesteps, lrschedule=lrschedule,
          optimizer=optimizer, max_episode_length=195)
    env.close()
def train(config, num_frames, seed, policy, lrschedule, num_cpu, ckpt, nsteps,
          start_port=8000, dfn=all):
    # Divide by 4 due to frameskip, then add a little extra so episodes finish.
    num_timesteps = int(num_frames / 4 * 1.1)

    def make_env(rank):
        def _thunk():
            port = rank + start_port
            gym.logger.setLevel(logging.WARN)
            return wrap_ma_doom(config, NUM_PLAYERS, port)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)], dfn)
    if policy == 'comm':
        policy_fn = MACommPolicy
    elif policy == 'commsep':
        policy_fn = MACommSepCriticPolicy
    elif policy == 'cnn':
        policy_fn = MACnnPolicy
    elif policy == 'recon':
        policy_fn = MAReconPolicy
    elif policy == 'lnlstm':
        raise NotImplementedError
    time.sleep(num_cpu * 1)  # Give the workers time to create their environments.
    print("creation complete, start running!")
    return learn(policy_fn, env, seed, nsteps=nsteps, checkpoint=ckpt,
                 total_timesteps=num_timesteps, lrschedule=lrschedule)
def train(config, num_frames, seed, policy, lrschedule, num_cpu, ckpt, nsteps, dfn=all):
    # Divide by 4 due to frameskip, then add a little extra so episodes finish.
    num_timesteps = int(num_frames / 4 * 1.1)

    def make_env(rank):
        def _thunk():
            gym.logger.setLevel(logging.WARN)
            return wrap_predator_prey(**config)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)], dfn,
                        nplayers=config["npredator"])
    if policy == 'nmap':
        policy_fn = MANMapPolicy
    elif policy == 'cnn':
        policy_fn = MACnnPolicy
    elif policy == 'lnlstm':
        raise NotImplementedError
    time.sleep(num_cpu * 0.5)  # Give the workers time to create their environments.
    print("creation complete, start running!")
    logs_path = "log/" + policy_fn.__name__ + "_" + str(config["po_radius"])
    return learn(policy_fn, env, seed, logs_path, nplayers=config["npredator"],
                 nsteps=nsteps, checkpoint=ckpt, total_timesteps=num_timesteps,
                 lrschedule=lrschedule, eval_env_fn=make_env(0))