def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    # single evaluation env plus a vector of parallel worker envs
    env_single = env
    env_gen = env_generator("LunarLanderContinuous-v2", args)
    env_multi = make_envs(env_gen, n_envs=hyper_params["N_WORKERS"])

    # actor (Gaussian policy) and critic (state-value) networks
    actor_hidden = [256, 256]
    critic_hidden = [256, 256]
    actor = GaussianDist(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=actor_hidden,
        hidden_activation=torch.tanh,
    ).to(device)
    critic = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=critic_hidden,
        hidden_activation=torch.tanh,
    ).to(device)

    # one Adam optimizer per network, sharing the same weight decay
    weight_decay = hyper_params["WEIGHT_DECAY"]
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=weight_decay,
    )
    critic_optim = optim.Adam(
        critic.parameters(),
        lr=hyper_params["LR_CRITIC"],
        weight_decay=weight_decay,
    )

    # bundle networks/optimizers and hand everything to the agent
    agent = Agent(
        env_single,
        env_multi,
        args,
        hyper_params,
        (actor, critic),
        (actor_optim, critic_optim),
    )

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
def run(env: gym.Env, args: argparse.Namespace, state_dim: int, action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with discrete action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): number of discrete actions

    """
    # create multiple envs: one single env plus N_WORKERS parallel workers
    env_single = env
    env_gen = env_generator("LunarLander-v2", args)
    env_multi = make_envs(env_gen, n_envs=hyper_params["N_WORKERS"])

    # create online and target Q-networks; the target starts as an exact copy
    hidden_sizes = [128, 64]
    dqn = DuelingMLP(
        input_size=state_dim, output_size=action_dim, hidden_sizes=hidden_sizes
    ).to(device)
    dqn_target = DuelingMLP(
        input_size=state_dim, output_size=action_dim, hidden_sizes=hidden_sizes
    ).to(device)
    dqn_target.load_state_dict(dqn.state_dict())

    # create optimizer (only the online network is trained directly)
    dqn_optim = optim.Adam(
        dqn.parameters(),
        lr=hyper_params["LR_DQN"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (dqn, dqn_target)

    # create an agent
    agent = Agent(env_single, env_multi, args, hyper_params, models, dqn_optim)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
def run(env: gym.Env, args: argparse.Namespace):
    """Run training or test.

    Args:
        env (gym.Env): openAI Gym environment with discrete action space
            (the action dimension is read from ``env.action_space.n``)
        args (argparse.Namespace): arguments including training settings

    """
    # create multiple envs
    # configure environment so that it works for discrete actions
    env_single = env_utils.set_env(env, args, WRAPPERS)
    env_gen = env_generator("Pong-v0", args, WRAPPERS)
    env_multi = make_envs(env_gen, n_envs=hyper_params["N_WORKERS"])

    # create a model
    action_dim = env.action_space.n
    hidden_sizes = [256, 256]

    def get_cnn_model():
        # CNN feature extractor over 4 stacked frames, followed by a
        # dueling MLP head that outputs one Q-value per discrete action
        cnn_model = DuelingCNN(
            cnn_layers=[
                CNNLayer(
                    input_size=4,
                    output_size=32,
                    kernel_size=5,
                    pulling_fn=nn.MaxPool2d(3),
                ),
                CNNLayer(
                    input_size=32,
                    output_size=32,
                    kernel_size=3,
                    pulling_fn=nn.MaxPool2d(3),
                ),
                CNNLayer(
                    input_size=32,
                    output_size=64,
                    kernel_size=2,
                    pulling_fn=nn.MaxPool2d(3),
                ),
            ],
            fc_layers=DuelingMLP(
                input_size=256, output_size=action_dim, hidden_sizes=hidden_sizes
            ),
        ).to(device)
        return cnn_model

    # online and target networks; the target starts as an exact copy
    dqn = get_cnn_model()
    dqn_target = get_cnn_model()
    dqn_target.load_state_dict(dqn.state_dict())

    # create optimizer (only the online network is trained directly)
    dqn_optim = optim.Adam(
        dqn.parameters(),
        lr=hyper_params["LR_DQN"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (dqn, dqn_target)

    # create an agent
    agent = Agent(env_single, env_multi, args, hyper_params, models, dqn_optim)
    agent.env_name = "Pong-v0"

    # run
    if args.test:
        agent.test()
    else:
        agent.train()