Example #1
def run(env: gym.Env, args: argparse.Namespace, state_dim: int,
        action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): OpenAI Gym environment with continuous action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    # create multiple envs
    env_single = env
    env_gen = env_generator("LunarLanderContinuous-v2", args)
    env_multi = make_envs(env_gen, n_envs=hyper_params["N_WORKERS"])

    # create models
    hidden_sizes_actor = [256, 256]
    hidden_sizes_critic = [256, 256]

    actor = GaussianDist(
        input_size=state_dim,
        output_size=action_dim,
        hidden_sizes=hidden_sizes_actor,
        hidden_activation=torch.tanh,
    ).to(device)

    critic = MLP(
        input_size=state_dim,
        output_size=1,
        hidden_sizes=hidden_sizes_critic,
        hidden_activation=torch.tanh,
    ).to(device)

    # create optimizer
    actor_optim = optim.Adam(
        actor.parameters(),
        lr=hyper_params["LR_ACTOR"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    critic_optim = optim.Adam(
        critic.parameters(),
        lr=hyper_params["LR_CRITIC"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (actor, critic)
    optims = (actor_optim, critic_optim)

    # create an agent
    agent = Agent(env_single, env_multi, args, hyper_params, models, optims)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
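These `run` functions rely on module-level context that the extracted snippets omit: `hyper_params`, `device`, `Agent`, `env_generator`, and `make_envs` are all defined elsewhere in each example file. A minimal invocation sketch, assuming a standard argparse entry point (the flag name and comments below are illustrative, not taken from the source):

import argparse

import gym
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

parser = argparse.ArgumentParser()
parser.add_argument("--test", action="store_true", help="evaluate instead of train")
args = parser.parse_args()

env = gym.make("LunarLanderContinuous-v2")
state_dim = env.observation_space.shape[0]   # 8 for LunarLanderContinuous-v2
action_dim = env.action_space.shape[0]       # 2 for LunarLanderContinuous-v2

run(env, args, state_dim, action_dim)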
Example #2
def run(env: gym.Env, args: argparse.Namespace, state_dim: int,
        action_dim: int):
    """Run training or test.

    Args:
        env (gym.Env): OpenAI Gym environment with discrete action space
        args (argparse.Namespace): arguments including training settings
        state_dim (int): dimension of states
        action_dim (int): dimension of actions

    """
    # create multiple envs
    env_single = env
    env_gen = env_generator("LunarLander-v2", args)
    env_multi = make_envs(env_gen, n_envs=hyper_params["N_WORKERS"])

    # create model
    hidden_sizes = [128, 64]

    dqn = DuelingMLP(input_size=state_dim,
                     output_size=action_dim,
                     hidden_sizes=hidden_sizes).to(device)

    dqn_target = DuelingMLP(input_size=state_dim,
                            output_size=action_dim,
                            hidden_sizes=hidden_sizes).to(device)
    dqn_target.load_state_dict(dqn.state_dict())

    # create optimizer
    dqn_optim = optim.Adam(
        dqn.parameters(),
        lr=hyper_params["LR_DQN"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (dqn, dqn_target)

    # create an agent
    agent = Agent(env_single, env_multi, args, hyper_params, models, dqn_optim)

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
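`dqn_target.load_state_dict(dqn.state_dict())` hard-copies the online network's weights into the target network once, at construction. How the agent keeps the two in sync afterwards is hidden inside `Agent`; one common scheme is Polyak (soft) averaging, sketched below with a hypothetical `tau` coefficient that does not appear in the snippet:

import torch.nn as nn

def soft_update(online: nn.Module, target: nn.Module, tau: float) -> None:
    # target <- tau * online + (1 - tau) * target, applied parameter-wise
    for t_param, o_param in zip(target.parameters(), online.parameters()):
        t_param.data.copy_(tau * o_param.data + (1.0 - tau) * t_param.data)

# e.g. soft_update(dqn, dqn_target, tau=5e-3) once per training step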
Example #3
def run(env: gym.Env, args: argparse.Namespace):
    """Run training or test.

    Args:
        env (gym.Env): OpenAI Gym environment with discrete action space
        args (argparse.Namespace): arguments including training settings

    """
    # create multiple envs
    # configure environment so that it works for discrete actions
    env_single = env_utils.set_env(env, args, WRAPPERS)
    env_gen = env_generator("Pong-v0", args, WRAPPERS)
    env_multi = make_envs(env_gen, n_envs=hyper_params["N_WORKERS"])

    # create a model
    action_dim = env.action_space.n
    hidden_sizes = [256, 256]

    def get_cnn_model():
        # `pulling_fn` below is this codebase's name for the pooling layer
        # applied after each convolution
        cnn_model = DuelingCNN(
            cnn_layers=[
                CNNLayer(
                    input_size=4,
                    output_size=32,
                    kernel_size=5,
                    pulling_fn=nn.MaxPool2d(3),
                ),
                CNNLayer(
                    input_size=32,
                    output_size=32,
                    kernel_size=3,
                    pulling_fn=nn.MaxPool2d(3),
                ),
                CNNLayer(
                    input_size=32,
                    output_size=64,
                    kernel_size=2,
                    pulling_fn=nn.MaxPool2d(3),
                ),
            ],
            fc_layers=DuelingMLP(
                input_size=256, output_size=action_dim, hidden_sizes=hidden_sizes
            ),
        ).to(device)
        return cnn_model

    dqn = get_cnn_model()
    dqn_target = get_cnn_model()
    dqn_target.load_state_dict(dqn.state_dict())

    # create optimizer
    dqn_optim = optim.Adam(
        dqn.parameters(),
        lr=hyper_params["LR_DQN"],
        weight_decay=hyper_params["WEIGHT_DECAY"],
    )

    # make tuples to create an agent
    models = (dqn, dqn_target)

    # create an agent
    agent = Agent(env_single, env_multi, args, hyper_params, models, dqn_optim)
    agent.env_name = "Pong-v0"

    # run
    if args.test:
        agent.test()
    else:
        agent.train()
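The `fc_layers` head hard-codes `input_size=256`, which only works if the three conv/pool stages flatten to exactly 256 features. For a stacked 4 x 84 x 84 Atari input (the usual preprocessing; an assumption here, since the wrappers are not shown), the shapes work out: 84 -> 80 -> 26 after the first conv/pool, 26 -> 24 -> 8 after the second, and 8 -> 7 -> 2 after the third, giving 64 * 2 * 2 = 256. A standalone check of that arithmetic, rebuilt in plain PyTorch (activations omitted since they do not affect shapes):

import torch
import torch.nn as nn

conv_stack = nn.Sequential(
    nn.Conv2d(4, 32, kernel_size=5), nn.MaxPool2d(3),
    nn.Conv2d(32, 32, kernel_size=3), nn.MaxPool2d(3),
    nn.Conv2d(32, 64, kernel_size=2), nn.MaxPool2d(3),
)

with torch.no_grad():
    dummy = torch.zeros(1, 4, 84, 84)  # batch of one stacked frame
    n_features = conv_stack(dummy).flatten(start_dim=1).shape[1]

print(n_features)  # 256, matching fc_layers' input_size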