Exemplo n.º 1
0
def nature_rainbow(env, frames=4, hidden=512, atoms=51, sigma=0.5):
    """Assemble a convolutional network with a noisy categorical dueling head.

    Three convolution/ReLU stages are flattened and passed to
    ``nn.CategoricalDueling``, which wraps two streams made of
    ``nn.NoisyFactorizedLinear`` layers: the first ends in ``atoms`` outputs,
    the second in ``env.action_space.n * atoms`` outputs.

    Args:
        env: Environment; only ``env.action_space.n`` is read.
        frames: Number of input channels of the first convolution.
        hidden: Width of the hidden layer in each stream.
        atoms: Output count of the first stream, and the per-action
            multiplier for the output count of the second stream.
        sigma: Forwarded as ``sigma_init`` to every noisy layer.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    # 3136 == 64 * 7 * 7, which is what this conv stack yields for an 84x84
    # input -- NOTE(review): the input resolution is assumed; confirm.
    flat_size = 3136

    # Modules are instantiated in the same order in which they appear in the
    # final network, so any RNG-driven initialisation order is unaffected.
    trunk = [
        nn.Scale(1/255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # Presumably the state-value stream of the dueling head -- verify against
    # nn.CategoricalDueling's argument order.
    atoms_stream = nn.Sequential(
        nn.NoisyFactorizedLinear(flat_size, hidden, sigma_init=sigma),
        nn.ReLU(),
        nn.NoisyFactorizedLinear(
            hidden, atoms, init_scale=0, sigma_init=sigma
        ),
    )

    # Presumably the per-action advantage stream -- same caveat as above.
    action_atoms_stream = nn.Sequential(
        nn.NoisyFactorizedLinear(flat_size, hidden, sigma_init=sigma),
        nn.ReLU(),
        nn.NoisyFactorizedLinear(
            hidden, env.action_space.n * atoms, init_scale=0, sigma_init=sigma
        ),
    )

    head = nn.CategoricalDueling(atoms_stream, action_atoms_stream)
    return nn.Sequential(*trunk, head)
Exemplo n.º 2
0
 def __init__(self, env):
     """Create a fully connected layer followed by a transposed-conv stack.

     Args:
         env: Environment; only ``env.action_space.n`` is read. The value is
             stored on ``self.num_actions`` and also sets the channel count
             of the last transposed convolution.
     """
     super().__init__()
     action_count = env.action_space.n
     self.num_actions = action_count

     # 3136 == 64 * 7 * 7 -- presumably reshaped to (64, 7, 7) before the
     # transposed convolutions; that reshape is not visible here, confirm.
     self.fc = nn.Linear(512, 3136)

     # FRAMES is a module-level name defined outside this block.
     output_channels = FRAMES * action_count
     upsampling = [
         nn.ConvTranspose2d(64, 64, 3, stride=1),
         nn.ReLU(),
         nn.ConvTranspose2d(64, 32, 4, stride=2),
         nn.ReLU(),
         nn.ConvTranspose2d(32, output_channels, 8, stride=4),
         nn.Sigmoid(),
         # Presumably maps the sigmoid output back to a 0-255 range --
         # verify against nn.Scale.
         nn.Scale(255),
     ]
     self.deconv = nn.Sequential(*upsampling)
Exemplo n.º 3
0
def nature_ddqn(env, frames=4):
    """Assemble a convolutional network that ends in an ``nn.Dueling`` head.

    Args:
        env: Environment; only ``env.action_space.n`` is read, as the output
            width of the second stream.
        frames: Number of input channels of the first convolution.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    # 3136 == 64 * 7 * 7, the flattened conv output for an 84x84 input --
    # NOTE(review): the input resolution is assumed; confirm.
    flat_size = 3136

    # Construction order mirrors the order of the layers in the network.
    trunk = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # Single-output stream; presumably the state value -- verify against
    # nn.Dueling's argument order.
    scalar_stream = nn.Sequential(
        nn.Linear(flat_size, 512),
        nn.ReLU(),
        nn.Linear0(512, 1),
    )

    # One output per action; presumably the advantages -- same caveat.
    per_action_stream = nn.Sequential(
        nn.Linear(flat_size, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n),
    )

    return nn.Sequential(*trunk, nn.Dueling(scalar_stream, per_action_stream))
Exemplo n.º 4
0
def nature_features(frames=4):
    """Assemble a conv feature extractor that emits a 512-wide vector.

    Args:
        frames: Number of input channels of the first convolution.

    Returns:
        An ``nn.Sequential`` of three conv/ReLU stages, a flatten, and a
        ``Linear(3136, 512)`` followed by ReLU.
    """
    layers = [
        # Presumably rescales raw 0-255 pixel values -- verify against nn.Scale.
        nn.Scale(1/255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        # 3136 == 64 * 7 * 7 -- matches an 84x84 input; resolution assumed.
        nn.Linear(3136, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 5
0
def shared_feature_layers():
    """Assemble a conv feature extractor that emits a 512-wide vector.

    The input channel count comes from the module-level ``FRAMES`` name,
    which is defined outside this block.

    Returns:
        An ``nn.Sequential`` of three conv/ReLU stages, a flatten, and a
        ``Linear(3136, 512)`` followed by ReLU.
    """
    conv_stack = (
        nn.Scale(1 / 255),
        nn.Conv2d(FRAMES, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    )
    # 3136 == 64 * 7 * 7 -- matches an 84x84 input; resolution assumed.
    projection = (nn.Linear(3136, 512), nn.ReLU())
    return nn.Sequential(*conv_stack, *projection)
Exemplo n.º 6
0
def conv_features(frames=4):
    """Assemble a conv feature extractor whose first layer takes ``frames * 3`` channels.

    Args:
        frames: Multiplied by 3 to get the input channel count of the first
            convolution (presumably three colour channels per frame --
            confirm against the caller).

    Returns:
        An ``nn.Sequential`` of three conv/ReLU stages, a flatten, and a
        ``Linear(10816, 512)`` followed by ReLU.
    """
    input_channels = frames * 3
    layers = [
        nn.Scale(1/255),
        nn.Conv2d(input_channels, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        # 10816 == 64 * 13 * 13 -- consistent with a 64x64 input; the input
        # resolution is assumed, confirm.
        nn.Linear(10816, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 7
0
def nature_c51(env, frames=4, atoms=51):
    """Assemble a conv network with ``env.action_space.n * atoms`` outputs.

    Args:
        env: Environment; only ``env.action_space.n`` is read.
        frames: Number of input channels of the first convolution.
        atoms: Number of outputs allotted to each action.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    body = [
        nn.Scale(1/255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        # 3136 == 64 * 7 * 7 -- matches an 84x84 input; resolution assumed.
        nn.Linear(3136, 512),
        nn.ReLU(),
    ]
    # The output layer is created last, exactly as in the layer order.
    output_layer = nn.Linear0(512, env.action_space.n * atoms)
    return nn.Sequential(*body, output_layer)
Exemplo n.º 8
0
 def __init__(self, env, frames=4):
     """Create the conv stack and two linear layers widened by the agent count.

     Args:
         env: Multi-agent environment; ``env.agents`` and
             ``env.action_spaces['first_0']`` are read.
         frames: Number of input channels of the first convolution.
     """
     super().__init__()
     agent_count = len(env.agents)
     # NOTE(review): the action count is taken from the agent keyed
     # 'first_0' only; presumably all agents share one action space --
     # confirm.
     action_count = env.action_spaces['first_0'].n

     conv_layers = [
         nn.Scale(1/255),
         nn.Conv2d(frames, 32, 8, stride=4),
         nn.ReLU(),
         nn.Conv2d(32, 64, 4, stride=2),
         nn.ReLU(),
         nn.Conv2d(64, 64, 3, stride=1),
         nn.ReLU(),
         nn.Flatten(),
     ]
     self.conv = nn.Sequential(*conv_layers)

     # Both linear layers take ``agent_count`` extra inputs; presumably an
     # agent indicator is concatenated in forward(), which is not visible
     # here -- confirm.
     self.hidden = nn.Linear(3136 + agent_count, 512)
     self.output = nn.Linear0(512 + agent_count, action_count)
Exemplo n.º 9
0
def simple_nature_features(frames=4):
    """Assemble a small scale -> linear -> ReLU feature network.

    Args:
        frames: Accepted for signature compatibility; not used by this
            function.

    Returns:
        An ``nn.Sequential`` of ``Scale(1/4)``, ``Linear(1, 16)`` and ReLU.
    """
    layers = [
        nn.Scale(1/4),
        nn.Linear(1, 16),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)