Exemplo n.º 1
0
 def __init__(self, env, hidden1=400, hidden2=300):
     """Create ``self.model`` (a tanh MLP) and the ``self.log_stds`` parameter.

     Args:
         env: Object exposing ``state_space.shape`` and
             ``action_space.shape``.
         hidden1: Width of the first hidden layer.
         hidden2: Width of the second hidden layer.
     """
     super().__init__()
     # The first layer is one unit wider than the state vector
     # (presumably an extra feature appended upstream -- confirm with caller).
     input_dim = env.state_space.shape[0] + 1
     action_dim = env.action_space.shape[0]
     layers = [
         nn.Linear(input_dim, hidden1),
         nn.Tanh(),
         nn.Linear(hidden1, hidden2),
         nn.Tanh(),
         nn.Linear(hidden2, action_dim),
     ]
     self.model = nn.Sequential(*layers)
     # One learnable entry per action dimension, initialised to zero.
     self.log_stds = nn.Parameter(torch.zeros(action_dim))
Exemplo n.º 2
0
def critic(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU network with a single scalar output.

    Args:
        env: Object exposing ``state_space.shape``; its first entry sets the
            input width.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        The ``nn.Sequential`` model after ``init_weights`` has been applied
        to every sub-module and the parameters have been cast with
        ``float()``.
    """
    state_dim = env.state_space.shape[0]
    layers = (
        nn.Linear(state_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, 1),
    )
    model = nn.Sequential(*layers)
    model.apply(init_weights)
    model.float()
    return model
Exemplo n.º 3
0
def fc_relu_q(env, hidden=64):
    """Build a one-hidden-layer ReLU network with one output per action.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the hidden layer.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear.
    """
    state_dim = env.state_space.shape[0]
    n_actions = env.action_space.n
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
        nn.Linear(hidden, n_actions),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 4
0
def fc_soft_policy(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU network ending in ``nn.Linear0``.

    The input is one unit wider than ``env.state_space.shape[0]`` and the
    output has two units per entry of ``env.action_space.shape[0]``
    (presumably two distribution parameters per action -- confirm with the
    consuming policy).

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
    """
    input_dim = env.state_space.shape[0] + 1
    output_dim = env.action_space.shape[0] * 2
    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, output_dim),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 5
0
def dueling_fc_relu_q(env):
    """Build a flattened-input network whose head is an ``nn.Dueling`` module.

    Two separate 256-unit ReLU streams are created: the first ends in a
    single output, the second in ``env.action_space.n`` outputs. They are
    passed to ``nn.Dueling`` in that order.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
    """
    state_dim = env.state_space.shape[0]
    # Stream construction order is kept: scalar stream first.
    scalar_stream = nn.Sequential(
        nn.Linear(state_dim, 256),
        nn.ReLU(),
        nn.Linear(256, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(state_dim, 256),
        nn.ReLU(),
        nn.Linear(256, env.action_space.n),
    )
    head = nn.Dueling(scalar_stream, per_action_stream)
    return nn.Sequential(nn.Flatten(), head)
Exemplo n.º 6
0
def lunar_lander_nature_ddqn(env, hidden=64):
    """Build a two-hidden-layer ReLU network with one output per action.

    The hidden width used to be hard-coded as 64 in three places; it is now
    a single keyword parameter whose default reproduces the previous
    architecture exactly.

    Args:
        env: Object exposing ``observation_space.shape`` and
            ``action_space.n``.
        hidden: Width shared by both hidden layers.

    Returns:
        ``nn.Sequential`` of Linear -> ReLU -> Linear -> ReLU -> Linear.
    """
    return nn.Sequential(
        nn.Linear(env.observation_space.shape[0], hidden),
        nn.ReLU(),
        nn.Linear(hidden, hidden),
        nn.ReLU(),
        nn.Linear(hidden, env.action_space.n),
    )
Exemplo n.º 7
0
def fc_v(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU network with a single ``nn.Linear0`` output.

    The input is one unit wider than ``env.state_space.shape[0]``.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
    """
    input_dim = env.state_space.shape[0] + 1
    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, 1),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 8
0
def fc_v(env, hidden1=516, hidden2=516):
    """Build a two-hidden-layer ReLU network with a single ``nn.Linear0`` output.

    Prints ``Custom V loaded`` each time it is called, before the network is
    built. The input is one unit wider than ``env.state_space.shape[0]``.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
    """
    # Emitted first so the message appears even if construction fails later.
    print("Custom V loaded")
    input_dim = env.state_space.shape[0] + 1
    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, 1),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 9
0
def fc_policy(env):
    """Build a flattened-input, two-hidden-layer (64, 64) ReLU network.

    The final ``nn.Linear0`` layer has two outputs per entry of
    ``env.action_space.shape[0]``.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
    """
    state_dim = env.state_space.shape[0]
    output_dim = env.action_space.shape[0] * 2
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear0(64, output_dim),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 10
0
def nature_ddqn(env, frames=4):
    """Build a three-layer convolutional network with an ``nn.Dueling`` head.

    The input is multiplied by 1/255, passed through three Conv2d + ReLU
    stages and flattened. The flattened vector is expected to have 3136
    entries, which fixes the input resolution (not checked here).

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels of the first convolution.
    """
    trunk = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]
    # Stream construction order is kept: scalar stream first.
    scalar_stream = nn.Sequential(
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, 1),
    )
    per_action_stream = nn.Sequential(
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n),
    )
    return nn.Sequential(*trunk, nn.Dueling(scalar_stream, per_action_stream))
Exemplo n.º 11
0
 def test_dueling(self):
     """Check ``nn.Dueling`` output against fixed reference values.

     The RNG is seeded before the two linear models are created, so the
     expected numbers depend on the seed and on that creation order.
     """
     torch.random.manual_seed(0)
     value_model = nn.Linear(2, 1)
     advantage_model = nn.Linear(2, 3)
     model = nn.Dueling(value_model, advantage_model)
     # Batch of two states with two features each.
     states = torch.tensor([[1., 2.], [3., 4.]])
     result = model(states).detach().numpy()
     # One row per state, one column per output of the advantage model.
     np.testing.assert_array_almost_equal(
         result,
         np.array([[-0.495295, 0.330573, 0.678836],
                   [-1.253222, 1.509323, 2.502186]],
                  dtype=np.float32))
Exemplo n.º 12
0
def fc_actor_critic(env, hidden1=400, hidden2=300):
    """Build a shared feature layer plus two heads that consume its output.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
        hidden1: Output width of the feature layer, and input width of both
            heads.
        hidden2: Hidden width inside each head.

    Returns:
        A ``(features, v, policy)`` tuple of ``nn.Sequential`` modules.
        ``features`` takes an input one unit wider than the state vector,
        ``v`` ends in one output, and ``policy`` ends in two outputs per
        entry of ``env.action_space.shape[0]``.
    """
    input_dim = env.state_space.shape[0] + 1
    policy_dim = env.action_space.shape[0] * 2

    # Modules are created in the original order: features, v, policy.
    shared = nn.Sequential(nn.Linear(input_dim, hidden1), nn.ReLU())
    value_head = nn.Sequential(
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, 1),
    )
    policy_head = nn.Sequential(
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, policy_dim),
    )
    return shared, value_head, policy_head
Exemplo n.º 13
0
def fc_relu_dist_q(env, hidden=64, atoms=51):
    """Build a one-hidden-layer ReLU network with ``atoms`` outputs per action.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the hidden layer.
        atoms: Number of outputs produced for each action.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> ``nn.Linear0``.
    """
    state_dim = env.state_space.shape[0]
    output_dim = env.action_space.n * atoms
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
        nn.Linear0(hidden, output_dim),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 14
0
    def test_list(self):
        """Check ``nn.RLNetwork`` output on two batches against fixed values.

        The expected numbers depend on the RNG state when the model and the
        random features are created (presumably seeded in the test fixture
        -- confirm), so statement order must not change.
        """
        model = nn.Linear(2, 2)
        net = nn.RLNetwork(model, (2,))
        features = torch.randn((4, 2))
        done = torch.tensor([False, False, True, False])
        out = net(StateArray(features, (4,), done=done))
        # The row for the entry flagged done=True is expected to be all zeros.
        tt.assert_almost_equal(
            out,
            torch.tensor(
                [
                    [0.0479387, -0.2268031],
                    [0.2346841, 0.0743403],
                    [0.0, 0.0],
                    [0.2204496, 0.086818],
                ]
            ),
        )

        # Second batch: no entry is done, so no row is expected to be zeroed.
        features = torch.randn(3, 2)
        done = torch.tensor([False, False, False])
        out = net(StateArray(features, (3,), done=done))
        tt.assert_almost_equal(
            out,
            torch.tensor(
                [
                    [0.4234636, 0.1039939],
                    [0.6514298, 0.3354351],
                    [-0.2543002, -0.2041451],
                ]
            ),
        )
Exemplo n.º 15
0
def features(state_space_size, hidden1=400):
    """Build a single Linear + ReLU feature layer.

    Args:
        state_space_size: Size of the state vector; the layer input is one
            unit wider than this.
        hidden1: Output width of the layer.

    Returns:
        The ``nn.Sequential`` model after ``init_weights`` has been applied
        to every sub-module and the parameters have been cast with
        ``float()``.
    """
    input_dim = state_space_size + 1
    model = nn.Sequential(nn.Linear(input_dim, hidden1), nn.ReLU())
    model.apply(init_weights)
    model.float()
    return model
Exemplo n.º 16
0
 def test_categorical_dueling(self):
     """Check ``nn.CategoricalDueling`` output shape and values.

     The expected numbers depend on the RNG state when the two linear models
     and the random input are created (presumably seeded in the test fixture
     -- confirm), so statement order must not change.
     """
     n_actions = 2
     n_atoms = 3
     value_model = nn.Linear(2, n_atoms)
     advantage_model = nn.Linear(2, n_actions * n_atoms)
     model = nn.CategoricalDueling(value_model, advantage_model)
     x = torch.randn((2, 2))
     out = model(x)
     # Batch of 2, with n_actions * n_atoms = 6 outputs per row.
     self.assertEqual(out.shape, (2, 6))
     tt.assert_almost_equal(
         out,
         torch.tensor([
             [0.014, -0.691, 0.251, -0.055, -0.419, -0.03],
             [0.057, -1.172, 0.568, -0.868, -0.482, -0.679],
         ]),
         decimal=3,
     )
Exemplo n.º 17
0
 def __init__(self, env):
     """Create the linear projection and the transposed-convolution stack.

     Args:
         env: Object exposing ``action_space.n``.
     """
     super().__init__()
     n_actions = env.action_space.n
     self.num_actions = n_actions
     # Projects a 512-vector up to 3136 values (3136 = 64 * 7 * 7, presumably
     # reshaped to that layout before the deconvolutions -- confirm in forward).
     self.fc = nn.Linear(512, 3136)
     layers = [
         nn.ConvTranspose2d(64, 64, 3, stride=1),
         nn.ReLU(),
         nn.ConvTranspose2d(64, 32, 4, stride=2),
         nn.ReLU(),
         # FRAMES output channels for each action.
         nn.ConvTranspose2d(32, FRAMES * n_actions, 8, stride=4),
         nn.Sigmoid(),
         nn.Scale(255),
     ]
     self.deconv = nn.Sequential(*layers)
Exemplo n.º 18
0
def shared_feature_layers():
    """Build a three-layer convolutional feature extractor ending in 512 units.

    The input is multiplied by 1/255, passed through three Conv2d + ReLU
    stages, flattened, and projected from 3136 to 512 features. The first
    convolution takes ``FRAMES`` input channels.
    """
    layers = [nn.Scale(1 / 255)]
    # (in_channels, out_channels, kernel_size, stride) for each conv stage.
    conv_specs = (
        (FRAMES, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
    )
    for in_channels, out_channels, kernel, stride in conv_specs:
        layers.append(nn.Conv2d(in_channels, out_channels, kernel, stride=stride))
        layers.append(nn.ReLU())
    layers.extend([nn.Flatten(), nn.Linear(3136, 512), nn.ReLU()])
    return nn.Sequential(*layers)
Exemplo n.º 19
0
def fc_relu_rainbow(env, hidden=64, atoms=51, sigma=0.5):
    """Build a one-hidden-layer network with a noisy ``nn.CategoricalDueling`` head.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the hidden layer.
        atoms: Outputs of the first stream, and outputs per action of the
            second stream.
        sigma: Passed as ``sigma_init`` to both ``nn.NoisyFactorizedLinear``
            layers.
    """
    state_dim = env.state_space.shape[0]
    # Stream construction order is kept: the ``atoms``-wide stream first.
    first_stream = nn.NoisyFactorizedLinear(hidden, atoms, sigma_init=sigma)
    second_stream = nn.NoisyFactorizedLinear(
        hidden,
        env.action_space.n * atoms,
        init_scale=0.0,
        sigma_init=sigma,
    )
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
        nn.CategoricalDueling(first_stream, second_stream),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 20
0
def conv_features(frames=4):
    """Build a three-layer convolutional feature extractor ending in 512 units.

    The input is multiplied by 1/255, passed through three Conv2d + ReLU
    stages, flattened, and projected from 10816 to 512 features.

    Args:
        frames: The first convolution takes ``frames * 3`` input channels
            (presumably three colour channels per frame -- confirm).
    """
    layers = [nn.Scale(1/255)]
    # (in_channels, out_channels, kernel_size, stride) for each conv stage.
    conv_specs = (
        (frames * 3, 64, 4, 2),
        (64, 64, 3, 2),
        (64, 64, 3, 1),
    )
    for in_channels, out_channels, kernel, stride in conv_specs:
        layers.append(nn.Conv2d(in_channels, out_channels, kernel, stride=stride))
        layers.append(nn.ReLU())
    layers.extend([nn.Flatten(), nn.Linear(10816, 512), nn.ReLU()])
    return nn.Sequential(*layers)
Exemplo n.º 21
0
def nature_features(frames=4):
    """Build a three-layer convolutional feature extractor ending in 512 units.

    The input is multiplied by 1/255, passed through three Conv2d + ReLU
    stages, flattened, and projected from 3136 to 512 features.

    Args:
        frames: Number of input channels of the first convolution.
    """
    layers = [nn.Scale(1/255)]
    # (in_channels, out_channels, kernel_size, stride) for each conv stage.
    conv_specs = (
        (frames, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
    )
    for in_channels, out_channels, kernel, stride in conv_specs:
        layers.append(nn.Conv2d(in_channels, out_channels, kernel, stride=stride))
        layers.append(nn.ReLU())
    layers.extend([nn.Flatten(), nn.Linear(3136, 512), nn.ReLU()])
    return nn.Sequential(*layers)
Exemplo n.º 22
0
def nature_c51(env, frames=4, atoms=51):
    """Build a three-layer convolutional network with ``atoms`` outputs per action.

    The input is multiplied by 1/255, passed through three Conv2d + ReLU
    stages, flattened, projected from 3136 to 512 features, and mapped by
    ``nn.Linear0`` to ``env.action_space.n * atoms`` outputs.

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels of the first convolution.
        atoms: Number of outputs produced for each action.
    """
    layers = [nn.Scale(1/255)]
    # (in_channels, out_channels, kernel_size, stride) for each conv stage.
    conv_specs = (
        (frames, 32, 8, 4),
        (32, 64, 4, 2),
        (64, 64, 3, 1),
    )
    for in_channels, out_channels, kernel, stride in conv_specs:
        layers.append(nn.Conv2d(in_channels, out_channels, kernel, stride=stride))
        layers.append(nn.ReLU())
    layers.extend([
        nn.Flatten(),
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n * atoms),
    ])
    return nn.Sequential(*layers)
Exemplo n.º 23
0
 def __init__(self, env, frames=4):
     """Create the convolutional trunk and two agent-aware linear layers.

     Both linear layers take ``len(env.agents)`` extra inputs on top of the
     3136 convolutional features and the 512 hidden features respectively
     (presumably an agent indicator concatenated in ``forward`` -- confirm).

     Args:
         env: Object exposing ``agents`` and ``action_spaces``; the action
             count is read from the ``'first_0'`` entry.
         frames: Number of input channels of the first convolution.
     """
     super().__init__()
     n_agents = len(env.agents)
     n_actions = env.action_spaces['first_0'].n
     conv_out = 3136
     hidden_width = 512
     trunk = [
         nn.Scale(1/255),
         nn.Conv2d(frames, 32, 8, stride=4),
         nn.ReLU(),
         nn.Conv2d(32, 64, 4, stride=2),
         nn.ReLU(),
         nn.Conv2d(64, 64, 3, stride=1),
         nn.ReLU(),
         nn.Flatten(),
     ]
     self.conv = nn.Sequential(*trunk)
     self.hidden = nn.Linear(conv_out + n_agents, hidden_width)
     self.output = nn.Linear0(hidden_width + n_agents, n_actions)
Exemplo n.º 24
0
    def test_list(self):
        """Check ``nn.ListNetwork`` output on two batches against fixed values.

        The expected numbers depend on the RNG state when the model and the
        random features are created (presumably seeded in the test fixture
        -- confirm), so statement order must not change.
        """
        model = nn.Linear(2, 2)
        net = nn.ListNetwork(model, (2, ))
        features = torch.randn((4, 2))
        # NOTE(review): the zero row in the expected output lines up with the
        # entry whose flag is 0, so the second State argument looks like a
        # "not done" mask despite the variable name -- confirm.
        done = torch.tensor([1, 1, 0, 1], dtype=torch.uint8)
        out = net(State(features, done))
        tt.assert_almost_equal(
            out,
            torch.tensor([[0.0479387, -0.2268031], [0.2346841, 0.0743403],
                          [0., 0.], [0.2204496, 0.086818]]))

        # Second batch: every flag is 1 and no row is expected to be zeroed.
        features = torch.randn(3, 2)
        done = torch.tensor([1, 1, 1], dtype=torch.uint8)
        out = net(State(features, done))
        tt.assert_almost_equal(
            out,
            torch.tensor([[0.4234636, 0.1039939], [0.6514298, 0.3354351],
                          [-0.2543002, -0.2041451]]))
Exemplo n.º 25
0
 def __init__(
     self,
     inp_size,
     out_size,
     hidden_m,
     hidden_s,
 ):
     """Create the six linear layers of the module.

     Args:
         inp_size: Input width of ``inp_layer``.
         out_size: Output width of ``hidden_01``, ``hidden_11`` and
             ``final_layer``.
         hidden_m: Indexable; entries 0 and 1 size the ``hidden_0*`` layers.
         hidden_s: Indexable; entries 0 and 1 size the ``hidden_1*`` layers.
     """
     super().__init__()
     m_first, m_second = hidden_m[0], hidden_m[1]
     self.inp_layer = nn.Linear(inp_size, m_first)
     self.hidden_00 = nn.Linear(m_first, m_second)
     self.hidden_01 = nn.Linear(m_second, out_size)
     # NOTE(review): no layer here maps to width hidden_s[0]; if hidden_10
     # consumes inp_layer's output, hidden_s[0] must equal hidden_m[0] --
     # confirm against forward().
     s_first, s_second = hidden_s[0], hidden_s[1]
     self.hidden_10 = nn.Linear(s_first, s_second)
     self.hidden_11 = nn.Linear(s_second, out_size)
     self.final_layer = nn.Linear(out_size, out_size)
Exemplo n.º 26
0
    def test_list_to_list(self):
        """Check ``nn.ListToList`` on three differently shaped inputs.

        The expected numbers depend on the RNG state when the model and the
        random inputs are created (presumably seeded in the test fixture
        -- confirm), so statement order must not change.
        """
        model = nn.Linear(2, 2)
        net = nn.ListToList(model)
        # Case 1: batch of five with an explicit flag tensor. None of the
        # expected rows is zeroed, including the one whose flag is 0.
        x = State(torch.randn(5, 2), torch.tensor([1, 1, 1, 0, 1]))
        out = net(x)
        tt.assert_almost_equal(out.features,
                               torch.tensor([[0.0479,
                                              -0.2268], [0.2347, 0.0743],
                                             [0.0185,
                                              0.0815], [0.2204, 0.0868],
                                             [0.4235, 0.1040]]),
                               decimal=3)
        # Case 2: batch of three without a flag tensor.
        x = State(torch.randn(3, 2))
        out = net(x)
        tt.assert_almost_equal(out.features,
                               torch.tensor([[0.651, 0.335], [-0.254, -0.204],
                                             [0.123, 0.218]]),
                               decimal=3)

        # Case 3: a single un-batched feature vector of length 2.
        x = State(torch.randn(2))
        out = net(x)
        tt.assert_almost_equal(out.features,
                               torch.tensor([0.3218211, 0.3707529]),
                               decimal=3)
Exemplo n.º 27
0
def fc_relu_features(env, hidden=64):
    """Build a flattened-input Linear + ReLU feature layer.

    Args:
        env: Object exposing ``state_space.shape``; its first entry sets the
            input width.
        hidden: Output width of the layer.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU.
    """
    state_dim = env.state_space.shape[0]
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 28
0
def nature_value_head():
    """Return a linear layer mapping 512 features to a single output."""
    feature_dim, output_dim = 512, 1
    head = nn.Linear(feature_dim, output_dim)
    return head
Exemplo n.º 29
0
def simple_nature_features(frames=4):
    """Build a small feature layer: scale by 1/4, then Linear(1, 16) + ReLU.

    Args:
        frames: Accepted but not used by this function.
    """
    layers = [
        nn.Scale(1/4),
        nn.Linear(1, 16),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
Exemplo n.º 30
0
def simple_nature_value_head():
    """Return a linear layer mapping 16 features to a single output."""
    feature_dim, output_dim = 16, 1
    head = nn.Linear(feature_dim, output_dim)
    return head