def __init__(self, env, hidden1=400, hidden2=300):
    """Create a tanh MLP plus a learnable per-action ``log_stds`` vector.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
    """
    super().__init__()
    # The network takes one input more than the state dimension
    # (presumably an appended time feature -- TODO confirm against forward()).
    input_dim = env.state_space.shape[0] + 1
    action_dim = env.action_space.shape[0]
    layers = [
        nn.Linear(input_dim, hidden1),
        nn.Tanh(),
        nn.Linear(hidden1, hidden2),
        nn.Tanh(),
        nn.Linear(hidden2, action_dim),
    ]
    self.model = nn.Sequential(*layers)
    # One trainable value per action dimension, initialised to zero.
    self.log_stds = nn.Parameter(torch.zeros(action_dim))
def critic(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU MLP with a single scalar output.

    Args:
        env: Object exposing ``state_space.shape``; its first entry is the
            input width.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        The ``nn.Sequential`` after ``init_weights`` has been applied to it
        and ``float()`` has been called on it.
    """
    state_dim = env.state_space.shape[0]
    layers = [
        nn.Linear(state_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, 1),
    ]
    value_net = nn.Sequential(*layers)
    value_net.apply(init_weights)
    value_net.float()
    return value_net
def fc_relu_q(env, hidden=64):
    """Build a one-hidden-layer ReLU network with one output per discrete action.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the hidden layer.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear.
    """
    state_dim = env.state_space.shape[0]
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
        nn.Linear(hidden, env.action_space.n),
    ]
    return nn.Sequential(*layers)
def fc_soft_policy(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU MLP emitting two values per action dimension.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential`` whose final ``nn.Linear0`` layer has
        ``2 * env.action_space.shape[0]`` outputs.
    """
    # One extra input beyond the state dimension (presumably a time
    # feature -- TODO confirm against the caller).
    input_dim = env.state_space.shape[0] + 1
    output_dim = env.action_space.shape[0] * 2
    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, output_dim),
    ]
    return nn.Sequential(*layers)
def dueling_fc_relu_q(env):
    """Build a flattening network that ends in an ``nn.Dueling`` head.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.

    Returns:
        ``nn.Sequential`` of Flatten followed by ``nn.Dueling`` wrapping a
        single-output stream and a per-action stream, each with one
        256-unit ReLU hidden layer.
    """
    state_dim = env.state_space.shape[0]
    # The single-output stream is built before the per-action stream,
    # matching the argument order expected by nn.Dueling.
    value_stream = nn.Sequential(
        nn.Linear(state_dim, 256),
        nn.ReLU(),
        nn.Linear(256, 1),
    )
    advantage_stream = nn.Sequential(
        nn.Linear(state_dim, 256),
        nn.ReLU(),
        nn.Linear(256, env.action_space.n),
    )
    return nn.Sequential(
        nn.Flatten(),
        nn.Dueling(value_stream, advantage_stream),
    )
def lunar_lander_nature_ddqn(env):
    """Build a 64-64 ReLU MLP with one output per discrete action.

    Args:
        env: Object exposing ``observation_space.shape`` and
            ``action_space.n``.

    Returns:
        ``nn.Sequential`` of Linear -> ReLU -> Linear -> ReLU -> Linear.
    """
    obs_dim = env.observation_space.shape[0]
    layers = [
        nn.Linear(obs_dim, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, env.action_space.n),
    ]
    return nn.Sequential(*layers)
def fc_v(env, hidden1=400, hidden2=300):
    """Build a two-hidden-layer ReLU MLP with a single scalar output.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential`` ending in ``nn.Linear0(hidden2, 1)``.
    """
    # Input is one wider than the state vector (presumably a time
    # feature -- TODO confirm against the caller).
    input_dim = env.state_space.shape[0] + 1
    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, 1),
    ]
    return nn.Sequential(*layers)
def fc_v(env, hidden1=516, hidden2=516):
    """Build a two-hidden-layer ReLU MLP with a single scalar output.

    Prints ``Custom V loaded`` to stdout each time it is called.

    Args:
        env: Object exposing ``state_space.shape``.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.

    Returns:
        ``nn.Sequential`` ending in ``nn.Linear0(hidden2, 1)``.
    """
    print("Custom V loaded")
    # Input is one wider than the state vector (presumably a time
    # feature -- TODO confirm against the caller).
    input_dim = env.state_space.shape[0] + 1
    layers = [
        nn.Linear(input_dim, hidden1),
        nn.ReLU(),
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear0(hidden2, 1),
    ]
    return nn.Sequential(*layers)
def fc_policy(env):
    """Build a 64-64 ReLU MLP emitting two values per action dimension.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.

    Returns:
        ``nn.Sequential`` starting with Flatten and ending in an
        ``nn.Linear0`` layer with ``2 * env.action_space.shape[0]`` outputs.
    """
    state_dim = env.state_space.shape[0]
    output_dim = env.action_space.shape[0] * 2
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear0(64, output_dim),
    ]
    return nn.Sequential(*layers)
def nature_ddqn(env, frames=4):
    """Build a three-layer convolutional network with an ``nn.Dueling`` head.

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels of the first convolution.

    Returns:
        ``nn.Sequential`` of Scale(1/255), three Conv2d+ReLU stages, Flatten
        and an ``nn.Dueling`` module over two 3136 -> 512 -> out streams.
    """
    conv_stack = [
        nn.Scale(1 / 255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]
    # 3136 is the flattened width both streams expect from the conv stack.
    value_stream = nn.Sequential(
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, 1),
    )
    advantage_stream = nn.Sequential(
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n),
    )
    return nn.Sequential(*conv_stack, nn.Dueling(value_stream, advantage_stream))
def test_dueling(self):
    """Compare ``nn.Dueling`` output for seeded linear heads against stored values."""
    # Seed first: the two Linear layers below draw their initial weights
    # from the global RNG, and the expected numbers depend on that.
    torch.random.manual_seed(0)
    value_head = nn.Linear(2, 1)
    advantage_head = nn.Linear(2, 3)
    dueling = nn.Dueling(value_head, advantage_head)
    batch = torch.tensor([[1., 2.], [3., 4.]])
    expected = np.array(
        [
            [-0.495295, 0.330573, 0.678836],
            [-1.253222, 1.509323, 2.502186],
        ],
        dtype=np.float32,
    )
    actual = dueling(batch).detach().numpy()
    np.testing.assert_array_almost_equal(actual, expected)
def fc_actor_critic(env, hidden1=400, hidden2=300):
    """Build a shared feature layer plus separate value and policy heads.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.shape``.
        hidden1: Output width of the shared feature layer.
        hidden2: Hidden width inside each head.

    Returns:
        Tuple ``(features, v, policy)`` of ``nn.Sequential`` modules. ``v``
        has one output; ``policy`` has ``2 * env.action_space.shape[0]``.
    """
    # Input is one wider than the state vector (presumably a time
    # feature -- TODO confirm against the caller).
    input_dim = env.state_space.shape[0] + 1
    policy_out = env.action_space.shape[0] * 2

    feature_layers = [nn.Linear(input_dim, hidden1), nn.ReLU()]
    features = nn.Sequential(*feature_layers)

    value_layers = [nn.Linear(hidden1, hidden2), nn.ReLU(), nn.Linear(hidden2, 1)]
    v = nn.Sequential(*value_layers)

    policy_layers = [
        nn.Linear(hidden1, hidden2),
        nn.ReLU(),
        nn.Linear(hidden2, policy_out),
    ]
    policy = nn.Sequential(*policy_layers)

    return features, v, policy
def fc_relu_dist_q(env, hidden=64, atoms=51):
    """Build a one-hidden-layer ReLU network with ``atoms`` outputs per action.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the hidden layer.
        atoms: Number of outputs produced for each discrete action.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear0 with
        ``env.action_space.n * atoms`` outputs.
    """
    state_dim = env.state_space.shape[0]
    output_dim = env.action_space.n * atoms
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
        nn.Linear0(hidden, output_dim),
    ]
    return nn.Sequential(*layers)
def test_list(self):
    """Run ``nn.RLNetwork`` on two ``StateArray`` batches and compare with stored outputs.

    The expected numbers depend on the RNG state when this method starts
    (presumably seeded in the test fixture -- TODO confirm), so the order of
    the random draws below must not change.
    """
    linear = nn.Linear(2, 2)
    net = nn.RLNetwork(linear, (2,))

    # First batch: four states, the third one flagged as done.
    first_features = torch.randn((4, 2))
    first_done = torch.tensor([False, False, True, False])
    first_expected = torch.tensor(
        [
            [0.0479387, -0.2268031],
            [0.2346841, 0.0743403],
            [0.0, 0.0],  # the done row is expected to be all zeros
            [0.2204496, 0.086818],
        ]
    )
    first_out = net(StateArray(first_features, (4,), done=first_done))
    tt.assert_almost_equal(first_out, first_expected)

    # Second batch: three states, none done.
    second_features = torch.randn(3, 2)
    second_done = torch.tensor([False, False, False])
    second_expected = torch.tensor(
        [
            [0.4234636, 0.1039939],
            [0.6514298, 0.3354351],
            [-0.2543002, -0.2041451],
        ]
    )
    second_out = net(StateArray(second_features, (3,), done=second_done))
    tt.assert_almost_equal(second_out, second_expected)
def features(state_space_size, hidden1=400):
    """Build a single Linear+ReLU feature layer.

    Args:
        state_space_size: Size of the state vector; the layer takes one
            additional input on top of this.
        hidden1: Output width of the layer.

    Returns:
        The ``nn.Sequential`` after ``init_weights`` has been applied to it
        and ``float()`` has been called on it.
    """
    layers = [
        nn.Linear(state_space_size + 1, hidden1),
        nn.ReLU(),
    ]
    feature_net = nn.Sequential(*layers)
    feature_net.apply(init_weights)
    feature_net.float()
    return feature_net
def test_categorical_dueling(self):
    """Check output shape and values of ``nn.CategoricalDueling`` on a random batch.

    The expected numbers depend on the RNG state when this method starts
    (presumably seeded in the test fixture -- TODO confirm).
    """
    n_actions, n_atoms = 2, 3
    value_head = nn.Linear(2, n_atoms)
    advantage_head = nn.Linear(2, n_actions * n_atoms)
    dueling = nn.CategoricalDueling(value_head, advantage_head)

    batch = torch.randn((2, 2))
    result = dueling(batch)

    # Two rows, n_actions * n_atoms columns.
    self.assertEqual(result.shape, (2, 6))
    expected = torch.tensor([
        [0.014, -0.691, 0.251, -0.055, -0.419, -0.03],
        [0.057, -1.172, 0.568, -0.868, -0.482, -0.679],
    ])
    tt.assert_almost_equal(result, expected, decimal=3)
def __init__(self, env):
    """Create a linear layer followed by a transposed-convolution stack.

    Args:
        env: Object exposing ``action_space.n``.
    """
    super().__init__()
    self.num_actions = env.action_space.n
    # 512 -> 3136 projection; created before the deconvolution layers.
    self.fc = nn.Linear(512, 3136)
    # The last transposed convolution emits FRAMES channels per action.
    out_channels = FRAMES * env.action_space.n
    deconv_layers = [
        nn.ConvTranspose2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.ConvTranspose2d(64, 32, 4, stride=2),
        nn.ReLU(),
        nn.ConvTranspose2d(32, out_channels, 8, stride=4),
        nn.Sigmoid(),
        nn.Scale(255),
    ]
    self.deconv = nn.Sequential(*deconv_layers)
def shared_feature_layers():
    """Build a three-layer convolutional stack ending in a 512-unit ReLU layer.

    The first convolution takes ``FRAMES`` input channels (module-level
    constant defined outside this function).

    Returns:
        ``nn.Sequential`` of Scale(1/255), three Conv2d+ReLU stages, Flatten,
        Linear(3136, 512) and ReLU.
    """
    layers = [
        nn.Scale(1 / 255),
        nn.Conv2d(FRAMES, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(3136, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
def fc_relu_rainbow(env, hidden=64, atoms=51, sigma=0.5):
    """Build a one-hidden-layer network with an ``nn.CategoricalDueling`` head.

    Args:
        env: Object exposing ``state_space.shape`` and ``action_space.n``.
        hidden: Width of the hidden layer.
        atoms: Outputs of the first head; the second head has
            ``env.action_space.n * atoms`` outputs.
        sigma: Passed as ``sigma_init`` to both ``nn.NoisyFactorizedLinear``
            layers.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear -> ReLU -> CategoricalDueling.
    """
    # The trunk's Linear layer is created before the two noisy heads so the
    # layers are constructed in the same order as they appear in the model.
    trunk = [
        nn.Flatten(),
        nn.Linear(env.state_space.shape[0], hidden),
        nn.ReLU(),
    ]
    value_head = nn.NoisyFactorizedLinear(hidden, atoms, sigma_init=sigma)
    advantage_head = nn.NoisyFactorizedLinear(
        hidden,
        env.action_space.n * atoms,
        init_scale=0.0,
        sigma_init=sigma,
    )
    return nn.Sequential(*trunk, nn.CategoricalDueling(value_head, advantage_head))
def conv_features(frames=4):
    """Build a three-layer convolutional stack ending in a 512-unit ReLU layer.

    Args:
        frames: The first convolution takes ``frames * 3`` input channels
            (presumably three colour channels per frame -- TODO confirm).

    Returns:
        ``nn.Sequential`` of Scale(1/255), three Conv2d+ReLU stages, Flatten,
        Linear(10816, 512) and ReLU.
    """
    in_channels = frames * 3
    layers = [
        nn.Scale(1/255),
        nn.Conv2d(in_channels, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(10816, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
def nature_features(frames=4):
    """Build a three-layer convolutional stack ending in a 512-unit ReLU layer.

    Args:
        frames: Number of input channels of the first convolution.

    Returns:
        ``nn.Sequential`` of Scale(1/255), three Conv2d+ReLU stages, Flatten,
        Linear(3136, 512) and ReLU.
    """
    layers = [
        nn.Scale(1/255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(3136, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
def nature_c51(env, frames=4, atoms=51):
    """Build a convolutional network with ``atoms`` outputs per discrete action.

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels of the first convolution.
        atoms: Number of outputs produced for each action.

    Returns:
        ``nn.Sequential`` of Scale(1/255), three Conv2d+ReLU stages, Flatten,
        Linear(3136, 512), ReLU and a final ``nn.Linear0`` layer with
        ``env.action_space.n * atoms`` outputs.
    """
    output_dim = env.action_space.n * atoms
    layers = [
        nn.Scale(1/255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, output_dim),
    ]
    return nn.Sequential(*layers)
def __init__(self, env, frames=4):
    """Create a convolutional stack plus two linear layers widened by the agent count.

    Args:
        env: Object exposing ``agents`` (sized) and ``action_spaces``, a
            mapping that contains the key ``'first_0'``.
        frames: Number of input channels of the first convolution.
    """
    super().__init__()
    agent_count = len(env.agents)
    # The action count is read from the 'first_0' entry only.
    action_count = env.action_spaces['first_0'].n
    conv_layers = [
        nn.Scale(1/255),
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]
    self.conv = nn.Sequential(*conv_layers)
    # Both linear layers take agent_count extra inputs (presumably an agent
    # indicator concatenated in forward() -- TODO confirm).
    self.hidden = nn.Linear(3136 + agent_count, 512)
    self.output = nn.Linear0(512 + agent_count, action_count)
def test_list(self):
    """Run ``nn.ListNetwork`` on two ``State`` batches and compare with stored outputs.

    The expected numbers depend on the RNG state when this method starts
    (presumably seeded in the test fixture -- TODO confirm), so the order of
    the random draws below must not change.
    """
    linear = nn.Linear(2, 2)
    net = nn.ListNetwork(linear, (2, ))

    # First batch: four rows, the third carrying a 0 flag.
    first_features = torch.randn((4, 2))
    first_flags = torch.tensor([1, 1, 0, 1], dtype=torch.uint8)
    first_expected = torch.tensor([
        [0.0479387, -0.2268031],
        [0.2346841, 0.0743403],
        [0., 0.],  # the row flagged 0 is expected to be all zeros
        [0.2204496, 0.086818],
    ])
    first_out = net(State(first_features, first_flags))
    tt.assert_almost_equal(first_out, first_expected)

    # Second batch: three rows, every flag set to 1.
    second_features = torch.randn(3, 2)
    second_flags = torch.tensor([1, 1, 1], dtype=torch.uint8)
    second_expected = torch.tensor([
        [0.4234636, 0.1039939],
        [0.6514298, 0.3354351],
        [-0.2543002, -0.2041451],
    ])
    second_out = net(State(second_features, second_flags))
    tt.assert_almost_equal(second_out, second_expected)
def __init__(
    self,
    inp_size,
    out_size,
    hidden_m,
    hidden_s,
):
    """Create the linear layers of a network with two hidden branches.

    Args:
        inp_size: Input width of ``inp_layer``.
        out_size: Output width of both branches and of ``final_layer``.
        hidden_m: Indexable; entries 0 and 1 give the widths used by
            ``inp_layer``, ``hidden_00`` and ``hidden_01``.
        hidden_s: Indexable; entries 0 and 1 give the widths used by
            ``hidden_10`` and ``hidden_11``.
    """
    super().__init__()
    m_first, m_second = hidden_m[0], hidden_m[1]
    s_first, s_second = hidden_s[0], hidden_s[1]

    self.inp_layer = nn.Linear(inp_size, m_first)
    # "m" branch: m_first -> m_second -> out_size.
    self.hidden_00 = nn.Linear(m_first, m_second)
    self.hidden_01 = nn.Linear(m_second, out_size)
    # "s" branch: s_first -> s_second -> out_size. What feeds hidden_10 is
    # decided in forward(), which is not visible here.
    self.hidden_10 = nn.Linear(s_first, s_second)
    self.hidden_11 = nn.Linear(s_second, out_size)
    self.final_layer = nn.Linear(out_size, out_size)
def test_list_to_list(self):
    """Run ``nn.ListToList`` on three ``State`` inputs and compare ``features`` with stored values.

    The expected numbers depend on the RNG state when this method starts
    (presumably seeded in the test fixture -- TODO confirm), so the order of
    the random draws below must not change.
    """
    linear = nn.Linear(2, 2)
    net = nn.ListToList(linear)

    # Case 1: batch of five with an explicit second State argument.
    batch_with_flags = State(torch.randn(5, 2), torch.tensor([1, 1, 1, 0, 1]))
    expected_with_flags = torch.tensor([
        [0.0479, -0.2268],
        [0.2347, 0.0743],
        [0.0185, 0.0815],
        [0.2204, 0.0868],
        [0.4235, 0.1040],
    ])
    result = net(batch_with_flags)
    tt.assert_almost_equal(result.features, expected_with_flags, decimal=3)

    # Case 2: batch of three built from features only.
    plain_batch = State(torch.randn(3, 2))
    expected_plain = torch.tensor([
        [0.651, 0.335],
        [-0.254, -0.204],
        [0.123, 0.218],
    ])
    result = net(plain_batch)
    tt.assert_almost_equal(result.features, expected_plain, decimal=3)

    # Case 3: a single unbatched feature vector.
    single_state = State(torch.randn(2))
    expected_single = torch.tensor([0.3218211, 0.3707529])
    result = net(single_state)
    tt.assert_almost_equal(result.features, expected_single, decimal=3)
def fc_relu_features(env, hidden=64):
    """Build a Flatten -> Linear -> ReLU feature extractor.

    Args:
        env: Object exposing ``state_space.shape``; its first entry is the
            input width of the linear layer.
        hidden: Output width of the linear layer.

    Returns:
        ``nn.Sequential`` with the three modules above.
    """
    state_dim = env.state_space.shape[0]
    layers = [
        nn.Flatten(),
        nn.Linear(state_dim, hidden),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
def nature_value_head():
    """Return a linear layer mapping 512 inputs to a single output."""
    value_head = nn.Linear(512, 1)
    return value_head
def simple_nature_features(frames=4):
    """Build a Scale(1/4) -> Linear(1, 16) -> ReLU feature extractor.

    Args:
        frames: Accepted but not used anywhere in this function.

    Returns:
        ``nn.Sequential`` with the three modules above.
    """
    layers = [
        nn.Scale(1/4),
        nn.Linear(1, 16),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
def simple_nature_value_head():
    """Return a linear layer mapping 16 inputs to a single output."""
    value_head = nn.Linear(16, 1)
    return value_head