def fc_relu_q(env, hidden=64):
    """Build a one-hidden-layer fully connected network with one output per action.

    Args:
        env: Object exposing ``state_space.shape`` (first entry is used as the
            input width) and ``action_space.n`` (used as the output width).
        hidden: Width of the single hidden layer.

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear.
    """
    n_inputs = env.state_space.shape[0]
    n_actions = env.action_space.n
    layers = [
        nn.Flatten(),
        nn.Linear(n_inputs, hidden),
        nn.ReLU(),
        nn.Linear(hidden, n_actions),
    ]
    return nn.Sequential(*layers)
def fc_relu_dist_q(env, hidden=64, atoms=51):
    """Build a one-hidden-layer network with ``atoms`` outputs per action.

    Args:
        env: Object exposing ``state_space.shape`` (first entry is the input
            width) and ``action_space.n``.
        hidden: Width of the hidden layer.
        atoms: Number of outputs produced for each action; the final layer
            has ``env.action_space.n * atoms`` outputs in total.

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear0.
    """
    n_inputs = env.state_space.shape[0]
    n_outputs = env.action_space.n * atoms
    layers = [
        nn.Flatten(),
        nn.Linear(n_inputs, hidden),
        nn.ReLU(),
        # NOTE(review): Linear0 is presumably a zero-initialized linear layer;
        # confirm against the project's nn module.
        nn.Linear0(hidden, n_outputs),
    ]
    return nn.Sequential(*layers)
def nature_rainbow(env, frames=4, hidden=512, atoms=51, sigma=0.5):
    """Build a convolutional network ending in a categorical dueling head.

    Three convolutions feed two noisy-linear streams that are combined by
    ``nn.CategoricalDueling``.

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels of the first convolution.
        hidden: Width of the hidden noisy layer in each stream.
        atoms: Output width of the first stream; the second stream outputs
            ``env.action_space.n * atoms`` values.
        sigma: Forwarded as ``sigma_init`` to every noisy layer.

    Returns:
        An ``nn.Sequential`` containing the conv stack and the dueling head.
    """
    # 3136 == 64 * 7 * 7, which is what this conv stack yields for 84x84
    # inputs; the input size itself is not checked here.
    flat_features = 3136

    conv_stack = [
        nn.Scale(1 / 255),  # presumably rescales 8-bit pixel values
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # First stream: `atoms` outputs (presumably the state-value distribution).
    value_stream = nn.Sequential(
        nn.NoisyFactorizedLinear(flat_features, hidden, sigma_init=sigma),
        nn.ReLU(),
        nn.NoisyFactorizedLinear(hidden, atoms, init_scale=0, sigma_init=sigma),
    )

    # Second stream: `atoms` outputs per action (presumably the advantages).
    advantage_stream = nn.Sequential(
        nn.NoisyFactorizedLinear(flat_features, hidden, sigma_init=sigma),
        nn.ReLU(),
        nn.NoisyFactorizedLinear(
            hidden,
            env.action_space.n * atoms,
            init_scale=0,
            sigma_init=sigma,
        ),
    )

    head = nn.CategoricalDueling(value_stream, advantage_stream)
    return nn.Sequential(*conv_stack, head)
def dueling_fc_relu_q(env):
    """Build a fully connected dueling network with two 256-unit streams.

    Args:
        env: Object exposing ``state_space.shape`` (first entry is the input
            width of both streams) and ``action_space.n``.

    Returns:
        An ``nn.Sequential`` of Flatten followed by ``nn.Dueling`` wrapping a
        single-output stream and a one-output-per-action stream.
    """
    n_inputs = env.state_space.shape[0]

    # Single-output stream (presumably the state value).
    value_stream = nn.Sequential(
        nn.Linear(n_inputs, 256),
        nn.ReLU(),
        nn.Linear(256, 1),
    )

    # One output per action (presumably the advantages).
    advantage_stream = nn.Sequential(
        nn.Linear(n_inputs, 256),
        nn.ReLU(),
        nn.Linear(256, env.action_space.n),
    )

    return nn.Sequential(
        nn.Flatten(),
        nn.Dueling(value_stream, advantage_stream),
    )
def fc_policy(env):
    """Build a two-hidden-layer network with two outputs per action dimension.

    Args:
        env: Object exposing ``state_space.shape`` (first entry is the input
            width) and ``action_space.shape`` (first entry is the number of
            action dimensions).

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear -> ReLU ->
        Linear0, whose final layer has ``2 * env.action_space.shape[0]``
        outputs.
    """
    n_inputs = env.state_space.shape[0]
    # Two values per action dimension; presumably the parameters of a
    # per-dimension distribution — verify against the consuming policy.
    n_outputs = env.action_space.shape[0] * 2
    layers = [
        nn.Flatten(),
        nn.Linear(n_inputs, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear0(64, n_outputs),
    ]
    return nn.Sequential(*layers)
def nature_ddqn(env, frames=4):
    """Build a convolutional network ending in a dueling head.

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels of the first convolution.

    Returns:
        An ``nn.Sequential`` containing three convolutions, a flatten, and an
        ``nn.Dueling`` module wrapping two 512-unit streams.
    """
    # 3136 == 64 * 7 * 7, which is what this conv stack yields for 84x84
    # inputs; the input size itself is not checked here.
    flat_features = 3136

    conv_stack = [
        nn.Scale(1 / 255),  # presumably rescales 8-bit pixel values
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # Single-output stream (presumably the state value).
    value_stream = nn.Sequential(
        nn.Linear(flat_features, 512),
        nn.ReLU(),
        nn.Linear0(512, 1),
    )

    # One output per action (presumably the advantages).
    advantage_stream = nn.Sequential(
        nn.Linear(flat_features, 512),
        nn.ReLU(),
        nn.Linear0(512, env.action_space.n),
    )

    return nn.Sequential(*conv_stack, nn.Dueling(value_stream, advantage_stream))
def fc_relu_rainbow(env, hidden=64, atoms=51, sigma=0.5):
    """Build a fully connected network ending in a categorical dueling head.

    Args:
        env: Object exposing ``state_space.shape`` (first entry is the input
            width) and ``action_space.n``.
        hidden: Width of the shared hidden layer feeding both streams.
        atoms: Output width of the first stream; the second stream outputs
            ``env.action_space.n * atoms`` values.
        sigma: Forwarded as ``sigma_init`` to both noisy layers.

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear -> ReLU ->
        ``nn.CategoricalDueling``.
    """
    trunk = [
        nn.Flatten(),
        nn.Linear(env.state_space.shape[0], hidden),
        nn.ReLU(),
    ]

    # Only the second stream is given init_scale=0.0; the first uses the
    # layer's default.
    value_layer = nn.NoisyFactorizedLinear(hidden, atoms, sigma_init=sigma)
    advantage_layer = nn.NoisyFactorizedLinear(
        hidden,
        env.action_space.n * atoms,
        init_scale=0.0,
        sigma_init=sigma,
    )

    return nn.Sequential(*trunk, nn.CategoricalDueling(value_layer, advantage_layer))
def nature_features(frames=4):
    """Build a convolutional feature extractor producing 512 features.

    Args:
        frames: Number of input channels of the first convolution.

    Returns:
        An ``nn.Sequential`` of a scaling layer, three Conv2d+ReLU pairs, a
        flatten, and a 3136 -> 512 linear layer followed by ReLU.
    """
    conv_stack = [
        nn.Scale(1 / 255),  # presumably rescales 8-bit pixel values
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
    ]
    # 3136 == 64 * 7 * 7, which is what the conv stack yields for 84x84
    # inputs; the input size itself is not checked here.
    projection = [
        nn.Flatten(),
        nn.Linear(3136, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*conv_stack, *projection)
def shared_feature_layers(frames=None):
    """Build a convolutional feature extractor producing 512 features.

    Args:
        frames: Number of input channels of the first convolution. When left
            as ``None`` the module-level ``FRAMES`` constant is used, which
            is the behavior this function had before the parameter existed.

    Returns:
        An ``nn.Sequential`` of a scaling layer, three Conv2d+ReLU pairs, a
        flatten, and a 3136 -> 512 linear layer followed by ReLU.
    """
    # Resolve the default at call time so the function does not depend on
    # where FRAMES is defined relative to this definition.
    in_channels = FRAMES if frames is None else frames
    return nn.Sequential(
        nn.Scale(1 / 255),  # presumably rescales 8-bit pixel values
        nn.Conv2d(in_channels, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
        # 3136 == 64 * 7 * 7, which is what the conv stack yields for 84x84
        # inputs; the input size itself is not checked here.
        nn.Linear(3136, 512),
        nn.ReLU(),
    )
def conv_features(frames=4):
    """Build a convolutional feature extractor producing 512 features.

    Args:
        frames: Number of stacked frames; the first convolution takes
            ``frames * 3`` input channels (presumably three color channels
            per frame — confirm against the caller).

    Returns:
        An ``nn.Sequential`` of a scaling layer, three Conv2d+ReLU pairs, a
        flatten, and a 10816 -> 512 linear layer followed by ReLU.
    """
    in_channels = frames * 3
    conv_stack = [
        nn.Scale(1 / 255),  # presumably rescales 8-bit pixel values
        nn.Conv2d(in_channels, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
    ]
    # 10816 == 64 * 13 * 13, which is what the conv stack yields for 64x64
    # inputs — TODO confirm the expected input resolution.
    projection = [
        nn.Flatten(),
        nn.Linear(10816, 512),
        nn.ReLU(),
    ]
    return nn.Sequential(*conv_stack, *projection)
def nature_c51(env, frames=4, atoms=51):
    """Build a convolutional network with ``atoms`` outputs per action.

    Args:
        env: Object exposing ``action_space.n``.
        frames: Number of input channels of the first convolution.
        atoms: Number of outputs produced for each action; the final layer
            has ``env.action_space.n * atoms`` outputs in total.

    Returns:
        An ``nn.Sequential`` of a scaling layer, three Conv2d+ReLU pairs, a
        flatten, a 3136 -> 512 linear layer with ReLU, and a Linear0 output.
    """
    n_outputs = env.action_space.n * atoms
    conv_stack = [
        nn.Scale(1 / 255),  # presumably rescales 8-bit pixel values
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
    ]
    # 3136 == 64 * 7 * 7, which is what the conv stack yields for 84x84
    # inputs; the input size itself is not checked here.
    head = [
        nn.Flatten(),
        nn.Linear(3136, 512),
        nn.ReLU(),
        nn.Linear0(512, n_outputs),
    ]
    return nn.Sequential(*conv_stack, *head)
def __init__(self, env, frames=4):
    """Create the conv stack and the two linear layers of the model.

    Args:
        env: Object exposing ``agents`` (its length is used as the agent
            count) and ``action_spaces``, a mapping that must contain the
            key ``'first_0'``.
        frames: Number of input channels of the first convolution.
    """
    super().__init__()
    agent_count = len(env.agents)
    # The action count is read from the 'first_0' entry only; presumably
    # all agents share the same action space — verify against the env.
    action_count = env.action_spaces['first_0'].n

    conv_layers = [
        nn.Scale(1 / 255),  # presumably rescales 8-bit pixel values
        nn.Conv2d(frames, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1),
        nn.ReLU(),
        nn.Flatten(),
    ]
    self.conv = nn.Sequential(*conv_layers)

    # Both linear layers take `agent_count` extra inputs on top of the
    # 3136 conv features / 512 hidden features; presumably an agent
    # indicator is concatenated in forward(), which is not visible here.
    self.hidden = nn.Linear(3136 + agent_count, 512)
    self.output = nn.Linear0(512 + agent_count, action_count)
def fc_relu_features(env, hidden=64):
    """Build a single-layer fully connected feature extractor.

    Args:
        env: Object exposing ``state_space.shape``; the first entry is used
            as the input width.
        hidden: Number of output features.

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear -> ReLU.
    """
    n_inputs = env.state_space.shape[0]
    layers = [
        nn.Flatten(),
        nn.Linear(n_inputs, hidden),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)
def fc_v(env, hidden=64):
    """Build a two-hidden-layer fully connected network with a single output.

    Args:
        env: Object exposing ``state_space.shape``; the first entry is used
            as the input width.
        hidden: Width of both hidden layers. Defaults to 64, the value that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        An ``nn.Sequential`` of Flatten -> Linear -> ReLU -> Linear -> ReLU ->
        Linear with one output.
    """
    return nn.Sequential(
        nn.Flatten(),
        nn.Linear(env.state_space.shape[0], hidden),
        nn.ReLU(),
        nn.Linear(hidden, hidden),
        nn.ReLU(),
        nn.Linear(hidden, 1),
    )