def __init__(self, input_shape, n_actions):
    """Build the convolutional feature extractor and the two noisy heads.

    Args:
        input_shape: Shape of a single observation. ``input_shape[0]`` is
            used as the channel count of the first convolution, and the
            whole shape is handed to ``self._get_conv_out``.
        n_actions: Number of actions; the ``fc_adv`` head emits
            ``n_actions * N_ATOMS`` values.
    """
    super(RainbowDQN, self).__init__()

    self.conv = nn.Sequential(
        nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(),
    )

    # Used as the input width of both heads below.
    conv_out_size = self._get_conv_out(input_shape)

    # Head producing N_ATOMS outputs.
    self.fc_val = nn.Sequential(
        dqn_model.NoisyLinear(conv_out_size, 512),
        nn.ReLU(),
        dqn_model.NoisyLinear(512, N_ATOMS),
    )
    # Head producing N_ATOMS outputs per action.
    self.fc_adv = nn.Sequential(
        dqn_model.NoisyLinear(conv_out_size, 512),
        nn.ReLU(),
        dqn_model.NoisyLinear(512, n_actions * N_ATOMS),
    )

    # torch.arange excludes its end value, so the upper bound is pushed one
    # step past Vmax; otherwise Vmax is dropped and the buffer ends up one
    # entry shorter than intended.
    # NOTE(review): assumes DELTA_Z == (Vmax - Vmin) / (N_ATOMS - 1), so the
    # buffer holds N_ATOMS values — confirm against the module constants.
    self.register_buffer(
        "supports", torch.arange(Vmin, Vmax + DELTA_Z, DELTA_Z)
    )
    self.softmax = nn.Softmax(dim=1)
def __init__(self, obs_size, act_size):
    """Assemble ``self.net``: three noisy linear layers ending in ``Tanh``.

    Args:
        obs_size: Input width of the first layer.
        act_size: Output width of the last layer.
    """
    super(DDPGActor, self).__init__()

    first_hidden, second_hidden = 400, 300

    # Layers are created in the order they run, so sub-module indices
    # inside the Sequential stay 0..5.
    stack = [
        dqn_model.NoisyLinear(obs_size, first_hidden),
        nn.ReLU(),
        dqn_model.NoisyLinear(first_hidden, second_hidden),
        nn.ReLU(),
        dqn_model.NoisyLinear(second_hidden, act_size),
        nn.Tanh(),
    ]
    self.net = nn.Sequential(*stack)
def __init__(self, input_shape, n_actions):
    """Create the two noisy fully connected heads and the support buffer.

    Args:
        input_shape: Passed directly as the input width of both heads.
        n_actions: Number of actions; the ``fc_adv`` head emits
            ``n_actions * N_ATOMS`` values.
    """
    super(RainbowDQN, self).__init__()

    def build_head(out_features):
        # Both heads share this layout and differ only in output width.
        return nn.Sequential(
            dqn_model.NoisyLinear(input_shape, NUMBER_NEURONS),
            nn.ReLU(),
            dqn_model.NoisyLinear(NUMBER_NEURONS, out_features),
        )

    # fc_val is built and registered before fc_adv, as in the original.
    self.fc_val = build_head(N_ATOMS)
    self.fc_adv = build_head(n_actions * N_ATOMS)

    # The end bound is one step past Vmax because arange excludes its end.
    support_values = torch.arange(Vmin, Vmax + DELTA_Z, DELTA_Z)
    self.register_buffer("supports", support_values)

    self.softmax = nn.Softmax(dim=1)
def __init__(self, input_shape, n_actions):
    """Set up the convolutional stack and the noisy fully connected part.

    Args:
        input_shape: Shape of a single observation. ``input_shape[0]`` is
            used as the channel count of the first convolution, and the
            whole shape is handed to ``self._get_conv_out``.
        n_actions: Output width of the last noisy layer.
    """
    super(NoisyDQN, self).__init__()

    in_channels = input_shape[0]
    conv_stack = [
        nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(),
    ]
    self.conv = nn.Sequential(*conv_stack)

    # self.conv is assigned before this call, as in the original.
    flat_features = self._get_conv_out(input_shape)

    hidden_noisy = dqn_model.NoisyLinear(flat_features, 512)
    output_noisy = dqn_model.NoisyLinear(512, n_actions)

    # Plain list giving direct access to the two noisy layers; the same
    # objects are placed into self.fc below.
    self.noisy_layers = [hidden_noisy, output_noisy]
    self.fc = nn.Sequential(hidden_noisy, nn.ReLU(), output_noisy)