def __init__(self, output_num=globals.OUTPUT_NUM):
    """Build the convolutional feature extractor and the noisy FC head.

    Args:
        output_num: Number of output units (defaults to globals.OUTPUT_NUM,
            evaluated once at function-definition time).
    """
    nn.Module.__init__(self)
    # Three conv stages downsample the (image + extra channels) input;
    # LeakyReLU after each stage.
    self.conv = nn.Sequential(
        nn.Conv2d(globals.IMAGE_CHANNELS + globals.EXTRA_CHANNELS, 30,
                  kernel_size=(8, 8), stride=(4, 4)),
        nn.LeakyReLU(),
        nn.Conv2d(30, 35, kernel_size=(4, 4), stride=(2, 2)),
        nn.LeakyReLU(),
        nn.Conv2d(35, 40, kernel_size=(3, 3), stride=(1, 1)),
        nn.LeakyReLU(),
    )
    # Flattened size of the conv output for the configured input resolution.
    self.fc_input_size = flat_size_after_conv(
        self.conv, globals.IMAGE_HEIGHT, globals.IMAGE_WIDTH)
    # NoisyLinear layers provide parameter-space exploration. No output
    # activation: raw scores are returned. (Dead commented-out alternatives
    # — plain nn.Linear and a trailing nn.Softmax — were removed.)
    self.fc = nn.Sequential(
        NoisyLinear(self.fc_input_size, 512),
        nn.LeakyReLU(),
        NoisyLinear(512, output_num),
    )
def __init__(self, in_channels, button_num, variable_num, frame_num):
    """Conv screen encoder plus noisy action/value heads (frame-stacking model)."""
    super(BaseModelNoisy, self).__init__()
    self.screen_feature_num = 256
    # Six stride-2 convolutions, doubling channels while halving resolution.
    self.conv1 = nn.Conv2d(in_channels, 16, 3, stride=2)
    self.conv2 = nn.Conv2d(16, 32, 3, stride=2)
    self.conv3 = nn.Conv2d(32, 64, 3, stride=2)
    self.conv4 = nn.Conv2d(64, 128, 3, stride=2)
    self.conv5 = nn.Conv2d(128, 256, 3, stride=2)
    self.conv6 = nn.Conv2d(256, 512, 3, stride=2)
    # Flattened conv output (512 channels over a 2x4 map) -> screen features.
    self.screen_features1 = nn.Linear(512 * 2 * 4, self.screen_feature_num)
    self.batch_norm = nn.BatchNorm1d(self.screen_feature_num)
    hidden = 128
    # Action (policy) head; game variables are appended before the last layer.
    self.action1 = NoisyLinear(self.screen_feature_num, hidden)
    self.action2 = NoisyLinear(hidden + variable_num, button_num)
    # Value head with the same topology, scalar output.
    self.value1 = NoisyLinear(self.screen_feature_num, hidden)
    self.value2 = NoisyLinear(hidden + variable_num, 1)
    self.screens = None  # lazily-created per-sample frame history
    self.frame_num = frame_num
def __init__(self, states_size: int, action_size: int, settings: dict, device: torch.device) -> None:
    """
    Initializes the DistributionalDuelDqn.

    Args:
        states_size (int): Size of the input space.
        action_size (int): Size of the action space.
        settings (dict): dictionary with settings (reads "batch_size",
            "number_atoms", "layers_sizes", 'noisy_nets')
        device (torch.device): "gpu" or "cpu"
    """
    super(DistributionalDuelDQN, self).__init__()
    # The mixin is initialized explicitly because it needs extra arguments
    # (settings, a callable into this net's forward, and the device).
    DistributionalNetHelper.__init__(self, settings, neural_network_call=self.forward, device=device)
    self.batch_size = settings["batch_size"]
    self.number_atoms = settings["number_atoms"]
    layers_size = settings["layers_sizes"][0]
    self.noisy_net = settings['noisy_nets']
    if not self.noisy_net:
        self.FC1 = nn.Linear(int(states_size), layers_size)
        self.FC2 = nn.Linear(layers_size, layers_size)
        # Dueling heads: FC3v emits one logit per atom (state value),
        # FC3a emits number_atoms logits per action (advantages).
        self.FC3v = nn.Linear(layers_size, self.number_atoms)
        self.FC3a = nn.Linear(layers_size, int(action_size * self.number_atoms))
    else:
        # Same topology with NoisyLinear layers for exploration.
        self.FC1 = NoisyLinear(int(states_size), layers_size)
        self.FC2 = NoisyLinear(layers_size, layers_size)
        self.FC3v = NoisyLinear(layers_size, self.number_atoms)
        self.FC3a = NoisyLinear(layers_size, int(action_size) * self.number_atoms)
    # Initialize weights (and noise, for noisy nets).
    self.reset()
def __init__(self, in_dim: int, out_dim: int, atom_size: int, support: torch.Tensor):
    """Build the shared trunk and the noisy dueling streams.

    Args:
        in_dim: size of the observation vector.
        out_dim: number of actions.
        atom_size: number of atoms of the value distribution.
        support: tensor of atom values (the distribution's support).
    """
    super(Network, self).__init__()
    self.support = support
    self.out_dim = out_dim
    self.atom_size = atom_size
    # Shared trunk mapping observations to a 128-d feature vector.
    self.feature_layer = nn.Sequential(nn.Linear(in_dim, 128), nn.ReLU())
    # Advantage stream: atom_size logits per action.
    self.advantage_hidden_layer = NoisyLinear(128, 128)
    self.advantage_layer = NoisyLinear(128, out_dim * atom_size)
    # Value stream: a single distribution over atoms.
    self.value_hidden_layer = NoisyLinear(128, 128)
    self.value_layer = NoisyLinear(128, atom_size)
class Network(nn.Module):
    """Dueling categorical (C51-style) Q-network with noisy linear layers."""

    def __init__(self, in_dim: int, out_dim: int, atom_size: int, support: torch.Tensor):
        """Build the shared trunk and the noisy value/advantage streams."""
        super(Network, self).__init__()
        self.support = support
        self.out_dim = out_dim
        self.atom_size = atom_size
        # Shared feature trunk.
        self.feature_layer = nn.Sequential(nn.Linear(in_dim, 128), nn.ReLU())
        # Advantage stream: atom_size logits per action.
        self.advantage_hidden_layer = NoisyLinear(128, 128)
        self.advantage_layer = NoisyLinear(128, out_dim * atom_size)
        # Value stream: a single atom distribution.
        self.value_hidden_layer = NoisyLinear(128, 128)
        self.value_layer = NoisyLinear(128, atom_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return expected Q-values: sum over atoms of p(z|s,a) * z."""
        return torch.sum(self.dist(x) * self.support, dim=2)

    def dist(self, x: torch.Tensor) -> torch.Tensor:
        """Return per-action probability distributions over the atoms."""
        feats = self.feature_layer(x)
        a_hidden = F.relu(self.advantage_hidden_layer(feats))
        v_hidden = F.relu(self.value_hidden_layer(feats))
        adv = self.advantage_layer(a_hidden).view(-1, self.out_dim, self.atom_size)
        val = self.value_layer(v_hidden).view(-1, 1, self.atom_size)
        # Dueling aggregation: mean-centred advantages keep V/A identifiable.
        logits = val + adv - adv.mean(dim=1, keepdim=True)
        # Softmax over atoms; clamp away exact zeros to avoid NaNs downstream.
        return F.softmax(logits, dim=-1).clamp(min=1e-3)

    def reset_noise(self):
        """Re-sample the noise of every NoisyLinear layer."""
        for layer in (self.advantage_hidden_layer, self.advantage_layer,
                      self.value_hidden_layer, self.value_layer):
            layer.reset_noise()
class Agent(nn.Module):
    """Dueling categorical (distributional) DQN with NoisyLinear exploration.

    forward() returns `num_atoms` values per action; act() averages over the
    atom dimension to rank actions.
    """

    def __init__(self, input_shape, num_atoms, num_actions=4):
        """
        Args:
            input_shape: (channels, height, width) of one observation stack.
            num_atoms: number of atoms of the value distribution.
            num_actions: size of the discrete action space.
        """
        super(Agent, self).__init__()
        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU())
        # Probe the conv stack once. (The original called features_size()
        # twice, running two dummy forward passes during construction.)
        feature_size = self.features_size()
        self.noisy_value1 = NoisyLinear(feature_size, 512)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms)
        self.noisy_advantage1 = NoisyLinear(feature_size, 512)
        self.noisy_advantage2 = NoisyLinear(512, self.num_atoms * self.num_actions)

    def features_size(self):
        """Flattened size of the conv output for a single dummy input."""
        with torch.no_grad():  # the probe needs no autograd graph
            return self.features(torch.zeros(1, *self.input_shape)).view(1, -1).size(1)

    def forward(self, x):
        """Return per-action atom values, shape (batch, num_actions, num_atoms)."""
        batch_size = x.size(0)
        x = self.features(x)
        x = x.view(batch_size, -1)
        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)
        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)
        value = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions, self.num_atoms)
        # Dueling aggregation. (The original's trailing view(-1, A, N) was a
        # no-op: the broadcast already yields (batch, num_actions, num_atoms).)
        return value + advantage - advantage.mean(1, keepdim=True)

    def reset_noise(self):
        """Re-sample the noise of every noisy layer."""
        for layer in (self.noisy_value1, self.noisy_value2,
                      self.noisy_advantage1, self.noisy_advantage2):
            layer.reset_noise()

    def act(self, state, epsilon):
        """Epsilon-greedy action selection over atom-averaged action values."""
        if np.random.rand() > epsilon:
            with torch.no_grad():
                state = torch.FloatTensor(state).unsqueeze(0)
                qvalues = self.forward(state).mean(2)
                action = qvalues.max(1)[1]
                action = action.data.cpu().numpy()[0]
        else:
            action = np.random.randint(self.num_actions)
        return action
def __init__(self, input_shape, num_atoms, num_actions=4):
    """Conv feature extractor plus noisy dueling value/advantage heads.

    Args:
        input_shape: (channels, height, width) of one observation stack.
        num_atoms: number of atoms of the value distribution.
        num_actions: size of the discrete action space.
    """
    super(Agent, self).__init__()
    self.input_shape = input_shape
    self.num_actions = num_actions
    self.num_atoms = num_atoms
    # Classic Atari-style conv stack (8/4, 4/2, 3/1).
    self.features = nn.Sequential(
        nn.Conv2d(input_shape[0], 32, 8, 4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, 2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, 1),
        nn.ReLU())
    # Value stream: one output per atom.
    self.noisy_value1 = NoisyLinear(self.features_size(), 512)
    self.noisy_value2 = NoisyLinear(512, self.num_atoms)
    # Advantage stream: num_atoms outputs per action.
    self.noisy_advantage1 = NoisyLinear(self.features_size(), 512)
    self.noisy_advantage2 = NoisyLinear(512, self.num_atoms * self.num_actions)
class Dqn(Qnet):
    """Three-layer fully connected Q-network, optionally built from noisy layers."""

    def __init__(self, states_size: np.ndarray, action_size: np.ndarray, settings: dict) -> None:
        """
        Initializes the neural network.

        Args:
            states_size: Size of the input space.
            action_size: Size of the action space.
            settings: dictionary with settings (reads "batch_size",
                'noisy_net', "layers_sizes")
        """
        super(Dqn, self).__init__()
        self.batch_size = settings["batch_size"]
        self.noisy_net = settings['noisy_net']
        layers_size = settings["layers_sizes"][0]
        # Both variants share one topology; only the layer type differs.
        linear_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = linear_cls(int(states_size), layers_size)
        self.FC2 = linear_cls(layers_size, layers_size)
        self.FC3 = linear_cls(layers_size, int(action_size))
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward step of the neural network.

        Args:
            x(torch.Tensor): observation or a batch of observations

        Returns:
            torch.Tensor: q-values for all observations and actions,
                size: batch_size x actions_size
        """
        hidden = functional.relu(self.FC1(x))
        hidden = functional.relu(self.FC2(hidden))
        return self.FC3(hidden)

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers.

        Returns:
            None
        """
        for layer in (self.FC1, self.FC2, self.FC3):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Resets the noise of the noisy layers.
        """
        for layer in (self.FC1, self.FC2, self.FC3):
            layer.reset_noise()
def __init__(self, states_size: np.ndarray, action_size: np.ndarray, settings: dict) -> None:
    """
    Initializes the dueling Q-network (shared trunk + value/advantage heads).

    Args:
        states_size: Size of the input space.
        action_size: Size of the action space.
        settings: dictionary with settings (reads "batch_size",
            "layers_sizes", 'noisy_nets')
    """
    super(DuelDQN, self).__init__()
    self.batch_size = settings["batch_size"]
    layers_size = settings["layers_sizes"][0]
    self.noisy_net = settings['noisy_nets']
    # Both variants share one topology; only the layer type differs.
    linear_cls = NoisyLinear if self.noisy_net else nn.Linear
    self.FC1 = linear_cls(int(states_size), layers_size)
    self.FC2 = linear_cls(layers_size, layers_size)
    self.FC3v = linear_cls(layers_size, 1)                  # state-value head
    self.FC3a = linear_cls(layers_size, int(action_size))   # advantage head
    self.reset()
class DuelDQN(Qnet):
    """Dueling Q-network: shared trunk with separate value and advantage heads."""

    def __init__(self, states_size: np.ndarray, action_size: np.ndarray, settings: dict) -> None:
        """
        Initializes the neural network.

        Args:
            states_size: Size of the input space.
            action_size: Size of the action space.
            settings: dictionary with settings (reads "batch_size",
                "layers_sizes", 'noisy_nets')
        """
        super(DuelDQN, self).__init__()
        self.batch_size = settings["batch_size"]
        layers_size = settings["layers_sizes"][0]
        self.noisy_net = settings['noisy_nets']
        # Both variants share one topology; only the layer type differs.
        linear_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = linear_cls(int(states_size), layers_size)
        self.FC2 = linear_cls(layers_size, layers_size)
        self.FC3v = linear_cls(layers_size, 1)                  # state-value head
        self.FC3a = linear_cls(layers_size, int(action_size))   # advantage head
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward step of the duelling q-network.

        Args:
            x(torch.Tensor): observation or a batch of observations

        Returns:
            torch.Tensor: q-values for all observations and actions
        """
        hidden = functional.relu(self.FC1(x))
        hidden = functional.relu(self.FC2(hidden))
        v = self.FC3v(hidden)
        a = self.FC3a(hidden)
        # Mean-centre the advantages; a 1-D input has no batch axis.
        if x.ndimension() == 1:
            return v + (a - torch.mean(a))
        return v + (a - torch.mean(a, 1, True))

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers.

        Returns:
            None
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Resets the noise of the noisy layers.
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            layer.reset_noise()
class BaseModelNoisy(AACBase):
    """Actor-critic base model: conv screen encoder, noisy action/value heads,
    and per-sample frame stacking with history management."""

    def __init__(self, in_channels, button_num, variable_num, frame_num):
        """
        Args:
            in_channels: channels of the (possibly frame-stacked) screen tensor.
            button_num: number of discrete actions (policy output units).
            variable_num: number of game variables concatenated before the heads.
            frame_num: frames stacked per sample by transform_input().
        """
        super(BaseModelNoisy, self).__init__()
        self.screen_feature_num = 256
        # Six stride-2 convolutions; the final map is 512 channels over 2x4
        # for the resolution this model was tuned to (see screen_features1).
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2)
        self.screen_features1 = nn.Linear(512 * 2 * 4, self.screen_feature_num)
        self.batch_norm = nn.BatchNorm1d(self.screen_feature_num)
        layer1_size = 128
        # Policy head; game variables are concatenated before the last layer.
        self.action1 = NoisyLinear(self.screen_feature_num, layer1_size)
        self.action2 = NoisyLinear(layer1_size + variable_num, button_num)
        # Value head (not used by this forward(); presumably for subclasses).
        self.value1 = NoisyLinear(self.screen_feature_num, layer1_size)
        self.value2 = NoisyLinear(layer1_size + variable_num, 1)
        self.screens = None  # per-batch-slot frame histories, created lazily
        self.frame_num = frame_num

    def forward(self, screen, variables):
        """Return (action_scores, screen_feature_vector)."""
        x = F.relu(self.conv1(screen))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = x.view(x.size(0), -1)
        # Shared screen features. (Renamed from 'input', which shadowed the
        # builtin of the same name.)
        features = self.screen_features1(x)
        features = self.batch_norm(features)
        features = F.relu(features)
        # Action head: append game variables before the final noisy layer.
        action = F.relu(self.action1(features))
        action = torch.cat([action, variables], 1)
        action = self.action2(action)
        return action, features

    def transform_input(self, screen, variables):
        """Stack the last frame_num frames per batch slot; scale variables by 100."""
        screen_batch = []
        if self.frame_num > 1:
            if self.screens is None:
                # One independent history list per batch slot.
                # BUGFIX: the original used "[[]] * len(screen)", which aliases
                # a SINGLE list across all slots, so every slot shared one
                # frame history.
                self.screens = [[] for _ in range(len(screen))]
            for idx, screens in enumerate(self.screens):
                if len(screens) >= self.frame_num:
                    screens.pop(0)
                screens.append(screen[idx])
                if len(screens) == 1:
                    # First observation of an episode: pad by repeating it.
                    for i in range(self.frame_num - 1):
                        screens.append(screen[idx])
                screen_batch.append(torch.cat(screens, 0))
            screen = torch.stack(screen_batch)
        # NOTE(review): Variable(volatile=...) is pre-0.4 PyTorch API, kept
        # for compatibility with the rest of this codebase.
        screen = Variable(screen, volatile=not self.training)
        variables = Variable(variables / 100, volatile=not self.training)
        return screen, variables

    def set_terminal(self, terminal):
        """Clear the frame history of batch slots whose terminal flag is 0.

        Assumes `terminal` is a 1-D tensor with one flag per batch slot
        (TODO confirm against callers).
        """
        if self.screens is not None:
            # view(-1) instead of squeeze(): squeeze() yields a 0-d tensor
            # when exactly one slot matches, and len() would then raise.
            indexes = torch.nonzero(terminal == 0).view(-1)
            for idx in range(len(indexes)):
                self.screens[indexes[idx]] = []

    def sample_noisy_weight(self):
        """Re-sample the noise of every noisy layer."""
        for layer in (self.action1, self.action2, self.value1, self.value2):
            layer.sample()
class DistributionalDuelDQN(nn.Module, DistributionalNetHelper):
    # Distributional (per-atom) dueling Q-network; the DistributionalNetHelper
    # mixin supplies the distribution-related utilities.
    def __init__(self, states_size: int, action_size: int, settings: dict, device: torch.device) -> None:
        """
        Initializes the DistributionalDuelDqn.

        Args:
            states_size (int): Size of the input space.
            action_size (int): Size of the action space.
            settings (dict): dictionary with settings (reads "batch_size",
                "number_atoms", "layers_sizes", 'noisy_nets')
            device (torch.device): "gpu" or "cpu"
        """
        super(DistributionalDuelDQN, self).__init__()
        # The mixin is initialized explicitly because it needs extra
        # arguments (settings, a callable into forward, and the device).
        DistributionalNetHelper.__init__(self, settings, neural_network_call=self.forward, device=device)
        self.batch_size = settings["batch_size"]
        self.number_atoms = settings["number_atoms"]
        layers_size = settings["layers_sizes"][0]
        self.noisy_net = settings['noisy_nets']
        if not self.noisy_net:
            self.FC1 = nn.Linear(int(states_size), layers_size)
            self.FC2 = nn.Linear(layers_size, layers_size)
            # Dueling heads: FC3v emits one logit per atom (state value),
            # FC3a emits number_atoms logits per action (advantages).
            self.FC3v = nn.Linear(layers_size, self.number_atoms)
            self.FC3a = nn.Linear(layers_size, int(action_size * self.number_atoms))
        else:
            # Same topology with NoisyLinear layers for exploration.
            self.FC1 = NoisyLinear(int(states_size), layers_size)
            self.FC2 = NoisyLinear(layers_size, layers_size)
            self.FC3v = NoisyLinear(layers_size, self.number_atoms)
            self.FC3a = NoisyLinear(layers_size, int(action_size) * self.number_atoms)
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the distributional neural network.

        Args:
            x(torch.Tensor): a batch of observations

        Returns:
            torch.Tensor: distributions for each sample and action,
                size: batch_size x action_size x number_atoms
        """
        # A 1-D input is a single, unbatched observation.
        if x.ndimension() == 1:
            batch_size = 1
        else:
            batch_size = x.size()[0]
        x = nn.functional.relu(self.FC1(x))
        x = nn.functional.relu(self.FC2(x))
        a = self.FC3a(x)
        v = self.FC3v(x)
        # Reshape advantages to (batch, action, atom).
        a = a.view([batch_size, -1, self.number_atoms])
        # Mean-centre advantages across actions (dueling identifiability).
        average = a.mean(1).unsqueeze(1)
        a_scaled = a - average
        # For batch_size == 1, v (1 x atoms) already broadcasts against
        # a_scaled; for larger batches an explicit action axis is needed.
        if batch_size > 1:
            v = v.unsqueeze(1)
        return_vals = v + a_scaled
        return return_vals

    def reset(self) -> None:
        """
        Resets the weights of the neural network layers and the noise of the
        noisy layers.

        Returns:
            None
        """
        # NOTE(review): assumes NoisyLinear also exposes a .weight parameter
        # for xavier initialization — confirm against its implementation.
        torch.nn.init.xavier_uniform_(self.FC1.weight.data)
        torch.nn.init.xavier_uniform_(self.FC2.weight.data)
        torch.nn.init.xavier_uniform_(self.FC3a.weight.data)
        torch.nn.init.xavier_uniform_(self.FC3v.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Samples noise for the noisy layers.
        """
        self.FC1.reset_noise()
        self.FC2.reset_noise()
        self.FC3a.reset_noise()
        self.FC3v.reset_noise()