Exemplo n.º 1
0
    def __init__(self, output_num=globals.OUTPUT_NUM):
        """Build the convolutional feature extractor and the noisy output head.

        Args:
            output_num: Width of the final ``NoisyLinear`` layer. Defaults to
                ``globals.OUTPUT_NUM``.
        """
        nn.Module.__init__(self)

        # The first convolution consumes the image planes plus the extra ones.
        input_planes = globals.IMAGE_CHANNELS + globals.EXTRA_CHANNELS
        conv_stack = [
            nn.Conv2d(input_planes, 30, kernel_size=(8, 8), stride=(4, 4)),
            nn.LeakyReLU(),
            nn.Conv2d(30, 35, kernel_size=(4, 4), stride=(2, 2)),
            nn.LeakyReLU(),
            nn.Conv2d(35, 40, kernel_size=(3, 3), stride=(1, 1)),
            nn.LeakyReLU(),
        ]
        self.conv = nn.Sequential(*conv_stack)

        # presumably the flattened size of the conv output for an image of
        # IMAGE_HEIGHT x IMAGE_WIDTH -- verify against flat_size_after_conv
        self.fc_input_size = flat_size_after_conv(
            self.conv, globals.IMAGE_HEIGHT, globals.IMAGE_WIDTH)

        # Two noisy layers with a LeakyReLU in between; the head ends on the
        # second NoisyLinear, so no softmax is applied inside this module.
        hidden_units = 512
        head_stack = [
            NoisyLinear(self.fc_input_size, hidden_units),
            nn.LeakyReLU(),
            NoisyLinear(hidden_units, output_num),
        ]
        self.fc = nn.Sequential(*head_stack)
Exemplo n.º 2
0
    def __init__(self, in_channels, button_num, variable_num, frame_num):
        """Create the conv encoder, the feature projection and the noisy heads.

        Args:
            in_channels: Channel count of the screen tensor fed to ``conv1``.
            button_num: Output width of the action head (``action2``).
            variable_num: Number of extra inputs appended to each head's
                hidden layer (added to the input width of ``action2`` and
                ``value2``).
            frame_num: Stored as ``self.frame_num``.
        """
        super(BaseModelNoisy, self).__init__()
        self.screen_feature_num = 256

        def make_conv(c_in, c_out):
            # Every stage is a 3x3 convolution with stride 2.
            return nn.Conv2d(in_channels=c_in, out_channels=c_out,
                             kernel_size=3, stride=2)

        self.conv1 = make_conv(in_channels, 16)
        self.conv2 = make_conv(16, 32)
        self.conv3 = make_conv(32, 64)
        self.conv4 = make_conv(64, 128)
        self.conv5 = make_conv(128, 256)
        self.conv6 = make_conv(256, 512)

        # assumes conv6 yields a 512 x 2 x 4 map per sample -- TODO confirm
        # against the screen resolution actually used
        flattened_size = 512 * 2 * 4
        self.screen_features1 = nn.Linear(flattened_size,
                                          self.screen_feature_num)

        self.batch_norm = nn.BatchNorm1d(self.screen_feature_num)

        hidden_size = 128
        # Action head: features -> hidden, then hidden + variables -> buttons.
        self.action1 = NoisyLinear(self.screen_feature_num, hidden_size)
        self.action2 = NoisyLinear(hidden_size + variable_num, button_num)

        # Value head: same layout, single output.
        self.value1 = NoisyLinear(self.screen_feature_num, hidden_size)
        self.value2 = NoisyLinear(hidden_size + variable_num, 1)

        self.screens = None
        self.frame_num = frame_num
Exemplo n.º 3
0
 def __init__(self, states_size: int, action_size: int, settings: dict,
              device: torch.device) -> None:
     """
     Set up the helper state and the fully connected layers.

     Args:
         states_size (int): Size of the input space.
         action_size (int): Size of the action space.
         settings (dict): Must provide "batch_size", "number_atoms",
             "layers_sizes" (first entry is the hidden width) and
             "noisy_nets" (truthy selects NoisyLinear layers).
         device (torch.device): Forwarded to DistributionalNetHelper.
     """
     super(DistributionalDuelDQN, self).__init__()
     DistributionalNetHelper.__init__(
         self, settings, neural_network_call=self.forward, device=device)

     self.batch_size = settings["batch_size"]
     self.number_atoms = settings["number_atoms"]
     hidden = settings["layers_sizes"][0]
     self.noisy_net = settings['noisy_nets']

     if self.noisy_net:
         self.FC1 = NoisyLinear(int(states_size), hidden)
         self.FC2 = NoisyLinear(hidden, hidden)
         # Value stream: one output per atom.
         self.FC3v = NoisyLinear(hidden, self.number_atoms)
         # Advantage stream: one output per (action, atom) pair.
         self.FC3a = NoisyLinear(hidden,
                                 int(action_size) * self.number_atoms)
     else:
         self.FC1 = nn.Linear(int(states_size), hidden)
         self.FC2 = nn.Linear(hidden, hidden)
         # Value stream: one output per atom.
         self.FC3v = nn.Linear(hidden, self.number_atoms)
         # Advantage stream: one output per (action, atom) pair.
         self.FC3a = nn.Linear(hidden,
                               int(action_size * self.number_atoms))
     self.reset()
Exemplo n.º 4
0
    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        atom_size: int,
        support: torch.Tensor
    ):
        """Create the shared feature layer and the two noisy heads.

        Args:
            in_dim: Number of input features.
            out_dim: Number of outputs; the advantage head emits
                ``out_dim * atom_size`` values.
            atom_size: Number of atoms; the value head emits this many values.
            support: Stored unchanged as ``self.support``.
        """
        super(Network, self).__init__()

        self.support = support
        self.out_dim = out_dim
        self.atom_size = atom_size

        hidden_dim = 128

        # Shared trunk: a single linear layer followed by ReLU.
        trunk = [nn.Linear(in_dim, hidden_dim), nn.ReLU()]
        self.feature_layer = nn.Sequential(*trunk)

        # Advantage head.
        self.advantage_hidden_layer = NoisyLinear(hidden_dim, hidden_dim)
        self.advantage_layer = NoisyLinear(hidden_dim, out_dim * atom_size)

        # Value head.
        self.value_hidden_layer = NoisyLinear(hidden_dim, hidden_dim)
        self.value_layer = NoisyLinear(hidden_dim, atom_size)
Exemplo n.º 5
0
class Network(nn.Module):
    """Dueling network that outputs a per-output distribution over atoms.

    A shared feature layer feeds an advantage head and a value head, both
    built from ``NoisyLinear`` layers. ``dist`` combines them into a
    ``(batch, out_dim, atom_size)`` distribution and ``forward`` reduces that
    distribution against ``support``.
    """

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        atom_size: int,
        support: torch.Tensor
    ):
        """Create the shared feature layer and the two noisy heads.

        Args:
            in_dim: Number of input features.
            out_dim: Number of outputs (axis 1 of the distribution).
            atom_size: Number of atoms (last axis of the distribution).
            support: Tensor multiplied element-wise with the distribution in
                ``forward``; it has to broadcast against
                ``(batch, out_dim, atom_size)``.
        """
        super().__init__()

        self.support = support
        self.out_dim = out_dim
        self.atom_size = atom_size

        hidden_dim = 128

        # Shared trunk: a single linear layer followed by ReLU.
        trunk = [nn.Linear(in_dim, hidden_dim), nn.ReLU()]
        self.feature_layer = nn.Sequential(*trunk)

        # Advantage head.
        self.advantage_hidden_layer = NoisyLinear(hidden_dim, hidden_dim)
        self.advantage_layer = NoisyLinear(hidden_dim, out_dim * atom_size)

        # Value head.
        self.value_hidden_layer = NoisyLinear(hidden_dim, hidden_dim)
        self.value_layer = NoisyLinear(hidden_dim, atom_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the support-weighted sum of the distribution over atoms.

        The result has shape ``(batch, out_dim)``.
        """
        weighted = self.dist(x) * self.support
        return weighted.sum(dim=2)

    def dist(self, x: torch.Tensor) -> torch.Tensor:
        """Return the clamped softmax distribution over atoms.

        The result has shape ``(batch, out_dim, atom_size)``. Entries are
        clamped to at least ``1e-3`` after the softmax, so a row need not sum
        to exactly one.
        """
        shared = self.feature_layer(x)
        adv_hidden = F.relu(self.advantage_hidden_layer(shared))
        val_hidden = F.relu(self.value_hidden_layer(shared))

        adv = self.advantage_layer(adv_hidden)
        adv = adv.view(-1, self.out_dim, self.atom_size)
        val = self.value_layer(val_hidden)
        val = val.view(-1, 1, self.atom_size)

        # Dueling combination: subtract the mean advantage over axis 1.
        adv_mean = adv.mean(dim=1, keepdim=True)
        logits = val + adv - adv_mean

        probabilities = torch.softmax(logits, dim=-1)
        # The lower bound keeps later logarithms of the output finite.
        return torch.clamp(probabilities, min=1e-3)

    def reset_noise(self):
        """Call ``reset_noise`` on each of the four noisy layers."""
        noisy_layers = (
            self.advantage_hidden_layer,
            self.advantage_layer,
            self.value_hidden_layer,
            self.value_layer,
        )
        for layer in noisy_layers:
            layer.reset_noise()
Exemplo n.º 6
0
class Agent(nn.Module):
    """Convolutional dueling network with noisy heads and per-action atoms.

    ``forward`` returns a ``(batch, num_actions, num_atoms)`` tensor built
    from a value stream and an advantage stream. ``act`` picks an action
    epsilon-greedily from the mean of that tensor over the atom axis.
    """

    def __init__(self, input_shape, num_atoms, num_actions=4):
        """Build the conv feature extractor and the four noisy layers.

        Args:
            input_shape: Shape of one observation without the batch axis;
                ``input_shape[0]`` is the channel count of the first conv.
            num_atoms: Number of atoms per action.
            num_actions: Number of actions.
        """
        super().__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU())

        # The flattened size is the same for both streams, so the dummy
        # forward pass that measures it only needs to run once.
        feature_dim = self.features_size()
        self.noisy_value1 = NoisyLinear(feature_dim, 512)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms)
        self.noisy_advantage1 = NoisyLinear(feature_dim, 512)
        self.noisy_advantage2 = NoisyLinear(512,
                                            self.num_atoms * self.num_actions)

    def features_size(self):
        """Return the flattened size of ``self.features`` for one input.

        A zero tensor of shape ``(1, *input_shape)`` is pushed through the
        conv stack; only its shape matters, so no autograd graph is recorded.
        """
        with torch.no_grad():
            probe = self.features(torch.zeros(1, *self.input_shape))
        return probe.view(1, -1).size(1)

    def forward(self, x):
        """Return a ``(batch, num_actions, num_atoms)`` tensor for batch ``x``."""
        batch_size = x.size(0)
        x = self.features(x)
        x = x.view(batch_size, -1)

        value = F.relu(self.noisy_value1(x))
        value = self.noisy_value2(value)
        advantage = F.relu(self.noisy_advantage1(x))
        advantage = self.noisy_advantage2(advantage)

        value = value.view(batch_size, 1, self.num_atoms)
        advantage = advantage.view(batch_size, self.num_actions,
                                   self.num_atoms)
        # Dueling combination; the result already has the documented shape.
        return value + advantage - advantage.mean(1, keepdim=True)

    def reset_noise(self):
        """Call ``reset_noise`` on each of the four noisy layers."""
        self.noisy_value1.reset_noise()
        self.noisy_value2.reset_noise()
        self.noisy_advantage1.reset_noise()
        self.noisy_advantage2.reset_noise()

    def act(self, state, epsilon):
        """Choose an action epsilon-greedily.

        Args:
            state: One observation (no batch axis), convertible by
                ``torch.FloatTensor``.
            epsilon: A uniform draw from ``[0, 1)`` above this value selects
                the greedy action; otherwise a uniformly random action is
                returned.

        Returns:
            The index of the chosen action.
        """
        if np.random.rand() > epsilon:
            # Action selection needs no gradients, so the whole forward pass
            # (not only the tensor construction) runs under no_grad.
            with torch.no_grad():
                state = torch.FloatTensor(state).unsqueeze(0)
                qvalues = self.forward(state).mean(2)
                action = qvalues.max(1)[1]
            action = action.cpu().numpy()[0]
        else:
            action = np.random.randint(self.num_actions)
        return action
Exemplo n.º 7
0
    def __init__(self, input_shape, num_atoms, num_actions=4):
        """Build the conv feature extractor and the four noisy layers.

        Args:
            input_shape: Shape of one observation without the batch axis;
                ``input_shape[0]`` is the channel count of the first conv.
            num_atoms: Number of atoms per action.
            num_actions: Number of actions.
        """
        super(Agent, self).__init__()

        self.input_shape = input_shape
        self.num_actions = num_actions
        self.num_atoms = num_atoms
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, kernel_size=8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU())

        # Both streams read the same flattened conv output, so its size is
        # looked up once instead of calling features_size() per stream.
        feature_dim = self.features_size()
        self.noisy_value1 = NoisyLinear(feature_dim, 512)
        self.noisy_value2 = NoisyLinear(512, self.num_atoms)
        self.noisy_advantage1 = NoisyLinear(feature_dim, 512)
        self.noisy_advantage2 = NoisyLinear(512,
                                            self.num_atoms * self.num_actions)
Exemplo n.º 8
0
class Dqn(Qnet):
    """Three-layer fully connected Q-network with optional noisy layers."""

    def __init__(self, states_size: np.ndarray, action_size: np.ndarray, settings: dict) -> None:
        """
        Create the three layers and initialise their weights.

        Args:
            states_size: Size of the input space.
            action_size: Size of the action space.
            settings: Must provide "batch_size", "noisy_net" (truthy selects
                NoisyLinear layers) and "layers_sizes" (first entry is the
                hidden width).
        """
        super().__init__()
        self.batch_size = settings["batch_size"]
        self.noisy_net = settings['noisy_net']
        hidden = settings["layers_sizes"][0]

        # Both variants share the same topology; only the layer type differs.
        layer_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = layer_cls(int(states_size), hidden)
        self.FC2 = layer_cls(hidden, hidden)
        self.FC3 = layer_cls(hidden, int(action_size))
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Run the network on an observation or a batch of observations.

        Args:
            x(torch.Tensor): observation or a batch of observations

        Returns:
            torch.Tensor: q-values for all observations and actions, size: batch_size x actions_size
        """
        hidden = functional.relu(self.FC1(x))
        hidden = functional.relu(self.FC2(hidden))
        return self.FC3(hidden)

    def reset(self) -> None:
        """
        Re-initialise the layer weights with Xavier-uniform values and, for
        noisy networks, resample the noise.

        Returns:
            None
        """
        for layer in (self.FC1, self.FC2, self.FC3):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Call ``reset_noise`` on every layer (only valid for noisy layers).
        """
        for layer in (self.FC1, self.FC2, self.FC3):
            layer.reset_noise()
Exemplo n.º 9
0
 def __init__(self, states_size: np.ndarray, action_size: np.ndarray, settings: dict) -> None:
     """
     Create the shared layers and the value/advantage output layers.

     Args:
         states_size: Size of the input space.
         action_size: Size of the action space.
         settings: Must provide "batch_size", "layers_sizes" (first entry
             is the hidden width) and "noisy_nets" (truthy selects
             NoisyLinear layers).
     """
     super(DuelDQN, self).__init__()
     self.batch_size = settings["batch_size"]
     hidden = settings["layers_sizes"][0]
     self.noisy_net = settings['noisy_nets']

     # Both variants share the same topology; only the layer type differs.
     layer_cls = NoisyLinear if self.noisy_net else nn.Linear
     self.FC1 = layer_cls(int(states_size), hidden)
     self.FC2 = layer_cls(hidden, hidden)
     # Value stream: a single output.
     self.FC3v = layer_cls(hidden, 1)
     # Advantage stream: one output per action.
     self.FC3a = layer_cls(hidden, int(action_size))
     self.reset()
Exemplo n.º 10
0
class DuelDQN(Qnet):
    """Dueling Q-network with optional noisy layers."""

    def __init__(self, states_size: np.ndarray, action_size: np.ndarray, settings: dict) -> None:
        """
        Create the shared layers and the value/advantage output layers.

        Args:
            states_size: Size of the input space.
            action_size: Size of the action space.
            settings: Must provide "batch_size", "layers_sizes" (first entry
                is the hidden width) and "noisy_nets" (truthy selects
                NoisyLinear layers).
        """
        super().__init__()
        self.batch_size = settings["batch_size"]
        hidden = settings["layers_sizes"][0]
        self.noisy_net = settings['noisy_nets']

        # Both variants share the same topology; only the layer type differs.
        layer_cls = NoisyLinear if self.noisy_net else nn.Linear
        self.FC1 = layer_cls(int(states_size), hidden)
        self.FC2 = layer_cls(hidden, hidden)
        # Value stream: a single output.
        self.FC3v = layer_cls(hidden, 1)
        # Advantage stream: one output per action.
        self.FC3a = layer_cls(hidden, int(action_size))
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Run the dueling network on an observation or a batch of observations.

        Args:
            x(torch.Tensor): observation or a batch of observations

        Returns:
            torch.Tensor: q-values for all observations and actions
        """
        hidden = functional.relu(self.FC1(x))
        hidden = functional.relu(self.FC2(hidden))
        v = self.FC3v(hidden)
        a = self.FC3a(hidden)
        if hidden.ndimension() == 1:
            # Single observation: average over every advantage entry.
            a_mean = torch.mean(a)
        else:
            # Batched input: average over axis 1, keeping the axis.
            a_mean = torch.mean(a, 1, True)
        return v + (a - a_mean)

    def reset(self) -> None:
        """
        Re-initialise the layer weights with Xavier-uniform values and, for
        noisy networks, resample the noise.

        Returns:
            None
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Call ``reset_noise`` on every layer (only valid for noisy layers).
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            layer.reset_noise()
Exemplo n.º 11
0
class BaseModelNoisy(AACBase):
    """Conv encoder with noisy action/value heads and per-entry frame stacking.

    ``forward`` produces the action-head output together with the shared
    feature vector. ``transform_input`` keeps a rolling history of the last
    ``frame_num`` screens for every batch entry, and ``set_terminal`` clears
    selected histories.
    """

    def __init__(self, in_channels, button_num, variable_num, frame_num):
        """Create the conv encoder, the feature projection and the noisy heads.

        Args:
            in_channels: Channel count of the screen tensor fed to ``conv1``.
            button_num: Output width of the action head (``action2``).
            variable_num: Number of extra inputs appended to each head's
                hidden layer (added to the input width of ``action2`` and
                ``value2``).
            frame_num: Number of screens kept per batch entry by
                ``transform_input``; values of 1 or less disable stacking.
        """
        super(BaseModelNoisy, self).__init__()
        self.screen_feature_num = 256
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2)

        # assumes conv6 yields a 512 x 2 x 4 map per sample -- TODO confirm
        # against the screen resolution actually used
        self.screen_features1 = nn.Linear(512 * 2 * 4, self.screen_feature_num)

        self.batch_norm = nn.BatchNorm1d(self.screen_feature_num)

        layer1_size = 128
        # Action head: features -> hidden, then hidden + variables -> buttons.
        self.action1 = NoisyLinear(self.screen_feature_num, layer1_size)
        self.action2 = NoisyLinear(layer1_size + variable_num, button_num)

        # Value head: same layout, single output. Not used by forward().
        self.value1 = NoisyLinear(self.screen_feature_num, layer1_size)
        self.value2 = NoisyLinear(layer1_size + variable_num, 1)

        # Per-entry frame histories, created lazily by transform_input().
        self.screens = None
        self.frame_num = frame_num

    def forward(self, screen, variables):
        """Return ``(action, features)`` for a batch of screens and variables.

        Args:
            screen: Batch of screens fed to ``conv1``.
            variables: Per-sample extra inputs, concatenated along axis 1
                with the action head's hidden layer.

        Returns:
            A tuple of the action-head output and the shared feature vector
            (after the linear projection, batch norm and ReLU).
        """
        # cnn
        screen_features = F.relu(self.conv1(screen))
        screen_features = F.relu(self.conv2(screen_features))
        screen_features = F.relu(self.conv3(screen_features))
        screen_features = F.relu(self.conv4(screen_features))
        screen_features = F.relu(self.conv5(screen_features))
        screen_features = F.relu(self.conv6(screen_features))
        screen_features = screen_features.view(screen_features.size(0), -1)

        # features (named so the builtin ``input`` is not shadowed)
        features = self.screen_features1(screen_features)
        features = self.batch_norm(features)
        features = F.relu(features)

        # action
        action = F.relu(self.action1(features))
        action = torch.cat([action, variables], 1)
        action = self.action2(action)

        return action, features

    def transform_input(self, screen, variables):
        """Stack recent frames per batch entry and wrap both inputs.

        When ``frame_num > 1`` the newest screen of every batch entry is
        appended to that entry's history (dropping the oldest once the
        history is full) and the history is concatenated along axis 0. A
        fresh history is padded by repeating its first screen.

        Args:
            screen: Batch of current screens, indexable by batch position.
            variables: Batch of extra inputs; divided by 100 before wrapping.

        Returns:
            A tuple ``(screen, variables)`` of wrapped inputs.
        """
        if self.frame_num > 1:
            if self.screens is None:
                # One independent history per batch entry. ``[[]] * n`` would
                # repeat a single list object n times, so every entry would
                # append to (and pop from) the same shared history.
                self.screens = [[] for _ in range(len(screen))]
            screen_batch = []
            for idx, history in enumerate(self.screens):
                if len(history) >= self.frame_num:
                    history.pop(0)
                history.append(screen[idx])
                if len(history) == 1:
                    # Fresh history: repeat the first screen to fill it up.
                    history.extend([screen[idx]] * (self.frame_num - 1))
                screen_batch.append(torch.cat(history, 0))
            screen = torch.stack(screen_batch)

        # NOTE(review): ``volatile`` was removed in PyTorch 0.4 and is ignored
        # (with a warning) by later releases; kept as-is for the version this
        # code targets -- confirm before modernising to torch.no_grad().
        screen = Variable(screen, volatile=not self.training)
        # presumably rescales the variables to a smaller range -- verify
        variables = Variable(variables / 100, volatile=not self.training)
        return screen, variables

    def set_terminal(self, terminal):
        """Clear the frame history of every batch entry whose flag is 0.

        Args:
            terminal: Tensor of per-entry flags; entries equal to 0 have
                their history reset. Does nothing before the histories exist.
        """
        if self.screens is None:
            return
        # Column 0 of nonzero() is the index along the batch axis. Using it
        # directly (instead of squeeze()) also works when exactly one entry
        # matches, where squeeze() would produce a 0-d tensor with no len().
        ended = torch.nonzero(terminal == 0)[:, 0].tolist()
        for idx in ended:
            self.screens[idx] = []

    def sample_noisy_weight(self):
        """Call ``sample`` on each of the four noisy layers."""
        self.action1.sample()
        self.action2.sample()
        self.value1.sample()
        self.value2.sample()
Exemplo n.º 12
0
class DistributionalDuelDQN(nn.Module, DistributionalNetHelper):
    """Dueling network that outputs one value per (action, atom) pair."""

    def __init__(self, states_size: int, action_size: int, settings: dict,
                 device: torch.device) -> None:
        """
        Set up the helper state and the fully connected layers.

        Args:
            states_size (int): Size of the input space.
            action_size (int): Size of the action space.
            settings (dict): Must provide "batch_size", "number_atoms",
                "layers_sizes" (first entry is the hidden width) and
                "noisy_nets" (truthy selects NoisyLinear layers).
            device (torch.device): Forwarded to DistributionalNetHelper.
        """
        super(DistributionalDuelDQN, self).__init__()
        DistributionalNetHelper.__init__(
            self, settings, neural_network_call=self.forward, device=device)

        self.batch_size = settings["batch_size"]
        self.number_atoms = settings["number_atoms"]
        hidden = settings["layers_sizes"][0]
        self.noisy_net = settings['noisy_nets']

        if self.noisy_net:
            self.FC1 = NoisyLinear(int(states_size), hidden)
            self.FC2 = NoisyLinear(hidden, hidden)
            # Value stream: one output per atom.
            self.FC3v = NoisyLinear(hidden, self.number_atoms)
            # Advantage stream: one output per (action, atom) pair.
            self.FC3a = NoisyLinear(hidden,
                                    int(action_size) * self.number_atoms)
        else:
            self.FC1 = nn.Linear(int(states_size), hidden)
            self.FC2 = nn.Linear(hidden, hidden)
            # Value stream: one output per atom.
            self.FC3v = nn.Linear(hidden, self.number_atoms)
            # Advantage stream: one output per (action, atom) pair.
            self.FC3a = nn.Linear(hidden,
                                  int(action_size * self.number_atoms))
        self.reset()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the distributional neural network.

        Args:
            x(torch.Tensor): a single observation or a batch of observations
        Returns:
            torch.Tensor: one value per sample, action and atom,
            size: batch_size x action_size x number_atoms
        """
        # A 1-D input is treated as a batch containing one observation.
        batch_size = 1 if x.ndimension() == 1 else x.size()[0]

        hidden = nn.functional.relu(self.FC1(x))
        hidden = nn.functional.relu(self.FC2(hidden))
        a = self.FC3a(hidden)
        v = self.FC3v(hidden)

        advantages = a.view([batch_size, -1, self.number_atoms])
        # Centre the advantages by their mean over the action axis.
        centred = advantages - advantages.mean(1, keepdim=True)
        if batch_size > 1:
            # Insert the action axis so the value broadcasts per action; for a
            # single sample plain broadcasting already lines the atoms up.
            v = v.unsqueeze(1)
        return v + centred

    def reset(self) -> None:
        """
        Re-initialise the layer weights with Xavier-uniform values and, for
        noisy networks, resample the noise.

        Returns:
            None
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            torch.nn.init.xavier_uniform_(layer.weight.data)
        if self.noisy_net:
            self.reset_noise()

    def reset_noise(self) -> None:
        """
        Call ``reset_noise`` on every layer (only valid for noisy layers).
        """
        for layer in (self.FC1, self.FC2, self.FC3a, self.FC3v):
            layer.reset_noise()