Example #1
    def __init__(self,
                 in_features: int,
                 out_features: int,
                 out_scale: float = 1,
                 **kwargs):
        """
        Parameters:
            size: Observation's dimensionality upon sampling.

        """
        super(GaussianPolicy, self).__init__()

        self.in_features: int = in_features
        self.out_features: int = out_features
        self.out_scale = out_scale

        # Pop so that passing **kwargs below doesn't provide `hidden_layers` twice
        hidden_layers = kwargs.pop("hidden_layers", None)
        self.dist = Normal
        self.mu = FcNet(in_features,
                        out_features,
                        hidden_layers=hidden_layers,
                        **kwargs)
        self.log_std = FcNet(in_features,
                             out_features,
                             hidden_layers=hidden_layers,
                             **kwargs)

        self.log_std_min = -10
        self.log_std_max = 2

        self._last_dist: Optional[Distribution] = None
        self._last_samples: Optional[torch.Tensor] = None
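The snippet above only defines the two heads. A minimal, standalone sketch of how such heads are
typically combined at call time (an assumption for illustration, not this class's actual forward):

import torch
from torch.distributions import Normal

mu = torch.zeros(1, 2)                     # stand-in for self.mu(x)
log_std = torch.zeros(1, 2).clamp(-10, 2)  # mirrors log_std_min/log_std_max
dist = Normal(mu, log_std.exp())
action = dist.rsample()                    # reparametrized, differentiable sample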
Example #2
    def __init__(
        self,
        num_atoms: int = 21,
        v_min: float = -20.,
        v_max: float = 20.,
        state_size: Optional[int] = None,
        action_size: Optional[int] = None,
        hidden_layers: Sequence[int] = (200, 200),
        net: Optional[NetworkType] = None,
        device: Optional[torch.device] = None,
    ):
        """
        Parameters:
            num_atoms: Number of atoms that discretize the probability distribution.
            v_min: Minimum (edge) value of the shifted distribution.
            v_max: Maximum (edge) value of the shifted distribution.
            net: (Optional) A network used for estimation. If `net` is provided then `hidden_layers` has no effect.
            state_size: Size of the state.
            action_size: Length of the output.
            hidden_layers: Shape of the hidden layers that are fully connected networks.

        *Note* that either `net` or both `state_size` and `action_size` must not be None.
        If `state_size` and `action_size` are provided then the default net is created as a
        fully connected network with layers of `hidden_layers` size.

        """
        super(CategoricalNet, self).__init__()
        self.device = device
        self.num_atoms = num_atoms
        self.v_min = v_min
        self.v_max = v_max
        self.z_atoms = torch.linspace(v_min, v_max, num_atoms, device=device)
        self.z_delta = self.z_atoms[1] - self.z_atoms[0]

        if net is not None:
            self.net = net
        elif state_size is not None and action_size is not None:
            self.net = FcNet(state_size,
                             action_size * self.num_atoms,
                             hidden_layers=hidden_layers,
                             device=self.device)
        else:
            raise ValueError(
                "CategoricalNet needs to be instantiated either with `net` or (`state_size` and `action_size`)"
            )

        self.in_features = self.net.in_features
        self.out_features = (self.net.out_features // self.num_atoms,
                             self.num_atoms)
        self.to(device=device)
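Given the constructor above, there are two valid ways to build the net. A hypothetical usage
sketch, assuming `CategoricalNet` and `FcNet` are importable from the surrounding library:

# Default path: a fully connected net of `hidden_layers` size is built internally.
net_a = CategoricalNet(num_atoms=51, state_size=4, action_size=2)
# Injection path: `hidden_layers` has no effect once `net` is provided.
net_b = CategoricalNet(num_atoms=51, net=FcNet(4, 2 * 51))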
Example #3
def network_fn(state_dim, output_dim, device):
    conv_net = ConvNet(state_dim, hidden_layers=(10, 10), device=device)
    return NetChainer(net_classes=[
        ScaleNet(scale=1. / 255),
        conv_net,
        FlattenNet(),
        FcNet(conv_net.output_size,
              output_dim,
              hidden_layers=(100, 100, 50),
              device=device),
    ])
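A hypothetical call for a single-channel image observation; the (C, H, W) shape is an assumption,
and the helpers (ConvNet, ScaleNet, FlattenNet, NetChainer) come from the surrounding library:

import torch

net = network_fn(state_dim=(1, 84, 84), output_dim=4, device=torch.device("cpu"))
out = net(torch.randint(0, 256, (1, 1, 84, 84)).float())  # ScaleNet maps raw pixels to [0, 1]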
Example #4
    def __init__(self,
                 input_shape: Sequence[int],
                 output_shape: Sequence[int],
                 hidden_layers: Sequence[int],
                 net_fn: Optional[Callable[..., NetworkType]] = None,
                 net_class: Optional[NetworkTypeClass] = None,
                 **kwargs):
        """
        Parameters:
            input_shape (Tuple of ints):
                Shape of the input. Even when the input is 1D, a single-item tuple is expected, e.g. (4,).
            output_shape (Tuple of ints):
                Shape of the output. Same convention as `input_shape`.
        """
        super(DuelingNet, self).__init__()
        device = kwargs.get("device")
        # We only care about the leading size, e.g. (4,) -> 4
        if net_fn is not None:
            self.value_net = net_fn(input_shape, (1, ),
                                    hidden_layers=hidden_layers)
            self.advantage_net = net_fn(input_shape,
                                        output_shape,
                                        hidden_layers=hidden_layers)
        elif net_class is not None:
            self.value_net = net_class(input_shape, (1, ),
                                       hidden_layers=hidden_layers,
                                       device=device)
            self.advantage_net = net_class(input_shape,
                                           output_shape,
                                           hidden_layers=hidden_layers,
                                           device=device)
        else:
            self.value_net = FcNet(input_shape, (1, ),
                                   hidden_layers=hidden_layers,
                                   gate_out=None,
                                   device=device)
            self.advantage_net = FcNet(input_shape,
                                       output_shape,
                                       hidden_layers=hidden_layers,
                                       gate_out=None,
                                       device=device)
Example #5
class DuelingNet(NetworkType):
    def __init__(self,
                 input_shape: Sequence[int],
                 output_shape: Sequence[int],
                 hidden_layers: Sequence[int],
                 net_fn: Optional[Callable[..., NetworkType]] = None,
                 net_class: Optional[NetworkTypeClass] = None,
                 **kwargs):
        """
        Parameters:
            input_shape (Tuple of ints):
                Shape of the input. Even when the input is 1D, a single-item tuple is expected, e.g. (4,).
            output_shape (Tuple of ints):
                Shape of the output. Same convention as `input_shape`.
        """
        super(DuelingNet, self).__init__()
        device = kwargs.get("device")
        # We only care about the leading size, e.g. (4,) -> 4
        if net_fn is not None:
            self.value_net = net_fn(input_shape, (1, ),
                                    hidden_layers=hidden_layers)
            self.advantage_net = net_fn(input_shape,
                                        output_shape,
                                        hidden_layers=hidden_layers)
        elif net_class is not None:
            self.value_net = net_class(input_shape, (1, ),
                                       hidden_layers=hidden_layers,
                                       device=device)
            self.advantage_net = net_class(input_shape,
                                           output_shape,
                                           hidden_layers=hidden_layers,
                                           device=device)
        else:
            self.value_net = FcNet(input_shape, (1, ),
                                   hidden_layers=hidden_layers,
                                   gate_out=None,
                                   device=device)
            self.advantage_net = FcNet(input_shape,
                                       output_shape,
                                       hidden_layers=hidden_layers,
                                       gate_out=None,
                                       device=device)

    def reset_parameters(self) -> None:
        self.value_net.reset_parameters()
        self.advantage_net.reset_parameters()

    def act(self, x):
        value = self.value_net.act(x).float()
        advantage = self.advantage_net.act(x).float()
        q = value.expand_as(advantage) + (
            advantage - advantage.mean(1, keepdim=True).expand_as(advantage))
        return q

    def forward(self, x):
        value = self.value_net(x).float()
        advantage = self.advantage_net(x).float()
        q = value.expand_as(advantage) + (
            advantage - advantage.mean(1, keepdim=True).expand_as(advantage))
        return q
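The aggregation in both act and forward is the standard dueling identity
Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). A standalone numeric check with plain tensors:

import torch

value = torch.tensor([[1.0]])              # V(s), shape (batch, 1)
advantage = torch.tensor([[0.5, -0.5]])    # A(s, a), shape (batch, actions)
q = value.expand_as(advantage) + (advantage - advantage.mean(1, keepdim=True))
print(q)  # tensor([[1.5000, 0.5000]]); the mean-zero advantage keeps V identifiable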
Example #6
def network_fn(state_dim, output_dim, device=None):
    conv_net = ConvNet(state_dim,
                       hidden_layers=(30, 30),
                       kernel_size=(16, 8),
                       max_pool_size=(4, 2),
                       stride=(4, 2),
                       device=device)
    return NetChainer(net_classes=[
        conv_net,
        nn.Flatten(),
        FcNet(conv_net.output_size,
              output_dim,
              hidden_layers=(200, 200),
              device=device),
    ])
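Unlike Example #3, this variant flattens with nn.Flatten() and skips input scaling. A hypothetical
invocation for a stacked-frame input (the shape is an assumption):

net = network_fn(state_dim=(4, 84, 84), output_dim=6)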
Example #7
def test_rainbownet_prenetwork():
    # Assign
    in_features, out_features = 10, 2
    num_atoms = 21
    pre_net_out_features = 4
    pre_network_fn = lambda in_features: FcNet(in_features,
                                               pre_net_out_features)
    net = h.RainbowNet((in_features, ), (out_features, ),
                       num_atoms=num_atoms,
                       pre_network_fn=pre_network_fn)
    test_tensor = torch.randn((1, in_features))

    # Act
    out_tensor = net(test_tensor)

    # Assert
    assert net.in_features == in_features
    assert net.out_features == out_features
    assert out_tensor.shape == (test_tensor.shape[0], out_features, num_atoms)
    assert all(['Linear' in str(layer) for layer in net.value_net.layers])
    assert all(['Linear' in str(layer) for layer in net.advantage_net.layers])
    assert all(['Linear' in str(layer) for layer in net.pre_network.layers])
    assert net.value_net.layers[0].in_features == pre_net_out_features
    assert net.advantage_net.layers[0].in_features == pre_net_out_features
Example #8
    def __init__(self,
                 input_shape: Sequence[int],
                 output_shape: Sequence[int],
                 hidden_layers=(200, 200),
                 **kwargs):
        """
        Parameters:
            input_shape (tuple of ints): Shape of the single input.
            output_shape (tuple of ints): Shape of the expected output.
            hidden_layers (tuple of ints): Shape of fully connected networks. Default: (200, 200).

        Keyword parameters:
            num_atoms (int): Number of atoms used in estimating distribution. Default: 21.
            v_min (float): Value distribution minimum (leftmost) value. Default: -10.
            v_max (float): Value distribution maximum (rightmost) value. Default: 10.
            noisy (bool): Whether to use the Noisy version of the FC networks. Default: False.
            pre_network_fn (func): A shared network that is used before the *value* and *advantage* networks.
            device (None, str or torch.device): Device on which to cast the network. Can be assigned with a string, or
                directly by passing a torch.device. If `None` then it tries CUDA first, then CPU. Default: None.
        """
        super(RainbowNet, self).__init__()
        self.device = device = kwargs.get("device", None)

        self.pre_network = None
        in_features = input_shape[0]
        out_features = output_shape[0]
        if 'pre_network_fn' in kwargs:
            self.pre_network = kwargs.get("pre_network_fn")(in_features=input_shape)
            # Registers pre_network's parameters to this module
            self.pre_network_params = self.pre_network.parameters()
            in_features = self.pre_network.out_features

        self.v_min = float(kwargs.get("v_min", -10))
        self.v_max = float(kwargs.get("v_max", 10))
        self.num_atoms = num_atoms = int(kwargs.get("num_atoms", 21))
        self.z_atoms = torch.linspace(self.v_min,
                                      self.v_max,
                                      self.num_atoms,
                                      device=self.device)
        self.z_delta = self.z_atoms[1] - self.z_atoms[0]

        self.noisy = kwargs.get("noisy", False)
        if self.noisy:
            self.value_net = NoisyNet(in_features,
                                      num_atoms,
                                      hidden_layers=hidden_layers,
                                      device=device)
            self.advantage_net = NoisyNet(in_features,
                                          out_features * num_atoms,
                                          hidden_layers=hidden_layers,
                                          device=device)
        else:
            self.value_net = FcNet(in_features,
                                   num_atoms,
                                   hidden_layers=hidden_layers,
                                   gate_out=None,
                                   device=device)
            self.advantage_net = FcNet((in_features, ),
                                       (out_features * num_atoms, ),
                                       hidden_layers=hidden_layers,
                                       gate_out=None,
                                       device=device)

        self.in_features = in_features if self.pre_network is None else self.pre_network.in_features
        self.out_features = out_features
        self.to(device=self.device)
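A hypothetical construction of the net defined above (class and helper names assumed from the
surrounding library). Using the forward shown in Example #9, the expected Q-values are recovered
by weighting the atoms by their probabilities:

import torch

net = RainbowNet((4, ), (2, ), num_atoms=51, v_min=-10, v_max=10, noisy=True)
probs = net(torch.randn(1, 4))     # (1, 2, 51): one distribution over atoms per action
q = (probs * net.z_atoms).sum(-1)  # expected Q-values, shape (1, 2)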
Example #9
class RainbowNet(NetworkType, nn.Module):
    """Rainbow networks combines dueling and categorical networks.

    """
    def __init__(self,
                 input_shape: Sequence[int],
                 output_shape: Sequence[int],
                 hidden_layers=(200, 200),
                 **kwargs):
        """
        Parameters:
            input_shape (tuple of ints): Shape of the single input.
            output_shape (tuple of ints): Shape of the expected output.
            hidden_layers (tuple of ints): Shape of fully connected networks. Default: (200, 200).

        Keyword parameters:
            num_atoms (int): Number of atoms used in estimating distribution. Default: 21.
            v_min (float): Value distribution minimum (leftmost) value. Default: -10.
            v_max (float): Value distribution maximum (rightmost) value. Default: 10.
            noisy (bool): Whether to use the Noisy version of the FC networks. Default: False.
            pre_network_fn (func): A shared network that is used before the *value* and *advantage* networks.
            device (None, str or torch.device): Device on which to cast the network. Can be assigned with a string, or
                directly by passing a torch.device. If `None` then it tries CUDA first, then CPU. Default: None.
        """
        super(RainbowNet, self).__init__()
        self.device = device = kwargs.get("device", None)

        self.pre_network = None
        in_features = input_shape[0]
        out_features = output_shape[0]
        if 'pre_network_fn' in kwargs:
            self.pre_network = kwargs.get("pre_network_fn")(in_features=input_shape)
            # Registers pre_network's parameters to this module
            self.pre_network_params = self.pre_network.parameters()
            in_features = self.pre_network.out_features

        self.v_min = float(kwargs.get("v_min", -10))
        self.v_max = float(kwargs.get("v_max", 10))
        self.num_atoms = num_atoms = int(kwargs.get("num_atoms", 21))
        self.z_atoms = torch.linspace(self.v_min,
                                      self.v_max,
                                      self.num_atoms,
                                      device=self.device)
        self.z_delta = self.z_atoms[1] - self.z_atoms[0]

        self.noisy = kwargs.get("noisy", False)
        if self.noisy:
            self.value_net = NoisyNet(in_features,
                                      num_atoms,
                                      hidden_layers=hidden_layers,
                                      device=device)
            self.advantage_net = NoisyNet(in_features,
                                          out_features * num_atoms,
                                          hidden_layers=hidden_layers,
                                          device=device)
        else:
            self.value_net = FcNet(in_features,
                                   num_atoms,
                                   hidden_layers=hidden_layers,
                                   gate_out=None,
                                   device=device)
            self.advantage_net = FcNet((in_features, ),
                                       (out_features * num_atoms, ),
                                       hidden_layers=hidden_layers,
                                       gate_out=None,
                                       device=device)

        self.in_features = in_features if self.pre_network is None else self.pre_network.in_features
        self.out_features = out_features
        self.to(device=self.device)

    @lru_cache(maxsize=5)
    def _offset(self, batch_size):
        offset = torch.linspace(0, ((batch_size - 1) * self.num_atoms),
                                batch_size,
                                device=self.device)
        return offset.unsqueeze(1).expand(batch_size, self.num_atoms)

    def reset_noise(self):
        if self.noisy:
            self.value_net.reset_noise()
            self.advantage_net.reset_noise()

    def act(self, x, log_prob=False):
        """
        Parameters:
            log_prob (bool):
                Whether to return log(prob), which uses PyTorch's log_softmax. According to the docs it's
                quicker and more stable than taking prob.log().
        """
        with torch.no_grad():
            self.eval()
            if self.pre_network is not None:
                x = self.pre_network(x)
            value = self.value_net.act(x).view(-1, 1, self.num_atoms)
            advantage = self.advantage_net.act(x).view(-1, self.out_features,
                                                       self.num_atoms)
            q = value + advantage - advantage.mean(1, keepdim=True)
            # Doc: It's computationally quicker than log(softmax) and more stable
            out = F.softmax(q, dim=-1) if not log_prob else F.log_softmax(
                q, dim=-1)
            self.train()
        return out

    def forward(self, x, log_prob=False):
        """
        Parameters:
            log_prob (bool):
                Whether to return log(prob), which uses PyTorch's log_softmax. According to the docs it's
                quicker and more stable than taking prob.log().
        """
        if self.pre_network is not None:
            x = self.pre_network(x)
        value = self.value_net(x).view((-1, 1, self.num_atoms))
        advantage = self.advantage_net(x).view(-1, self.out_features,
                                               self.num_atoms)
        q = value + advantage - advantage.mean(1, keepdim=True)
        if log_prob:
            # Doc: It's computationally quicker than log(softmax) and more stable
            return F.log_softmax(q, dim=-1)
        return F.softmax(q, dim=-1)

    def dist_projection(self, rewards: torch.Tensor, masks: torch.Tensor,
                        discount: float,
                        prob_next: torch.Tensor) -> torch.Tensor:
        """
        Parameters:
            rewards: Tensor containing rewards that are used as offsets for each distribution.
            masks: Tensor indicating whether the iteration is terminal. Usually `masks = 1 - dones`.
            discount: Discounting value for added Q distributional estimate. Typically gamma or gamma^(n_steps).
            prob_next: Probability estimates based on transitioned (next) states.

        """
        batch_size = rewards.shape[0]
        Tz = rewards + discount * masks * self.z_atoms.view(1, -1)
        assert Tz.shape == (batch_size, self.num_atoms)
        Tz.clamp_(self.v_min, self.v_max)  # in place

        b_idx = (Tz - self.v_min) / self.z_delta
        l_idx = b_idx.floor().to(torch.int64)
        u_idx = b_idx.ceil().to(torch.int64)

        # Fix disappearing probability mass when l = b = u (b is int)
        l_idx[(u_idx > 0) * (l_idx == u_idx)] -= 1
        u_idx[(l_idx < (self.num_atoms - 1)) * (l_idx == u_idx)] += 1

        offset = self._offset(batch_size)
        l_offset_idx = (l_idx + offset).type(torch.int64)
        u_offset_idx = (u_idx + offset).type(torch.int64)

        # Distribute probability of Tz
        m = rewards.new_zeros(batch_size * self.num_atoms)

        # Dealing with indices. *Note* not to forget batches.
        # m[l] = m[l] + p(s[t+n], a*)(u - b)
        m.index_add_(0, l_offset_idx.view(-1),
                     (prob_next * (u_idx.float() - b_idx)).view(-1))
        # m[u] = m[u] + p(s[t+n], a*)(b - l)
        m.index_add_(0, u_offset_idx.view(-1),
                     (prob_next * (b_idx - l_idx.float())).view(-1))

        return m.view(batch_size, self.num_atoms)
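To make dist_projection concrete, here is a tiny worked sketch (the constructor arguments are
assumptions): a uniform next-state distribution over 5 atoms on [-1, 1] is shifted by reward 0.5
with discount 0.9, clamped to the support, and redistributed onto the fixed atoms:

import torch

net = RainbowNet((4, ), (2, ), num_atoms=5, v_min=-1.0, v_max=1.0)
rewards = torch.tensor([[0.5]])
masks = torch.tensor([[1.0]])        # non-terminal transition
prob_next = torch.full((1, 5), 0.2)  # uniform over the 5 atoms
m = net.dist_projection(rewards, masks, discount=0.9, prob_next=prob_next)
assert torch.allclose(m.sum(dim=1), torch.ones(1))  # probability mass is conserved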
Example #10
class CategoricalNet(NetworkType):
    """
    Computes discrete probability distribution for the state-action Q function.

    CategoricalNet [1] learns significantly differently compared to the other nets here.
    For this reason it isn't a simple drop-in replacement in most (current) agents.
    Please check whether the Agent supports it.

    The RainbowNet uses the same algorithm, but not this particular net.

    [1] "A Distributional Perspective on Reinforcement Learning" (2017) by M. G. Bellemare, W. Dabney, R. Munos.
        Link: http://arxiv.org/abs/1707.06887
    """
    def __init__(
        self,
        num_atoms: int = 21,
        v_min: float = -20.,
        v_max: float = 20.,
        state_size: Optional[int] = None,
        action_size: Optional[int] = None,
        hidden_layers: Sequence[int] = (200, 200),
        net: Optional[NetworkType] = None,
        device: Optional[torch.device] = None,
    ):
        """
        Parameters:
            num_atoms: Number of atoms that discretize the probability distribution.
            v_min: Minimum (edge) value of the shifted distribution.
            v_max: Maximum (edge) value of the shifted distribution.
            net: (Optional) A network used for estimation. If `net` is provided then `hidden_layers` has no effect.
            state_size: Size of the state.
            action_size: Length of the output.
            hidden_layers: Shape of the hidden layers that are fully connected networks.

        *Note* that either `net` or both `state_size` and `action_size` must not be None.
        If `state_size` and `action_size` are provided then the default net is created as a
        fully connected network with layers of `hidden_layers` size.

        """
        super(CategoricalNet, self).__init__()
        self.device = device
        self.num_atoms = num_atoms
        self.v_min = v_min
        self.v_max = v_max
        self.z_atoms = torch.linspace(v_min, v_max, num_atoms, device=device)
        self.z_delta = self.z_atoms[1] - self.z_atoms[0]

        if net is not None:
            self.net = net
        elif state_size is not None and action_size is not None:
            self.net = FcNet(state_size,
                             action_size * self.num_atoms,
                             hidden_layers=hidden_layers,
                             device=self.device)
        else:
            raise ValueError(
                "CategoricalNet needs to be instantiated either with `net` or (`state_size` and `action_size`)"
            )

        self.in_features = self.net.in_features
        self.out_features = (self.net.out_features // self.num_atoms,
                             self.num_atoms)
        self.to(device=device)

    def reset_parameters(self):
        self.net.reset_parameters()

    def forward(self, *args) -> torch.Tensor:
        """
        Passes *args through the net with proper handling.
        """
        return self.net(*args).view((-1, ) + self.out_features)

    @lru_cache(maxsize=5)
    def _offset(self, batch_size):
        offset = torch.linspace(0, ((batch_size - 1) * self.num_atoms),
                                batch_size,
                                device=self.device)
        return offset.unsqueeze(1).expand(batch_size, self.num_atoms)

    def mean(self, values):
        return (self.z_atoms * values).mean()

    def dist_projection(self, rewards: torch.Tensor, masks: torch.Tensor,
                        discount: float,
                        prob_next: torch.Tensor) -> torch.Tensor:
        """
        Parameters:
            rewards: Tensor containing rewards that are used as offsets for each distribution.
            masks: Tensor indicating whether the iteration is terminal. Usually `masks = 1 - dones`.
            discount: Discounting value for added Q distributional estimate. Typically gamma or gamma^(n_steps).
            prob_next: Probability estimates based on transitioned (next) states.

        """
        batch_size = rewards.shape[0]
        Tz = rewards + discount * masks * self.z_atoms.view(1, -1)
        assert Tz.shape == (batch_size, self.num_atoms)
        # In place. A tiny eps is required for numerical stability, e.g. ceil(1.00000001).
        Tz.clamp_(self.v_min, self.v_max - 1e-4)

        b_idx = (Tz - self.v_min) / self.z_delta
        l_idx = b_idx.floor().to(torch.int64)
        u_idx = b_idx.ceil().to(torch.int64)

        # Fix disappearing probability mass when l = b = u (b is int)
        # Checking twice `l_idx == u_idx` is on purpose, since we first want to distribute to the left
        # but in cases we can't go any lower (already on the boundary) we will move them higher.
        l_idx[torch.logical_and(l_idx == u_idx, u_idx > 0)] -= 1
        u_idx[torch.logical_and(l_idx == u_idx,
                                l_idx < self.num_atoms - 1)] += 1

        offset = self._offset(batch_size)
        l_offset_idx = (l_idx + offset).type(torch.int64)
        u_offset_idx = (u_idx + offset).type(torch.int64)

        # Distribute probability of Tz
        m = rewards.new_zeros(batch_size * self.num_atoms)

        # Dealing with indices. *Note* not to forget batches.
        # m[l] = m[l] + p(s[t+n], a*)(u - b)
        m.index_add_(0, l_offset_idx.view(-1),
                     (prob_next * (u_idx.float() - b_idx)).view(-1))
        # m[u] = m[u] + p(s[t+n], a*)(b - l)
        m.index_add_(0, u_offset_idx.view(-1),
                     (prob_next * (b_idx - l_idx.float())).view(-1))

        return m.view(batch_size, self.num_atoms)
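A short hypothetical usage (FcNet assumed available): forward reshapes the flat output into one
atom distribution per action, matching `out_features == (action_size, num_atoms)`:

import torch

net = CategoricalNet(num_atoms=51, state_size=4, action_size=2)
logits = net(torch.randn(8, 4))  # shape (8, 2, 51)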
Example #11
    def __init__(self,
                 input_shape: Sequence[int],
                 output_shape: Sequence[int],
                 num_atoms: int,
                 hidden_layers=(200, 200),
                 device=None,
                 **kwargs):
        """
        Parameters:
            input_shape: Shape of the single input.
            output_shape: Shape of the expected output.
            num_atoms: Number of atoms used in estimating distribution.
            pre_network_fn (func):
                A shared network that is used before *value* and *advantage* networks.
        """
        super(RainbowNet, self).__init__()
        self.device = device

        self.pre_network = None
        in_features = input_shape[0]
        out_features = output_shape[0]
        if 'pre_network_fn' in kwargs:
            self.pre_network = kwargs.get("pre_network_fn")(in_features=input_shape)
            # Registers pre_network's parameters to this module
            self.pre_network_params = self.pre_network.parameters()
            in_features = self.pre_network.out_features

        self.v_min = float(kwargs.get("v_min", -10))
        self.v_max = float(kwargs.get("v_max", 10))
        # `num_atoms` is an explicit argument here, so don't read it from kwargs
        self.num_atoms = num_atoms
        self.z_atoms = torch.linspace(self.v_min,
                                      self.v_max,
                                      self.num_atoms,
                                      device=self.device)
        self.z_delta = self.z_atoms[1] - self.z_atoms[0]

        self.noisy = kwargs.get("noisy", False)
        if self.noisy:
            self.value_net = NoisyNet(in_features,
                                      num_atoms,
                                      hidden_layers=hidden_layers,
                                      device=device)
            self.advantage_net = NoisyNet(in_features,
                                          out_features * num_atoms,
                                          hidden_layers=hidden_layers,
                                          device=device)
        else:
            self.value_net = FcNet(in_features,
                                   num_atoms,
                                   hidden_layers=hidden_layers,
                                   gate_out=None,
                                   device=device)
            self.advantage_net = FcNet((in_features, ),
                                       (out_features * num_atoms, ),
                                       hidden_layers=hidden_layers,
                                       gate_out=None,
                                       device=device)

        self.in_features = in_features if self.pre_network is None else self.pre_network.in_features
        self.out_features = out_features
        self.to(device=self.device)
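In this variant `num_atoms` is an explicit argument rather than a keyword. A hypothetical
construction:

net = RainbowNet((8, ), (4, ), num_atoms=51, hidden_layers=(128, 128))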