Example #1
    def __init__(self,
                 input_channels,
                 input_size,
                 n,
                 conv_layer_params,
                 same_padding=False,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 flatten_output=False,
                 name="ParallelImageEncodingNetwork"):
        """
        Args:
            input_channels (int): number of channels in the input image
            input_size (int or tuple): the input image size (height, width)
            n (int): number of parallel networks
            conv_layer_params (tuple[tuple]): a non-empty tuple of
                tuples (num_filters, kernel_size, strides, padding), where
                ``padding`` is optional
            same_padding (bool): similar to TF's conv2d ``same`` padding mode. If
                True, the user-provided paddings in ``conv_layer_params`` will be
                replaced by automatically calculated ones; if False, it
                corresponds to TF's ``valid`` padding mode (the user can still
                provide custom paddings though)
            activation (torch.nn.functional): activation for all the layers
            kernel_initializer (Callable): initializer for all the layers.
            flatten_output (bool): If False, the output will be an image
                structure of shape ``(B, n, C, H, W)``; otherwise the output
                will be flattened into a feature of shape ``(B, n, C*H*W)``.
        """
        input_size = common.tuplify2d(input_size)
        super().__init__(input_tensor_spec=TensorSpec((input_channels, ) +
                                                      input_size),
                         name=name)

        assert isinstance(conv_layer_params, tuple)
        assert len(conv_layer_params) > 0

        self._flatten_output = flatten_output
        self._conv_layer_params = conv_layer_params
        self._conv_layers = nn.ModuleList()
        for paras in conv_layer_params:
            filters, kernel_size, strides = paras[:3]
            padding = paras[3] if len(paras) > 3 else 0
            if same_padding:  # overwrite paddings
                kernel_size = common.tuplify2d(kernel_size)
                padding = ((kernel_size[0] - 1) // 2,
                           (kernel_size[1] - 1) // 2)
            self._conv_layers.append(
                layers.ParallelConv2D(input_channels,
                                      filters,
                                      kernel_size,
                                      n,
                                      activation=activation,
                                      kernel_initializer=kernel_initializer,
                                      strides=strides,
                                      padding=padding))
            input_channels = filters
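
A minimal usage sketch for the encoder above (parameter values are illustrative; the construction follows the signature shown, and the output shape comment follows the docstring):

import torch

# 4 parallel encoders over 3x32x32 images, two strided conv layers each
net = ParallelImageEncodingNetwork(
    input_channels=3,
    input_size=(32, 32),
    n=4,
    conv_layer_params=((16, 3, 2), (32, 3, 2)),
    same_padding=True,
    flatten_output=True)  # output shape (B, n, C*H*W) per the docstring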
Example #2
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 activation=torch.relu_,
                 strides=1,
                 pooling_kernel=None,
                 padding=0,
                 use_bias=False):
        """A 2D conv layer that does not maintain its own weight and bias,
        but accepts both from users. If the given parameter (weight and bias) 
        tensor has an extra batch dimension (first dimension), it performs
        parallel FC operation. 

        Args:
            in_channels (int): channels of the input image
            out_channels (int): channels of the output image
            kernel_size (int or tuple): size of the conv kernel
            activation (torch.nn.functional): activation applied to the output
            strides (int or tuple): strides of the convolution
            pooling_kernel (int or tuple): size of the pooling kernel applied
                after the convolution (no pooling if None)
            padding (int or tuple): zero-padding added to the input
            use_bias (bool): whether to use bias
        """
        super(ParamConv2D, self).__init__()

        self._in_channels = in_channels
        self._out_channels = out_channels
        self._activation = activation
        self._kernel_size = common.tuplify2d(kernel_size)
        self._kH, self._kW = self._kernel_size
        self._strides = strides
        self._pooling_kernel = pooling_kernel
        self._padding = padding
        self._use_bias = use_bias

        self._weight_length = out_channels * in_channels * self._kH * self._kW
        self.set_weight(torch.randn(1, self._weight_length))
        if use_bias:
            self._bias_length = out_channels
            self.set_bias(torch.randn(1, self._bias_length))
        else:
            self._bias_length = 0
            self._bias = None
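
A short sketch of supplying external parameters to this layer (grounded in the constructor above: set_weight/set_bias and the weight length out_channels * in_channels * kH * kW are taken directly from ``__init__``):

import torch

layer = ParamConv2D(in_channels=3, out_channels=16, kernel_size=3,
                    use_bias=True)
# weight length = 16 * 3 * 3 * 3 = 432, matching self._weight_length
layer.set_weight(torch.randn(1, 432))
layer.set_bias(torch.randn(1, 16))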
Example #3
    def __init__(self,
                 input_channels,
                 input_size,
                 conv_layer_params,
                 same_padding=False,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 flatten_output=False,
                 name="ImageEncodingNetwork"):
        """
        Initialize the layers for encoding an image into a latent vector.
        Currently there seems to be no need for this class to handle nested
        inputs; if necessary, extend the argument list to support them in the
        future.

        How to calculate the output size:
        `<https://pytorch.org/docs/stable/nn.html#torch.nn.Conv2d>`_::

            H = (H1 - HF + 2P) // strides + 1

        where H = output size, H1 = input size, HF = size of kernel, P = padding.

        Regarding padding: the previous TF version had two padding modes:
        ``valid`` and ``same``. For the former, there is no padding (P=0); the
        latter is also called "half padding" (P=(HF-1)//2): when strides=1 and
        HF is an odd number, the output has the same size as the input.
        Currently PyTorch doesn't support different left and right paddings,
        so P is always (HF-1)//2; as a result, if HF is an even number, the
        output size will decrease by 1 when strides=1.

        Args:
            input_channels (int): number of channels in the input image
            input_size (int or tuple): the input image size (height, width)
            conv_layer_params (tuple[tuple]): a non-empty tuple of
                tuples (num_filters, kernel_size, strides, padding), where
                ``padding`` is optional
            same_padding (bool): similar to TF's conv2d ``same`` padding mode. If
                True, the user-provided paddings in ``conv_layer_params`` will be
                replaced by automatically calculated ones; if False, it
                corresponds to TF's ``valid`` padding mode (the user can still
                provide custom paddings though)
            activation (torch.nn.functional): activation for all the layers
            kernel_initializer (Callable): initializer for all the layers.
            flatten_output (bool): If False, the output will be an image
                structure of shape ``BxCxHxW``; otherwise the output will be
                flattened into a feature of shape ``BxN``.
        """
        input_size = common.tuplify2d(input_size)
        super().__init__(input_tensor_spec=TensorSpec((input_channels, ) +
                                                      input_size),
                         name=name)

        assert isinstance(conv_layer_params, tuple)
        assert len(conv_layer_params) > 0

        self._flatten_output = flatten_output
        self._conv_layer_params = conv_layer_params
        self._conv_layers = nn.ModuleList()
        for paras in conv_layer_params:
            filters, kernel_size, strides = paras[:3]
            padding = paras[3] if len(paras) > 3 else 0
            if same_padding:  # overwrite paddings
                kernel_size = common.tuplify2d(kernel_size)
                padding = ((kernel_size[0] - 1) // 2,
                           (kernel_size[1] - 1) // 2)
            self._conv_layers.append(
                layers.Conv2D(input_channels,
                              filters,
                              kernel_size,
                              activation=activation,
                              kernel_initializer=kernel_initializer,
                              strides=strides,
                              padding=padding))
            input_channels = filters
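
A worked instance of the output-size formula from the docstring above, H = (H1 - HF + 2P) // strides + 1 (parameter values are illustrative):

# For a 32x32 input with same_padding=True, P = (3 - 1) // 2 = 1:
#   layer 1 (16, 3, 2): H = (32 - 3 + 2*1) // 2 + 1 = 16
#   layer 2 (32, 3, 2): H = (16 - 3 + 2*1) // 2 + 1 = 8
net = ImageEncodingNetwork(
    input_channels=3,
    input_size=32,
    conv_layer_params=((16, 3, 2), (32, 3, 2)),
    same_padding=True,
    flatten_output=True)  # flattened feature size: 32 * 8 * 8 = 2048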
Example #4
    def __init__(self,
                 input_size,
                 n,
                 transconv_layer_params,
                 start_decoding_size,
                 start_decoding_channels,
                 same_padding=False,
                 preprocess_fc_layer_params=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 output_activation=torch.tanh,
                 name="ImageDecodingNetwork"):
        """
        Args:
            input_size (int): the size of the input latent vector
            n (int): number of parallel networks
            transconv_layer_params (tuple[tuple]): a non-empty
                tuple of tuple (num_filters, kernel_size, strides, padding),
                where ``padding`` is optional.
            start_decoding_size (int or tuple): the initial height and width
                we'd like to have for the feature map
            start_decoding_channels (int): the initial number of channels we'd
                like to have for the feature map. Note that we always first
                project an input latent vector into a vector of an appropriate
                length so that it can be reshaped into (``start_decoding_channels``,
                ``start_decoding_height``, ``start_decoding_width``).
            same_padding (bool): similar to TF's conv2d ``same`` padding mode. If
                True, the user provided paddings in ``transconv_layer_params`` will
                be replaced by automatically calculated ones; if False, it
                corresponds to TF's ``valid`` padding mode (the user can still
                provide custom paddings though).
            preprocess_fc_layer_params (tuple[int]): a tuple of fc
                layer units. These fc layers are used for preprocessing the
                latent vector before transposed convolutions.
            activation (nn.functional): activation for hidden layers
            kernel_initializer (Callable): initializer for all the layers.
            output_activation (nn.functional): activation for the output layer.
                Usually our image inputs are normalized to [0, 1] or [-1, 1],
                so this function should be ``torch.sigmoid`` or
                ``torch.tanh``.
            name (str):
        """
        super().__init__(input_tensor_spec=TensorSpec((input_size, )),
                         name=name)

        assert isinstance(transconv_layer_params, tuple)
        assert len(transconv_layer_params) > 0

        self._preprocess_fc_layers = nn.ModuleList()
        if preprocess_fc_layer_params is not None:
            for size in preprocess_fc_layer_params:
                self._preprocess_fc_layers.append(
                    layers.ParallelFC(input_size,
                                      size,
                                      n,
                                      activation=activation,
                                      kernel_initializer=kernel_initializer))
                input_size = size

        start_decoding_size = common.tuplify2d(start_decoding_size)
        # pytorch assumes "channels_first" !
        self._start_decoding_shape = [
            start_decoding_channels, start_decoding_size[0],
            start_decoding_size[1]
        ]
        self._preprocess_fc_layers.append(
            layers.ParallelFC(input_size,
                              np.prod(self._start_decoding_shape),
                              n,
                              activation=activation,
                              kernel_initializer=kernel_initializer))

        self._transconv_layer_params = transconv_layer_params
        self._transconv_layers = nn.ModuleList()
        in_channels = start_decoding_channels
        for i, paras in enumerate(transconv_layer_params):
            filters, kernel_size, strides = paras[:3]
            padding = paras[3] if len(paras) > 3 else 0
            if same_padding:  # overwrite paddings
                kernel_size = common.tuplify2d(kernel_size)
                padding = ((kernel_size[0] - 1) // 2,
                           (kernel_size[1] - 1) // 2)
            act = activation
            if i == len(transconv_layer_params) - 1:
                act = output_activation
            self._transconv_layers.append(
                layers.ParallelConvTranspose2D(
                    in_channels,
                    filters,
                    kernel_size,
                    n,
                    activation=act,
                    kernel_initializer=kernel_initializer,
                    strides=strides,
                    padding=padding))
            in_channels = filters
        self._n = n
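
A construction sketch for this parallel decoder (the class name ParallelImageDecodingNetwork is assumed from the ``n`` argument and the parallel layers used above; parameter values are illustrative):

import torch

# decode a 64-d latent into a (3, 32, 32) image with n=4 parallel decoders;
# with same_padding=True, kernel 4 and stride 2 double the spatial size:
# 8x8 -> 16x16 -> 32x32 (see the size formula in Example #5)
net = ParallelImageDecodingNetwork(
    input_size=64,
    n=4,
    transconv_layer_params=((16, 4, 2), (3, 4, 2)),
    start_decoding_size=8,
    start_decoding_channels=32,
    same_padding=True,
    preprocess_fc_layer_params=(256, ),
    output_activation=torch.tanh)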
Example #5
    def __init__(self,
                 input_size,
                 transconv_layer_params,
                 start_decoding_size,
                 start_decoding_channels,
                 same_padding=False,
                 preprocess_fc_layer_params=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 output_activation=torch.tanh,
                 name="ImageDecodingNetwork"):
        """
        Initialize the layers for decoding a latent vector into an image.
        Currently there seems to be no need for this class to handle nested
        inputs; if necessary, extend the argument list to support them in the
        future.

        How to calculate the output size:
        `<https://pytorch.org/docs/stable/nn.html#torch.nn.ConvTranspose2d>`_::

            H = (H1-1) * strides + HF - 2P + OP

        where H = output size, H1 = input size, HF = size of kernel, P = padding,
        OP = output_padding (currently hardcoded to be 0 for this class).

        Regarding padding: the previous TF version had two padding modes:
        ``valid`` and ``same``. For the former, there is no padding (P=0); the
        latter is also called "half padding" (P=(HF-1)//2): when strides=1 and
        HF is an odd number, the output has the same size as the input.
        Currently PyTorch doesn't support different left and right paddings,
        so P is always (HF-1)//2; as a result, if HF is an even number, the
        output size will increase by 1 when strides=1.

        Args:
            input_size (int): the size of the input latent vector
            transconv_layer_params (tuple[tuple]): a non-empty
                tuple of tuple (num_filters, kernel_size, strides, padding),
                where ``padding`` is optional.
            start_decoding_size (int or tuple): the initial height and width
                we'd like to have for the feature map
            start_decoding_channels (int): the initial number of channels we'd
                like to have for the feature map. Note that we always first
                project an input latent vector into a vector of an appropriate
                length so that it can be reshaped into (``start_decoding_channels``,
                ``start_decoding_height``, ``start_decoding_width``).
            same_padding (bool): similar to TF's conv2d ``same`` padding mode. If
                True, the user provided paddings in ``transconv_layer_params`` will
                be replaced by automatically calculated ones; if False, it
                corresponds to TF's ``valid`` padding mode (the user can still
                provide custom paddings though).
            preprocess_fc_layer_params (tuple[int]): a tuple of fc
                layer units. These fc layers are used for preprocessing the
                latent vector before transposed convolutions.
            activation (nn.functional): activation for hidden layers
            kernel_initializer (Callable): initializer for all the layers.
            output_activation (nn.functional): activation for the output layer.
                Usually our image inputs are normalized to [0, 1] or [-1, 1],
                so this function should be ``torch.sigmoid`` or
                ``torch.tanh``.
            name (str):
        """
        super().__init__(input_tensor_spec=TensorSpec((input_size, )),
                         name=name)

        assert isinstance(transconv_layer_params, tuple)
        assert len(transconv_layer_params) > 0

        self._preprocess_fc_layers = nn.ModuleList()
        if preprocess_fc_layer_params is not None:
            for size in preprocess_fc_layer_params:
                self._preprocess_fc_layers.append(
                    layers.FC(input_size,
                              size,
                              activation=activation,
                              kernel_initializer=kernel_initializer))
                input_size = size

        start_decoding_size = common.tuplify2d(start_decoding_size)
        # pytorch assumes "channels_first" !
        self._start_decoding_shape = [
            start_decoding_channels, start_decoding_size[0],
            start_decoding_size[1]
        ]
        self._preprocess_fc_layers.append(
            layers.FC(input_size,
                      np.prod(self._start_decoding_shape),
                      activation=activation,
                      kernel_initializer=kernel_initializer))

        self._transconv_layer_params = transconv_layer_params
        self._transconv_layers = nn.ModuleList()
        in_channels = start_decoding_channels
        for i, paras in enumerate(transconv_layer_params):
            filters, kernel_size, strides = paras[:3]
            padding = paras[3] if len(paras) > 3 else 0
            if same_padding:  # overwrite paddings
                kernel_size = common.tuplify2d(kernel_size)
                padding = ((kernel_size[0] - 1) // 2,
                           (kernel_size[1] - 1) // 2)
            act = activation
            if i == len(transconv_layer_params) - 1:
                act = output_activation
            self._transconv_layers.append(
                layers.ConvTranspose2D(in_channels,
                                       filters,
                                       kernel_size,
                                       activation=act,
                                       kernel_initializer=kernel_initializer,
                                       strides=strides,
                                       padding=padding))
            in_channels = filters
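
A worked instance of the docstring's size formula, H = (H1-1) * strides + HF - 2P with OP = 0 (parameter values are illustrative):

# With same_padding=True and kernel size 4, P = (4 - 1) // 2 = 1:
#   layer 1 (16, 4, 2): H = (8 - 1) * 2 + 4 - 2*1 = 16
#   layer 2 (3, 4, 2):  H = (16 - 1) * 2 + 4 - 2*1 = 32
net = ImageDecodingNetwork(
    input_size=64,
    transconv_layer_params=((16, 4, 2), (3, 4, 2)),
    start_decoding_size=8,
    start_decoding_channels=32,
    same_padding=True)  # a 64-d latent decodes into a (3, 32, 32) image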
Example #6
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 n,
                 activation=torch.relu_,
                 strides=1,
                 padding=0,
                 use_bias=None,
                 use_bn=False,
                 kernel_initializer=None,
                 kernel_init_gain=1.0,
                 bias_init_value=0.0):
        """A parallel ConvTranspose2D layer that can be used to perform n
        independent 2D transposed convolutions in parallel.

        Args:
            in_channels (int): channels of the input image
            out_channels (int): channels of the output image
            kernel_size (int or tuple): size of the conv kernel
            n (int): number of independent ``ConvTranspose2D`` layers
            activation (torch.nn.functional): activation applied to the output
            strides (int or tuple): strides of the transposed convolution
            padding (int or tuple): padding of the transposed convolution
            use_bias (bool|None): If None, will use ``not use_bn``
            use_bn (bool):
            kernel_initializer (Callable): initializer for the conv_trans layer.
                If None is provided a ``variance_scaling_initializer`` with gain
                as ``kernel_init_gain`` will be used.
            kernel_init_gain (float): a scaling factor (gain) applied to the
                std of kernel init distribution. It will be ignored if
                ``kernel_initializer`` is not None.
            bias_init_value (float): a constant value for initializing the bias
        """
        super(ParallelConvTranspose2D, self).__init__()
        if use_bias is None:
            use_bias = not use_bn
        self._activation = activation
        self._n = n
        self._in_channels = in_channels
        self._out_channels = out_channels
        self._kernel_size = common.tuplify2d(kernel_size)
        self._conv_trans2d = nn.ConvTranspose2d(in_channels * n,
                                                out_channels * n,
                                                kernel_size,
                                                groups=n,
                                                stride=strides,
                                                padding=padding,
                                                bias=use_bias)

        for i in range(n):
            if kernel_initializer is None:
                variance_scaling_init(
                    self._conv_trans2d.weight.data[i * in_channels:(i + 1) *
                                                   in_channels],
                    gain=kernel_init_gain,
                    nonlinearity=self._activation)
            else:
                kernel_initializer(
                    self._conv_trans2d.weight.data[i * in_channels:(i + 1) *
                                                   in_channels])

        # [n*C, C', kernel_size, kernel_size]->[n, C, C', kernel_size, kernel_size]
        self._weight = self._conv_trans2d.weight.view(self._n,
                                                      self._in_channels,
                                                      self._out_channels,
                                                      self._kernel_size[0],
                                                      self._kernel_size[1])

        if use_bias:
            nn.init.constant_(self._conv_trans2d.bias.data, bias_init_value)
            # [n*C]->[n, C]
            self._bias = self._conv_trans2d.bias.view(self._n,
                                                      self._out_channels)
        else:
            self._bias = None

        if use_bn:
            self._bn = nn.BatchNorm2d(n * out_channels)
        else:
            self._bn = None
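
The "parallel" trick above is grouped convolution: an ``nn.ConvTranspose2d`` with ``groups=n`` and ``n*C`` input/output channels is mathematically n independent transposed convolutions, each applied to its own block of channels. A minimal sketch of the equivalence in plain PyTorch:

import torch
import torch.nn as nn

n, cin, cout = 4, 8, 16
grouped = nn.ConvTranspose2d(n * cin, n * cout, kernel_size=4, groups=n,
                             stride=2, padding=1)
x = torch.randn(2, n * cin, 8, 8)   # n blocks of cin channels each
y = grouped(x)                      # (2, n*cout, 16, 16); each output block
                                    # depends only on its own input block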
Example #7
    def __init__(self,
                 input_channels,
                 input_size,
                 conv_layer_params,
                 same_padding=False,
                 activation=torch.relu_,
                 use_bias=False,
                 flatten_output=False,
                 name="ParamConvNet"):
        """A fully 2D conv network that does not maintain its own network parameters,
        but accepts them from users. If the given parameter tensor has an extra batch
        dimension (first dimension), it performs parallel operations.

        Args:
            input_channels (int): number of channels in the input image
            input_size (int or tuple): the input image size (height, width)
            conv_layer_params (tuple[tuple]): a tuple of tuples, where each
                tuple takes the format
                ``(filters, kernel_size, strides, padding, pooling_kernel)``,
                with ``padding`` and ``pooling_kernel`` being optional.
            same_padding (bool): similar to TF's conv2d ``same`` padding mode. If
                True, the user-provided paddings in ``conv_layer_params`` will be
                replaced by automatically calculated ones; if False, it
                corresponds to TF's ``valid`` padding mode (the user can still
                provide custom paddings though)
            activation (torch.nn.functional): activation for all the layers
            use_bias (bool): whether to use bias
            flatten_output (bool): If False, the output will be an image
                structure of shape ``(B, n, C, H, W)``; otherwise the output
                will be flattened into a feature of shape ``(B, n, C*H*W)``.
            name (str):
        """

        input_size = common.tuplify2d(input_size)
        super().__init__(input_tensor_spec=TensorSpec((input_channels, ) +
                                                      input_size),
                         name=name)

        assert isinstance(conv_layer_params, tuple)
        assert len(conv_layer_params) > 0

        self._flatten_output = flatten_output
        self._conv_layer_params = conv_layer_params
        self._conv_layers = nn.ModuleList()
        self._param_length = None
        for paras in conv_layer_params:
            filters, kernel_size, strides = paras[:3]
            padding = paras[3] if len(paras) > 3 else 0
            pooling_kernel = paras[4] if len(paras) > 4 else None
            if same_padding:  # overwrite paddings
                kernel_size = common.tuplify2d(kernel_size)
                padding = ((kernel_size[0] - 1) // 2,
                           (kernel_size[1] - 1) // 2)
            self._conv_layers.append(
                ParamConv2D(input_channels,
                            filters,
                            kernel_size,
                            activation=activation,
                            strides=strides,
                            pooling_kernel=pooling_kernel,
                            padding=padding,
                            use_bias=use_bias))
            input_channels = filters
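
A construction sketch for the network above (parameter values are illustrative; only the constructor is shown in the source, so supplying the external parameter tensor later, e.g. through per-layer set_weight/set_bias as in Example #2, is an assumption):

net = ParamConvNet(
    input_channels=3,
    input_size=32,
    # (filters, kernel_size, strides, padding, pooling_kernel); the fifth
    # entry requests pooling after the first conv layer
    conv_layer_params=((16, 3, 2, 0, 2), (32, 3, 2)),
    same_padding=True,  # replaces the user-provided padding=0 above
    flatten_output=True)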