Пример #1
0
    def __init__(
        self,
        in_channels=1,
        out_channels=1,
        latent_channels=16,
        strides=(4,),
        hidden_channels=64,
        residual_channels=32,
    ):
        """Initializes a new VAE instance.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            latent_channels: Number of channels for each latent variable.
            strides: Iterable of encoder/decoder strides. For each stride, we create
                an encoder (decoder) which downsamples (upsamples) the input by the
                stride. Decoders are created in reverse stride order.
            hidden_channels: Number of channels in (non residual block) hidden layers.
            residual_channels: Number of hidden channels in residual blocks.
        """
        super().__init__()

        # Materialize once so any iterable (list, tuple, generator) can be measured
        # with len() and walked backwards with reversed() below.
        strides = list(strides)
        last_stage = len(strides) - 1

        self._latent_channels = latent_channels

        # The stages are chained, so their resampling factors multiply: strides
        # [4, 2] shrink the input 8x overall, not 6x.
        total_stride = 1
        for stride in strides:
            total_stride *= stride
        self._total_stride = total_stride

        encoder = []
        for i, stride in enumerate(strides):
            # Only the first stage sees the raw input; only the last stage emits
            # 2 * latent_channels (presumably mean and log-variance -- confirm
            # against the forward pass).
            in_c = in_channels if i == 0 else hidden_channels
            out_c = hidden_channels if i < last_stage else 2 * self._latent_channels
            encoder.append(
                vaes.Encoder(
                    in_channels=in_c,
                    out_channels=out_c,
                    hidden_channels=hidden_channels,
                    residual_channels=residual_channels,
                    n_residual_blocks=2,
                    stride=stride,
                )
            )
        self._encoder = nn.Sequential(*encoder)

        decoder = []
        for i, stride in enumerate(reversed(strides)):
            # Mirror of the encoder: the first stage consumes the latent channels
            # and the last stage produces the output channels.
            in_c = self._latent_channels if i == 0 else hidden_channels
            out_c = hidden_channels if i < last_stage else out_channels
            decoder.append(
                vaes.Decoder(
                    in_channels=in_c,
                    out_channels=out_c,
                    hidden_channels=hidden_channels,
                    residual_channels=residual_channels,
                    n_residual_blocks=2,
                    stride=stride,
                )
            )
        self._decoder = nn.Sequential(*decoder)
Пример #2
0
    def __init__(
        self,
        in_channels=1,
        out_channels=1,
        in_size=28,
        latent_dim=10,
        hidden_channels=32,
        n_residual_blocks=2,
        residual_channels=16,
    ):
        """Initializes a new VAE instance.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            in_size: Size of the input images. Used to create bottleneck layers.
            latent_dim: The dimensionality of each latent variable.
            hidden_channels: Number of channels in (non residual block) hidden layers.
            n_residual_blocks: Number of residual blocks in each residual stack.
            residual_channels: Number of hidden channels in residual blocks.
        """
        super().__init__()

        self._latent_dim = latent_dim

        # The encoder below uses stride=4, so each spatial side is taken to be
        # in_size // 4 after encoding.
        reduced_size = in_size // 4
        self._encoder_out_dims = (hidden_channels, reduced_size, reduced_size)

        self._encoder = vaes.Encoder(
            in_channels=in_channels,
            out_channels=hidden_channels,
            hidden_channels=hidden_channels,
            residual_channels=residual_channels,
            n_residual_blocks=n_residual_blocks,
            stride=4,
        )

        # Derive the flattened size from the same per-side value as
        # _encoder_out_dims. Writing it inline as `c * s // 4 * s // 4` evaluates
        # left to right and gives a different number whenever in_size is not a
        # multiple of 4, which would mismatch the linear layers.
        flat_dim = hidden_channels * reduced_size * reduced_size
        self._mean = nn.Linear(flat_dim, self._latent_dim)
        self._log_var = nn.Linear(flat_dim, self._latent_dim)
        self._bottleneck = nn.Linear(self._latent_dim, flat_dim)
        self._decoder_ = vaes.Decoder(
            in_channels=hidden_channels,
            out_channels=out_channels,
            hidden_channels=hidden_channels,
            residual_channels=residual_channels,
            n_residual_blocks=n_residual_blocks,
            stride=4,
        )
Пример #3
0
    def __init__(
        self,
        in_channels=1,
        out_channels=1,
        hidden_channels=128,
        n_residual_blocks=2,
        residual_channels=32,
        n_embeddings=128,
        embedding_dim=16,
    ):
        """Builds the encoder, vector quantizer and decoder of a new VQVAE.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            hidden_channels: Number of channels in (non residual block) hidden layers.
            n_residual_blocks: Number of residual blocks in each residual stack.
            residual_channels: Number of hidden channels in residual blocks.
            n_embeddings: Number of VectorQuantizer embeddings.
            embedding_dim: Dimension of the VectorQuantizer embeddings.
        """
        super().__init__()

        # Keyword arguments that the encoder and the decoder have in common.
        shared_kwargs = {
            "hidden_channels": hidden_channels,
            "n_residual_blocks": n_residual_blocks,
            "residual_channels": residual_channels,
            "stride": 4,
        }

        # Sub-modules are created in encoder -> quantizer -> decoder order.
        self._encoder = vaes.Encoder(
            in_channels=in_channels, out_channels=hidden_channels, **shared_kwargs
        )
        # The quantizer takes the encoder's hidden_channels and the decoder is fed
        # embedding_dim channels.
        self._quantizer = vaes.Quantizer(
            in_channels=hidden_channels,
            n_embeddings=n_embeddings,
            embedding_dim=embedding_dim,
        )
        self._decoder = vaes.Decoder(
            in_channels=embedding_dim, out_channels=out_channels, **shared_kwargs
        )
Пример #4
0
    def __init__(
        self,
        in_channels=1,
        out_channels=1,
        latent_channels=2,
        hidden_channels=128,
        residual_channels=32,
    ):
        """Builds the encoder and decoder of a new VAE.

        Args:
            in_channels: Number of input channels.
            out_channels: Number of output channels.
            latent_channels: Number of channels for each latent variable.
            hidden_channels: Number of channels in (non residual block) hidden layers.
            residual_channels: Number of hidden channels in residual blocks.
        """
        super().__init__()

        self._latent_channels = latent_channels
        self._stride = 4  # Fixed resampling factor used by both sub-modules.

        # Keyword arguments that the encoder and the decoder have in common.
        shared_kwargs = {
            "hidden_channels": hidden_channels,
            "residual_channels": residual_channels,
            "n_residual_blocks": 2,
            "stride": self._stride,
        }

        # The encoder emits twice the latent channels; the decoder consumes a
        # single set of latent channels.
        encoder_out_channels = 2 * self._latent_channels
        self._encoder = vaes.Encoder(
            in_channels=in_channels, out_channels=encoder_out_channels, **shared_kwargs
        )
        self._decoder = vaes.Decoder(
            in_channels=self._latent_channels,
            out_channels=out_channels,
            **shared_kwargs,
        )