Exemplo n.º 1
0
    def __init__(self, code_size, img_size, kernel_size=4, num_input_channels=3, num_feature_maps=64, batch_norm=True):
        """Build a stack of stride-2 convolutions followed by a final code layer.

        Args:
            code_size: Number of output channels of the final convolution.
            img_size: Either a tuple of spatial dimensions or a single int.
                A single int is expanded to ``(img_size, img_size)``.
            kernel_size: Kernel size of every stride-2 convolution.
            num_input_channels: Number of channels of the first convolution's
                input.
            num_feature_maps: Output channels of the first convolution; every
                further stride-2 convolution doubles the channel count.
            batch_norm: When true, a ``BatchNorm2d`` is inserted between each
                stride-2 convolution and its ReLU.
        """
        super(Image_Encoder, self).__init__()

        if isinstance(img_size, tuple):
            self.img_size = img_size
            # Get the dimension which is a power of 2
            largest = max(img_size)
            stable_dim = largest if is_power2(largest) else min(img_size)
            self.final_size = tuple(int(4 * x // stable_dim) for x in self.img_size)
        else:
            # max()/min() only make sense on the tuple form, so the scalar case
            # is handled here without touching them (calling max() on an int
            # raises TypeError).
            self.img_size = (img_size, img_size)
            self.final_size = (4, 4)

        self.code_size = code_size
        self.num_feature_maps = num_feature_maps
        self.cl = nn.ModuleList()
        self.num_layers = int(np.log2(max(self.img_size))) - 2

        stride = 2
        # This ensures that we have same padding no matter if we have even or odd kernels
        padding = calculate_padding(kernel_size, stride)

        def strided_block(in_channels, out_channels):
            # Conv -> (optional BatchNorm) -> ReLU; the submodule indices inside
            # the Sequential are 0, 1, 2 with batch norm and 0, 1 without.
            layers = [nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding // 2,
                                bias=False)]
            if batch_norm:
                layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(True))
            return nn.Sequential(*layers)

        self.cl.append(strided_block(num_input_channels, self.num_feature_maps))

        # self.channels records the output channel count of every stride-2 block.
        self.channels = [self.num_feature_maps]
        for _ in range(self.num_layers - 1):
            doubled = self.channels[-1] * 2
            self.cl.append(strided_block(self.channels[-1], doubled))
            self.channels.append(doubled)

        # Final layer: a convolution whose kernel is final_size, followed by Tanh.
        self.cl.append(nn.Sequential(
            nn.Conv2d(self.channels[-1], code_size, self.final_size, stride=1, padding=0, bias=False),
            nn.Tanh()))
Exemplo n.º 2
0
    def __init__(self, in_channels, out_channels, in_size, kernel_size, stride=1, batch_norm=True):
        """Create a transposed convolution plus its activation module.

        Args:
            in_channels: Input channels of the transposed convolution.
            out_channels: Output channels of the transposed convolution.
            in_size: Iterable of input sizes; each entry is multiplied by
                ``stride`` to form ``out_size_required``.
            kernel_size: Kernel size of the transposed convolution.
            stride: Stride of the transposed convolution.
            batch_norm: When true, the activation is ``BatchNorm2d`` followed
                by ReLU; otherwise it is a bare ReLU.
        """
        super(Deconv, self).__init__()

        # Only half of the value returned by calculate_padding is handed to the layer.
        half_padding = calculate_padding(kernel_size, stride) // 2
        self.dcl = nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=half_padding,
            bias=False,
        )

        self.activation = (
            nn.Sequential(nn.BatchNorm2d(out_channels), nn.ReLU(True))
            if batch_norm
            else nn.ReLU(True)
        )

        # Plain (non-module) attributes recording the output channel count and
        # the input sizes scaled by the stride.
        self.required_channels = out_channels
        self.out_size_required = tuple(dim * stride for dim in in_size)
Exemplo n.º 3
0
    def __init__(self, in_channels, out_channels, skip_channels, in_size, kernel_size, stride=1, batch_norm=True):
        """Create two transposed convolutions and their activation modules.

        Args:
            in_channels: Output channels of the first layer and input channels
                of the second.
            out_channels: Output channels of the second layer.
            skip_channels: Extra channels added to ``in_channels`` to form the
                first layer's input channel count.
            in_size: Iterable of input sizes; each entry is multiplied by
                ``stride`` to form ``out_size_required``.
            kernel_size: Kernel size of the second layer.
            stride: Stride of the second layer.
            batch_norm: When true, each activation is ``BatchNorm2d`` followed
                by ReLU; otherwise each is a bare ReLU.
        """
        super(UnetBlock, self).__init__()

        # This ensures that we have same padding no matter if we have even or odd kernels
        half_padding = calculate_padding(kernel_size, stride) // 2

        # First layer: fixed 3x3 kernel with padding 1 and default stride.
        merged_channels = in_channels + skip_channels
        self.dcl1 = nn.ConvTranspose2d(merged_channels, in_channels, 3, padding=1, bias=False)
        # Second layer: uses the caller-supplied kernel size and stride.
        self.dcl2 = nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=half_padding,
            bias=False,
        )

        if not batch_norm:
            self.activation1 = nn.ReLU(True)
            self.activation2 = nn.ReLU(True)
        else:
            self.activation1 = nn.Sequential(nn.BatchNorm2d(in_channels), nn.ReLU(True))
            self.activation2 = nn.Sequential(nn.BatchNorm2d(out_channels), nn.ReLU(True))

        self.required_channels = out_channels
        self.out_size_required = tuple(dim * stride for dim in in_size)
Exemplo n.º 4
0
    def __init__(self, img_size, latent_size, condition_size=0, aux_size=0, kernel_size=4, num_channels=3,
                 num_gen_channels=1024, skip_channels=None, batch_norm=True, sequential_noise=False,
                 aux_only_on_top=False):
        """Assemble the transposed-convolution layers of the generator.

        Args:
            img_size: Either a tuple of spatial dimensions or a single int.
                A single int is expanded to ``(img_size, img_size)``.
            latent_size: Size of the latent part of the first layer's input.
            condition_size: Size of the conditioning part, added to
                ``latent_size``.
            aux_size: Size of the auxiliary input. It is added to the first
                layer's input channels unless ``aux_only_on_top`` is set.
            kernel_size: Kernel size of the stride-2 layers.
            num_channels: Output channels of the last layer.
            num_gen_channels: Output channels of the first layer; each
                intermediate layer halves the channel count.
            skip_channels: Optional sequence indexed by intermediate layer.
                When non-empty, ``UnetBlock`` layers are built with
                ``skip_channels[i]`` skip channels; when ``None`` or empty,
                plain ``Deconv`` layers are built.
            batch_norm: Whether the first layer and the intermediate layers
                use batch normalisation. ``aux_dcl`` always contains a
                ``BatchNorm2d`` regardless of this flag.
            sequential_noise: When true and ``aux_size > 0``, a GRU and a Tanh
                module are created for the auxiliary input.
            aux_only_on_top: When true and ``aux_size > 0``, a separate
                ``aux_dcl`` branch is created for the auxiliary input.
        """
        super(Generator, self).__init__()
        # If we have a tuple make sure we maintain the aspect ratio
        if isinstance(img_size, tuple):
            self.img_size = img_size
            self.init_size = tuple(int(4 * x / max(img_size)) for x in self.img_size)
        else:
            self.img_size = (img_size, img_size)
            self.init_size = (4, 4)

        self.latent_size = latent_size
        self.condition_size = condition_size
        self.aux_size = aux_size

        self.rnn_noise = None
        if self.aux_size > 0 and sequential_noise:
            self.rnn_noise = nn.GRU(self.aux_size, self.aux_size, batch_first=True)
            self.rnn_noise_squashing = nn.Tanh()

        self.num_layers = int(np.log2(max(self.img_size))) - 1
        self.num_channels = num_channels
        self.num_gen_channels = num_gen_channels

        self.dcl = nn.ModuleList()

        self.aux_only_on_top = aux_only_on_top
        self.total_latent_size = self.latent_size + self.condition_size

        if self.aux_size > 0 and self.aux_only_on_top:
            # Separate branch for the auxiliary input: its kernel is half of
            # init_size[0] by init_size[1], and the output is then zero-padded
            # by init_size[0] // 2 on the last side listed for the
            # second-to-last dimension.
            self.aux_dcl = nn.Sequential(
                nn.ConvTranspose2d(self.aux_size, num_gen_channels, (self.init_size[0] // 2, self.init_size[1]),
                                   bias=False),
                nn.BatchNorm2d(num_gen_channels),
                nn.ReLU(True),
                nn.ConstantPad2d((0, 0, 0, self.init_size[0] // 2), 0))
        else:
            self.total_latent_size += self.aux_size

        stride = 2

        # First layer: ConvTranspose -> (optional BatchNorm) -> ReLU. The
        # Sequential indices are 0, 1, 2 with batch norm and 0, 1 without.
        first_layer = [nn.ConvTranspose2d(self.total_latent_size, num_gen_channels, self.init_size, bias=False)]
        if batch_norm:
            first_layer.append(nn.BatchNorm2d(num_gen_channels))
        first_layer.append(nn.ReLU(True))
        self.dcl.append(nn.Sequential(*first_layer))

        num_input_channels = self.num_gen_channels
        in_size = self.init_size
        for i in range(self.num_layers - 2):
            if not skip_channels:
                self.dcl.append(Deconv(num_input_channels, num_input_channels // 2, in_size, kernel_size, stride=stride,
                                       batch_norm=batch_norm))
            else:
                self.dcl.append(
                    UnetBlock(num_input_channels, num_input_channels // 2, skip_channels[i], in_size,
                              kernel_size, stride=stride, batch_norm=batch_norm))

            # Each intermediate layer halves the channels; the tracked size is doubled.
            num_input_channels //= 2
            in_size = tuple(2 * x for x in in_size)

        padding = calculate_padding(kernel_size, stride)
        self.dcl.append(nn.ConvTranspose2d(num_input_channels, self.num_channels, kernel_size,
                                           stride=stride, padding=padding // 2, bias=False))
        self.final_activation = nn.Tanh()
Exemplo n.º 5
0
    def __init__(self,
                 code_size,
                 rate,
                 feat_length,
                 init_kernel=None,
                 init_stride=None,
                 num_feature_maps=16,
                 increasing_stride=True):
        """Build a stack of 1-D convolutions ending in a ``code_size`` layer.

        Strides are taken from ``prime_factors`` of the feature count and every
        kernel is twice its stride. If both ``init_kernel`` and ``init_stride``
        are given, they (scaled by ``rate``) define the first layer and the
        remaining strides are factored from that layer's output size.

        Args:
            code_size: Output channels of the final convolution.
            rate: Multiplier applied to ``feat_length``, ``init_kernel`` and
                ``init_stride`` (presumably a sample rate; confirm with callers).
            feat_length: Multiplied by ``rate`` to get the input feature count.
            init_kernel: Optional kernel of the first layer, before scaling.
            init_stride: Optional stride of the first layer, before scaling.
            num_feature_maps: Output channels of the first convolution; each
                later convolution in the loop doubles the channel count.
            increasing_stride: When false, the factored strides and kernels
                are reversed in place before use.
        """
        super(Audio_Encoder, self).__init__()

        self.code_size = code_size
        self.cl = nn.ModuleList()
        self.activations = nn.ModuleList()
        self.strides = []
        self.kernels = []

        features = feat_length * rate
        strides = prime_factors(features)
        kernels = [2 * s for s in strides]

        if not (init_kernel is None or init_stride is None):
            scaled_kernel = init_kernel * rate
            scaled_stride = init_stride * rate
            self.strides.append(int(scaled_stride))
            self.kernels.append(int(scaled_kernel))
            padding = calculate_padding(scaled_kernel,
                                        stride=scaled_stride,
                                        in_size=features)
            init_features = calculate_output_size(features,
                                                  scaled_kernel,
                                                  stride=scaled_stride,
                                                  padding=padding)
            # Remaining layers are derived from the first layer's output size.
            strides = prime_factors(init_features)
            kernels = [2 * s for s in strides]

        if not increasing_stride:
            strides.reverse()
            kernels.reverse()

        self.strides.extend(strides)
        self.kernels.extend(kernels)

        # The last stride/kernel pair is not turned into a layer in this loop.
        for layer_idx in range(len(self.strides) - 1):
            kernel = self.kernels[layer_idx]
            step = self.strides[layer_idx]
            padding = calculate_padding(kernel, stride=step, in_size=features)
            features = calculate_output_size(features, kernel, stride=step, padding=padding)
            pad = int(math.ceil(padding / 2.0))

            # The first layer reads a single channel; later layers double the width.
            if layer_idx == 0:
                in_maps, out_maps = 1, num_feature_maps
            else:
                in_maps, out_maps = num_feature_maps, 2 * num_feature_maps

            self.cl.append(nn.Conv1d(in_maps, out_maps, kernel, stride=step, padding=pad))
            self.activations.append(
                nn.Sequential(nn.BatchNorm1d(out_maps), nn.ReLU(True)))
            num_feature_maps = out_maps

        # Final layer: kernel equal to the remaining feature count, then Tanh.
        self.cl.append(nn.Conv1d(num_feature_maps, self.code_size, features))
        self.activations.append(nn.Tanh())