def __init__(self, code_size, img_size, kernel_size=4, num_input_channels=3, num_feature_maps=64, batch_norm=True):
    """Strided-conv image encoder that maps an image to a `code_size` code.

    Args:
        code_size: number of channels of the output code (spatially 1x1
            after the final conv).
        img_size: input spatial size; an int means a square image, a tuple
            is (height, width) with the aspect ratio preserved.
        kernel_size: kernel size of the downsampling convolutions.
        num_input_channels: channels of the input image (e.g. 3 for RGB).
        num_feature_maps: channels of the first conv layer; doubled at
            every subsequent layer.
        batch_norm: if True, a BatchNorm2d follows every downsampling conv.
    """
    super(Image_Encoder, self).__init__()
    if isinstance(img_size, tuple):
        # Anchor the final-size computation on a power-of-2 dimension so the
        # last feature map gets an integer spatial extent.
        # BUGFIX: the original evaluated is_power2(max(img_size)) BEFORE this
        # isinstance check, raising TypeError whenever img_size was an int.
        if is_power2(max(img_size)):
            stable_dim = max(img_size)
        else:
            stable_dim = min(img_size)
        self.img_size = img_size
        self.final_size = tuple(int(4 * x // stable_dim) for x in self.img_size)
    else:
        self.img_size = (img_size, img_size)
        self.final_size = (4, 4)
    self.code_size = code_size
    self.num_feature_maps = num_feature_maps
    self.cl = nn.ModuleList()
    # One stride-2 conv per halving, stopping once the map is ~4x4.
    self.num_layers = int(np.log2(max(self.img_size))) - 2
    stride = 2
    # This ensures that we have same padding no matter if we have even or odd kernels
    padding = calculate_padding(kernel_size, stride)

    def conv_block(in_channels, out_channels):
        # Stride-2 downsampling conv, optionally followed by batch norm.
        layers = [nn.Conv2d(in_channels, out_channels, kernel_size,
                            stride=stride, padding=padding // 2, bias=False)]
        if batch_norm:
            layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.ReLU(True))
        return nn.Sequential(*layers)

    self.cl.append(conv_block(num_input_channels, self.num_feature_maps))
    self.channels = [self.num_feature_maps]
    for _ in range(self.num_layers - 1):
        self.cl.append(conv_block(self.channels[-1], self.channels[-1] * 2))
        self.channels.append(2 * self.channels[-1])
    # Final conv collapses the remaining spatial extent into a 1x1 code.
    self.cl.append(nn.Sequential(
        nn.Conv2d(self.channels[-1], code_size, self.final_size,
                  stride=1, padding=0, bias=False),
        nn.Tanh()))
def __init__(self, in_channels, out_channels, in_size, kernel_size, stride=1, batch_norm=True):
    """Single transposed-conv upsampling layer with optional batch norm.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the transposed convolution.
        in_size: (height, width) of the incoming feature map.
        kernel_size: kernel size of the transposed convolution.
        stride: upsampling factor.
        batch_norm: if True, apply BatchNorm2d before the ReLU.
    """
    super(Deconv, self).__init__()
    # "Same"-style padding that works for both even and odd kernel sizes.
    pad = calculate_padding(kernel_size, stride)
    self.dcl = nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
                                  stride=stride, padding=pad // 2, bias=False)
    if batch_norm:
        post = nn.Sequential(nn.BatchNorm2d(out_channels), nn.ReLU(True))
    else:
        post = nn.ReLU(True)
    self.activation = post
    # Bookkeeping consumed by callers (e.g. to wire up skip connections).
    self.required_channels = out_channels
    self.out_size_required = tuple(stride * s for s in in_size)
def __init__(self, in_channels, out_channels, skip_channels, in_size, kernel_size, stride=1, batch_norm=True):
    """U-Net-style upsampling block: fuse skip features, then upsample.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the upsampling convolution.
        skip_channels: channels of the concatenated skip connection.
        in_size: (height, width) of the incoming feature map.
        kernel_size: kernel size of the upsampling transposed conv.
        stride: upsampling factor.
        batch_norm: if True, each conv is followed by BatchNorm2d + ReLU.
    """
    super(UnetBlock, self).__init__()
    # This ensures that we have same padding no matter if we have even or odd kernels
    pad = calculate_padding(kernel_size, stride)
    # First conv merges the concatenated skip features back to in_channels.
    self.dcl1 = nn.ConvTranspose2d(in_channels + skip_channels, in_channels, 3,
                                   padding=1, bias=False)
    # Second conv performs the actual upsampling.
    self.dcl2 = nn.ConvTranspose2d(in_channels, out_channels, kernel_size,
                                   stride=stride, padding=pad // 2, bias=False)

    def post_act(channels):
        # BatchNorm + ReLU when enabled, otherwise a bare ReLU.
        if batch_norm:
            return nn.Sequential(nn.BatchNorm2d(channels), nn.ReLU(True))
        return nn.ReLU(True)

    self.activation1 = post_act(in_channels)
    self.activation2 = post_act(out_channels)
    # Bookkeeping consumed by callers (e.g. to wire up skip connections).
    self.required_channels = out_channels
    self.out_size_required = tuple(s * stride for s in in_size)
def __init__(self, img_size, latent_size, condition_size=0, aux_size=0, kernel_size=4, num_channels=3, num_gen_channels=1024, skip_channels=None, batch_norm=True, sequential_noise=False, aux_only_on_top=False):
    """DCGAN-style generator upsampling a latent vector to an image.

    Args:
        img_size: output spatial size; an int means a square image, a tuple
            is (height, width) with the aspect ratio preserved.
        latent_size: size of the main latent vector.
        condition_size: size of an optional conditioning vector appended to
            the latent.
        aux_size: size of an optional auxiliary noise vector.
        kernel_size: kernel size of the upsampling layers.
        num_channels: channels of the generated image (e.g. 3 for RGB).
        num_gen_channels: channels of the first (widest) generator layer.
        skip_channels: per-layer skip-connection channel counts; when given,
            UnetBlock layers are used instead of plain Deconv layers.
            Default changed from a mutable `[]` to `None` (backward
            compatible: `None` behaves exactly like an empty list).
        batch_norm: if True, convs are followed by BatchNorm2d.
        sequential_noise: if True (and aux_size > 0), the auxiliary noise is
            correlated across time with a GRU.
        aux_only_on_top: if True, the auxiliary code is injected only into
            the top half of the initial feature map.
    """
    super(Generator, self).__init__()
    # BUGFIX: avoid the mutable-default-argument pitfall (`skip_channels=[]`).
    skip_channels = [] if skip_channels is None else skip_channels
    # If we have a tuple make sure we maintain the aspect ratio
    if isinstance(img_size, tuple):
        self.img_size = img_size
        self.init_size = tuple(int(4 * x / max(img_size)) for x in self.img_size)
    else:
        self.img_size = (img_size, img_size)
        self.init_size = (4, 4)
    self.latent_size = latent_size
    self.condition_size = condition_size
    self.aux_size = aux_size
    self.rnn_noise = None
    if self.aux_size > 0 and sequential_noise:
        # Smooth the auxiliary noise over time with a GRU + Tanh squashing.
        self.rnn_noise = nn.GRU(self.aux_size, self.aux_size, batch_first=True)
        self.rnn_noise_squashing = nn.Tanh()
    self.num_layers = int(np.log2(max(self.img_size))) - 1
    self.num_channels = num_channels
    self.num_gen_channels = num_gen_channels
    self.dcl = nn.ModuleList()
    self.aux_only_on_top = aux_only_on_top
    self.total_latent_size = self.latent_size + self.condition_size
    if self.aux_size > 0 and self.aux_only_on_top:
        # Auxiliary code feeds only the top half of the initial feature map;
        # the bottom half is zero-padded to full height.
        self.aux_dcl = nn.Sequential(
            nn.ConvTranspose2d(self.aux_size, num_gen_channels,
                               (self.init_size[0] // 2, self.init_size[1]), bias=False),
            nn.BatchNorm2d(num_gen_channels),
            nn.ReLU(True),
            nn.ConstantPad2d((0, 0, 0, self.init_size[0] // 2), 0))
    else:
        self.total_latent_size += self.aux_size
    stride = 2
    # First block projects the full latent vector to an init_size feature map.
    # (Deduplicated: previously two near-identical nn.Sequential branches.)
    first = [nn.ConvTranspose2d(self.total_latent_size, num_gen_channels,
                                self.init_size, bias=False)]
    if batch_norm:
        first.append(nn.BatchNorm2d(num_gen_channels))
    first.append(nn.ReLU(True))
    self.dcl.append(nn.Sequential(*first))
    num_input_channels = self.num_gen_channels
    in_size = self.init_size
    for i in range(self.num_layers - 2):
        if not skip_channels:
            self.dcl.append(Deconv(num_input_channels, num_input_channels // 2, in_size,
                                   kernel_size, stride=stride, batch_norm=batch_norm))
        else:
            self.dcl.append(
                UnetBlock(num_input_channels, num_input_channels // 2, skip_channels[i],
                          in_size, kernel_size, stride=stride, batch_norm=batch_norm))
        num_input_channels //= 2
        in_size = tuple(2 * x for x in in_size)
    padding = calculate_padding(kernel_size, stride)
    # Final upsampling conv produces the image channels; Tanh is kept as a
    # separate module (applied in forward).
    self.dcl.append(nn.ConvTranspose2d(num_input_channels, self.num_channels, kernel_size,
                                       stride=stride, padding=padding // 2, bias=False))
    self.final_activation = nn.Tanh()
def __init__(self, code_size, rate, feat_length, init_kernel=None, init_stride=None, num_feature_maps=16, increasing_stride=True):
    """1-D conv encoder mapping a raw-audio window to a `code_size` code.

    Args:
        code_size: number of channels of the output code.
        rate: audio sample rate (samples per second).
        feat_length: window length in seconds; the input has
            `feat_length * rate` samples.
        init_kernel: optional first-layer kernel size in seconds
            (converted to samples); used only together with init_stride.
        init_stride: optional first-layer stride in seconds.
        num_feature_maps: channels of the first conv layer; doubled at
            every subsequent layer.
        increasing_stride: if True, apply the prime-factor strides in
            increasing order, otherwise reversed.
    """
    super(Audio_Encoder, self).__init__()
    self.code_size = code_size
    self.cl = nn.ModuleList()
    self.activations = nn.ModuleList()
    self.strides = []
    self.kernels = []
    features = feat_length * rate
    # Factor the sample count so the stacked strides exactly cover it.
    strides = prime_factors(features)
    kernels = [2 * s for s in strides]
    if init_kernel is not None and init_stride is not None:
        # Optional fixed first layer specified in seconds; convert to samples
        # and re-factor the remaining temporal extent.
        self.strides.append(int(init_stride * rate))
        self.kernels.append(int(init_kernel * rate))
        padding = calculate_padding(init_kernel * rate, stride=init_stride * rate, in_size=features)
        init_features = calculate_output_size(features, init_kernel * rate, stride=init_stride * rate, padding=padding)
        strides = prime_factors(init_features)
        kernels = [2 * s for s in strides]
    if not increasing_stride:
        strides.reverse()
        kernels.reverse()
    self.strides.extend(strides)
    self.kernels.extend(kernels)
    # NOTE(review): the last stride/kernel pair is never used below — the loop
    # runs len - 1 times and the remaining temporal extent is collapsed by the
    # final full-width conv. Presumably intentional; confirm against forward().
    # (Deduplicated: previously an `i == 0` branch repeated the conv/activation
    # construction; channel progression 1 -> nfm -> 2*nfm -> ... is unchanged.)
    for i in range(len(self.strides) - 1):
        padding = calculate_padding(self.kernels[i], stride=self.strides[i], in_size=features)
        features = calculate_output_size(features, self.kernels[i], stride=self.strides[i], padding=padding)
        pad = int(math.ceil(padding / 2.0))
        in_ch = 1 if i == 0 else num_feature_maps
        out_ch = num_feature_maps if i == 0 else 2 * num_feature_maps
        self.cl.append(nn.Conv1d(in_ch, out_ch, self.kernels[i],
                                 stride=self.strides[i], padding=pad))
        self.activations.append(nn.Sequential(nn.BatchNorm1d(out_ch), nn.ReLU(True)))
        if i > 0:
            num_feature_maps *= 2
    # Final conv spans the whole remaining extent, yielding a length-1 code.
    self.cl.append(nn.Conv1d(num_feature_maps, self.code_size, features))
    self.activations.append(nn.Tanh())