def __init__(self, dataset='youtubers'):
    super(discriminator, self).__init__()
    self.image_size = 64
    self.num_channels = 3
    self.embed_dim = 62
    self.projected_embed_dim = 128
    self.ndf = 64
    self.B_dim = 128
    self.C_dim = 16
    self.dataset_name = dataset
    self.conv1 = SpectralNorm(
        nn.Conv2d(self.num_channels, self.ndf, 4, 2, 1, bias=False))
    self.conv2 = SpectralNorm(
        nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1, bias=False))
    self.conv3 = SpectralNorm(
        nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1, bias=False))
    self.conv4 = SpectralNorm(
        nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1, bias=False))
    self.disc_linear = nn.Linear(self.ndf * 1, self.ndf)
    self.disc_linear2 = nn.Linear(31, 31)
    self.aux_linear = nn.Linear(4 * 4 * 512, self.embed_dim + 1)
    self.softmax = nn.Softmax(dim=1)  # explicit dim: implicit-dim nn.Softmax() is deprecated
    self.sigmoid = nn.Sigmoid()
    self.projector = Concat_embed(self.embed_dim, self.projected_embed_dim)
    self.netD_2 = nn.Sequential(
        # state size. (ndf*8) x 4 x 4
        nn.Conv2d(self.ndf * 8, 1, 4, 1, 0, bias=False),
        #nn.Conv2d(self.ndf * 8 + self.projected_embed_dim, 1, 4, 1, 0, bias=False),
        #nn.Sigmoid()
    )
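# Hedged note (assumption, not part of the original source): `SpectralNorm` is used
# throughout these modules but never defined in this excerpt. It is presumably a
# wrapper that re-normalizes the wrapped layer's weight by its largest singular
# value, estimated with power iteration, on every forward pass. A drop-in stand-in
# using PyTorch's built-in hook-based implementation:
import torch.nn as nn
from torch.nn.utils import spectral_norm

def SpectralNorm(module):
    # Registers a pre-forward hook that divides module.weight by an estimate of its
    # spectral norm, refined by one power-iteration step per forward call.
    return spectral_norm(module)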
def __init__(self, in_channels, out_channels, stride=1, downsample=None, norm=None, sn=False):
    super(ResidualBlock, self).__init__()
    bias = False if norm == 'BN' else True
    if sn:
        self.conv1 = SpectralNorm(
            nn.Conv2d(in_channels, out_channels, kernel_size=3,
                      stride=stride, padding=1, bias=bias))
    else:
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=bias)
    self.norm = norm
    if norm == 'BN':
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
    elif norm == 'IN':
        self.bn1 = nn.InstanceNorm2d(out_channels)
        self.bn2 = nn.InstanceNorm2d(out_channels)
    self.relu = nn.ReLU(inplace=True)
    if sn:
        self.conv2 = SpectralNorm(
            nn.Conv2d(out_channels, out_channels, kernel_size=3,
                      stride=1, padding=1, bias=bias))
    else:
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=bias)
    self.downsample = downsample
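# Hedged sketch (assumption): the excerpt only shows __init__; a forward() method in
# the canonical ResNet style, consistent with the layers defined above:
def forward(self, x):
    residual = x
    out = self.conv1(x)
    if self.norm in ('BN', 'IN'):
        out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)
    if self.norm in ('BN', 'IN'):
        out = self.bn2(out)
    if self.downsample is not None:
        residual = self.downsample(x)  # match shapes when stride/channels change
    return self.relu(out + residual)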
def __init__(self, improved=False, dataset='youtubers'):
    super(discriminator, self).__init__()
    self.image_size = 64
    self.num_channels = 3
    self.embed_dim = 1024
    self.projected_embed_dim = 128
    self.ndf = 64
    self.dataset_name = dataset
    if improved:
        self.netD_1 = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(self.num_channels, self.ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
        )
    else:
        self.netD_1 = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(self.num_channels, self.ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            SpectralNorm(
                nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1, bias=False)),
            #nn.BatchNorm2d(self.ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            SpectralNorm(
                nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1, bias=False)),
            #nn.BatchNorm2d(self.ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            SpectralNorm(
                nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1, bias=False)),
            #nn.BatchNorm2d(self.ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
        )
    self.projector = Concat_embed(self.embed_dim, self.projected_embed_dim)
    # Uncomment the first layer for concatenation and comment out the second;
    # for projection, do the opposite.
    # TODO: Handle this!!!
    self.netD_2 = nn.Sequential(
        nn.Conv2d(self.ndf * 8 + 64, 1, 4, 1, 0, bias=False)
        #nn.Conv2d(self.ndf * 8 + self.projected_embed_dim, 1, 4, 1, 0, bias=False)
    )
def __init__(self, image_size):
    super(discriminator, self).__init__()
    self.image_size = image_size
    self.num_channels = 3
    self.latent_space = 128
    self.ndf = 64
    # common network for both architectures, when generating 64x64 or 128x128 images
    self.netD_1 = nn.Sequential(
        # input is (nc) x 64 x 64
        SpectralNorm(
            nn.Conv2d(self.num_channels, self.ndf, 4, 2, 1, bias=False)),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf) x 32 x 32
        SpectralNorm(nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1, bias=False)),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*2) x 16 x 16
        SpectralNorm(
            nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1, bias=False)),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*4) x 8 x 8
        SpectralNorm(
            nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1, bias=False)),
        nn.LeakyReLU(0.2, inplace=True),
    )
    # if we are feeding D with 64x64 images:
    if self.image_size == 64:
        self.netD_2 = nn.Conv2d(self.ndf * 8 + self.latent_space, 1, 4, 1, 0, bias=False)
    # if we are feeding D with 128x128 images:
    elif self.image_size == 128:
        self.netD_1 = nn.Sequential(
            self.netD_1,
            SpectralNorm(
                nn.Conv2d(self.ndf * 8, self.ndf * 16, 4, 2, 1, bias=False)),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.netD_2 = nn.Conv2d(self.ndf * 16 + self.latent_space, 1, 4, 1, 0, bias=False)
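# Hedged sketch (assumption): a plausible forward() method for the discriminator
# above. The `+ self.latent_space` channel count in netD_2 suggests a 128-d
# conditioning vector replicated over the final 4x4 feature map and concatenated
# channel-wise:
def forward(self, x, latent):
    out = self.netD_1(x)                                    # (B, ndf*8 or ndf*16, 4, 4)
    latent = latent.view(latent.size(0), -1, 1, 1)
    latent = latent.repeat(1, 1, out.size(2), out.size(3))  # tile over spatial dims
    out = torch.cat([out, latent], dim=1)
    return self.netD_2(out).view(-1)                        # one logit per image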
def __init__(self, dataset='youtubers'):
    super(discriminator, self).__init__()
    self.image_size = 64
    self.num_channels = 3
    self.embed_dim = 62
    self.projected_embed_dim = 128
    self.ndf = 64
    self.B_dim = 128
    self.C_dim = 16
    self.dataset_name = dataset
    self.conv1 = SpectralNorm(
        nn.Conv2d(self.num_channels, self.ndf, 4, 2, 1, bias=False))
    self.conv2 = SpectralNorm(
        nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1, bias=False))
    self.conv3 = SpectralNorm(
        nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1, bias=False))
    self.conv4 = SpectralNorm(
        nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1, bias=False))
    self.netD_1 = nn.Sequential(
        # input is (nc) x 64 x 64
        nn.Conv2d(self.num_channels, self.ndf, 4, 2, 1, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf) x 32 x 32
        nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ndf * 2),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*2) x 16 x 16
        nn.Conv2d(self.ndf * 2, self.ndf * 4, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ndf * 4),
        nn.LeakyReLU(0.2, inplace=True),
        # state size. (ndf*4) x 8 x 8
        nn.Conv2d(self.ndf * 4, self.ndf * 8, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ndf * 8),
        nn.LeakyReLU(0.2, inplace=True),
    )
    self.projector = Concat_embed(self.embed_dim, self.projected_embed_dim)
    self.netD_2 = nn.Sequential(
        # state size. (ndf*8) x 4 x 4
        #nn.Conv2d(self.ndf * 8, 1, 4, 1, 0, bias=False),
        nn.Conv2d(self.ndf * 8 + self.projected_embed_dim, 1, 4, 1, 0, bias=False),
        #nn.Sigmoid()
    )
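# Hedged sketch (assumption): a plausible forward() for this embedding-conditioned
# discriminator (and the similar projector variants above), in the usual conditional
# GAN style: image features from netD_1 are fused with the embedding by Concat_embed,
# then scored by netD_2.
def forward(self, x, embed):
    x_intermediate = self.netD_1(x)               # (B, ndf*8, 4, 4)
    out = self.projector(x_intermediate, embed)   # (B, ndf*8 + projected_embed_dim, 4, 4)
    out = self.netD_2(out)                        # (B, 1, 1, 1)
    return out.view(-1), x_intermediate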
def __init__(self, num_classes, ndf=64):
    super(FCDiscriminator, self).__init__()
    self.conv1 = SpectralNorm(
        nn.Conv2d(num_classes, ndf, kernel_size=4, stride=2, padding=1))
    self.conv2 = SpectralNorm(
        nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1))
    self.conv3 = SpectralNorm(
        nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1))
    self.conv4 = SpectralNorm(
        nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=2, padding=1))
    self.classifier = SpectralNorm(
        nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=2, padding=1))
    self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
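# Hedged sketch (assumption): the layer names follow the common fully-convolutional
# discriminator pattern, so a forward() would plausibly interleave the LeakyReLU
# between the strided convs and end with the 1-channel classifier map:
def forward(self, x):
    x = self.leaky_relu(self.conv1(x))
    x = self.leaky_relu(self.conv2(x))
    x = self.leaky_relu(self.conv3(x))
    x = self.leaky_relu(self.conv4(x))
    return self.classifier(x)   # per-patch real/fake logits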
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0,
             activation='LeakyReLU', norm=None, init_method=None, std=1., sn=False):
    super(ConvLayer, self).__init__()
    # A batchnorm layer follows, so no bias is needed: the normalization re-centers
    # the output, which cancels any constant offset a bias would add.
    bias = False if norm == 'BN' else True
    self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias)
    if sn:
        self.conv2d = SpectralNorm(self.conv2d)  # spectral normalization
    if activation is not None:
        if activation == 'LeakyReLU':
            # fetch the activation attribute from torch.nn; falls back to the
            # string 'LeakyReLU' if the attribute does not exist
            self.activation = getattr(torch.nn, activation, 'LeakyReLU')
            self.activation = self.activation()
        else:
            self.activation = getattr(torch, activation, activation)
    else:
        self.activation = None
    self.norm = norm
    if norm == 'BN':
        self.norm_layer = nn.BatchNorm2d(out_channels, momentum=0.01)
    elif norm == 'IN':
        # normalizes each channel of each sample independently
        self.norm_layer = nn.InstanceNorm2d(out_channels, track_running_stats=True)
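# Hedged sketch (assumption): __init__ stores conv, norm, and activation, but the
# forward() is not shown. A conv -> norm -> activation ordering is consistent with
# the bias logic above (bias disabled only when a norm layer re-centers the output):
def forward(self, x):
    out = self.conv2d(x)
    if self.norm in ('BN', 'IN'):
        out = self.norm_layer(out)
    if self.activation is not None:
        out = self.activation(out)
    return out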
def __init__(self, ninputs, fmaps, kwidth, activation, padding=None,
             lnorm=False, dropout=0., pooling=2, enc=True, bias=False,
             aal_h=None, linterp=False, snorm=False, convblock=False):
    # linterp: do linear interpolation instead of simple conv transpose
    # snorm: spectral norm
    super(GBlock, self).__init__()
    self.pooling = pooling
    self.linterp = linterp
    self.enc = enc
    self.kwidth = kwidth
    self.convblock = convblock
    if padding is None:
        padding = 0
    if enc:
        if aal_h is not None:
            self.aal_conv = nn.Conv1d(ninputs, ninputs, aal_h.shape[0],
                                      stride=1,
                                      padding=aal_h.shape[0] // 2 - 1,
                                      bias=False)
            if snorm:
                self.aal_conv = SpectralNorm(self.aal_conv)
            # apply AAL weights, reshaping impulse response to match
            # in channels and out channels
            aal_t = torch.FloatTensor(aal_h).view(1, 1, -1)
            aal_t = aal_t.repeat(ninputs, ninputs, 1)
            self.aal_conv.weight.data = aal_t
        if convblock:
            self.conv = Conv1DResBlock(ninputs, fmaps, kwidth,
                                       stride=pooling, bias=bias)
        else:
            self.conv = nn.Conv1d(ninputs, fmaps, kwidth,
                                  stride=pooling, padding=padding, bias=bias)
        if snorm:
            self.conv = SpectralNorm(self.conv)
        if activation == 'glu':
            # TODO: REVIEW
            raise NotImplementedError
            self.glu_conv = nn.Conv1d(ninputs, fmaps, kwidth,
                                      stride=pooling, padding=padding, bias=bias)
            if snorm:
                self.glu_conv = spectral_norm(self.glu_conv)
    else:
        if linterp:
            # pre-conv prior to upsampling
            self.pre_conv = nn.Conv1d(ninputs, ninputs // 8, kwidth,
                                      stride=1, padding=kwidth // 2, bias=bias)
            self.conv = nn.Conv1d(ninputs // 8, fmaps, kwidth,
                                  stride=1, padding=kwidth // 2, bias=bias)
            if snorm:
                self.conv = SpectralNorm(self.conv)
            if activation == 'glu':
                self.glu_conv = nn.Conv1d(ninputs, fmaps, kwidth, stride=1,
                                          padding=kwidth // 2, bias=bias)
                if snorm:
                    self.glu_conv = SpectralNorm(self.glu_conv)
        else:
            if convblock:
                self.conv = Conv1DResBlock(ninputs, fmaps, kwidth,
                                           stride=pooling, bias=bias,
                                           transpose=True)
            else:
                # decoder like with transposed conv
                # compute padding required based on pooling
                pad = (2 * pooling - pooling - kwidth) // -2
                self.conv = nn.ConvTranspose1d(ninputs, fmaps, kwidth,
                                               stride=pooling,
                                               padding=pad,
                                               output_padding=0,
                                               bias=bias)
            if snorm:
                self.conv = SpectralNorm(self.conv)
            if activation == 'glu':
                # TODO: REVIEW
                raise NotImplementedError
                self.glu_conv = nn.ConvTranspose1d(ninputs, fmaps, kwidth,
                                                   stride=pooling,
                                                   padding=padding,
                                                   output_padding=pooling - 1,
                                                   bias=bias)
                if snorm:
                    self.glu_conv = spectral_norm(self.glu_conv)
    if activation is not None:
        self.act = activation
    if lnorm:
        self.ln = LayerNorm()
    if dropout > 0:
        self.dout = nn.Dropout(dropout)
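# Hedged usage example (assumption, not from the source): constructing an encoder
# GBlock that downsamples a raw waveform by the pooling factor. The numbers are
# illustrative; the forward() (not shown in this excerpt) would presumably apply the
# conv, then the optional layer norm, activation, and dropout configured above.
import torch
import torch.nn as nn

block = GBlock(ninputs=1, fmaps=16, kwidth=31, activation=nn.PReLU(16),
               padding=15, pooling=4, enc=True, snorm=True)
wav = torch.randn(8, 1, 16384)   # (batch, channels, samples)
feats = block.conv(wav)          # stride-4 conv: (8, 1, 16384) -> (8, 16, 4096)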
def __init__(self, dataset='youtubers'):
    super(generator, self).__init__()
    self.image_size = 64
    self.num_channels = 3
    self.noise_dim = 100
    self.embed_dim = 62
    self.projected_embed_dim = 128
    self.raw_wav_dim = 64000
    self.latent_dim = self.projected_embed_dim
    self.dataset_name = dataset
    self.projection = nn.Sequential(
        nn.Linear(in_features=self.embed_dim,
                  out_features=self.projected_embed_dim),
        nn.BatchNorm1d(num_features=self.projected_embed_dim),
        nn.LeakyReLU(negative_slope=0.2, inplace=True))
    #self.d_fmaps = [64, 128, 256, 512, 1024, 1024]
    self.d_fmaps = [16, 32, 128, 256, 512, 1024]
    self.act = [
        nn.PReLU(fmaps) for fmaps in [64, 128, 256, 512, 1024, 1024]
    ]
    self.audio_embedding = Discriminator(1, self.d_fmaps, 15,
                                         nn.LeakyReLU(0.3))
    #self.audio_embedding = Generator(1, self.d_fmaps, 31, self.act)
    self.ngf = 64
    # based on: https://github.com/pytorch/examples/blob/master/dcgan/main.py
    self.netG = nn.Sequential(
        SpectralNorm(
            nn.ConvTranspose2d(self.latent_dim, self.ngf * 8, 4, 1, 0, bias=False)),
        #nn.BatchNorm2d(self.ngf * 8),
        nn.Dropout(),
        nn.ReLU(True),
        # state size. (ngf*8) x 4 x 4
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 8, self.ngf * 4, 4, 2, 1, bias=False)),
        nn.Dropout(),
        #nn.BatchNorm2d(self.ngf * 4),
        nn.ReLU(True),
        # state size. (ngf*4) x 8 x 8
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 4, self.ngf * 2, 4, 2, 1, bias=False)),
        nn.Dropout(),
        #nn.BatchNorm2d(self.ngf * 2),
        nn.ReLU(True),
        # state size. (ngf*2) x 16 x 16
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 2, self.ngf, 4, 2, 1, bias=False)),
        #nn.BatchNorm2d(self.ngf),
        nn.Dropout(),
        nn.ReLU(True),
        # state size. (ngf) x 32 x 32
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf, self.num_channels, 4, 2, 1, bias=False)),
        #nn.Dropout2d(),
        nn.Tanh()
        # state size. (num_channels) x 64 x 64
    )
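# Hedged shape check (assumption, illustrative only): latent_dim equals
# projected_embed_dim (128), so netG is driven by the projected audio embedding
# alone, with no separate noise vector. Walking a dummy latent through a plain stack
# with the same hyperparameters confirms the commented state sizes:
import torch
import torch.nn as nn

netG = nn.Sequential(
    nn.ConvTranspose2d(128, 512, 4, 1, 0, bias=False), nn.ReLU(True),  # 1x1 -> 4x4
    nn.ConvTranspose2d(512, 256, 4, 2, 1, bias=False), nn.ReLU(True),  # -> 8x8
    nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False), nn.ReLU(True),  # -> 16x16
    nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False), nn.ReLU(True),   # -> 32x32
    nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False), nn.Tanh(),         # -> 64x64
)
print(netG(torch.randn(4, 128, 1, 1)).shape)  # torch.Size([4, 3, 64, 64])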
def __init__(self, image_size, audio_samples):
    super(generator, self).__init__()
    # defining some useful variables
    self.audio_samples = audio_samples
    self.num_channels = 3
    self.latent_dim = 128
    self.ngf = 64
    self.image_size = image_size
    # defining segan's D
    self.d_fmaps = [16, 32, 128, 256, 512, 1024]
    self.audio_embedding = Discriminator(1, self.d_fmaps, 15,
                                         nn.LeakyReLU(0.3),
                                         self.audio_samples)
    # defining the auxiliary classifier
    self.aux_classifier = auxclassifier()
    # common network for both architectures, when generating 64x64 or 128x128 images
    self.netG = nn.Sequential(
        # state size. (ngf*4) x 8 x 8
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 8, self.ngf * 4, 4, 2, 1, bias=False)),
        nn.Dropout(),
        # nn.BatchNorm2d(self.ngf * 2),
        nn.ReLU(True),
        # state size. (ngf*2) x 16 x 16
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 4, self.ngf * 2, 4, 2, 1, bias=False)),
        # nn.BatchNorm2d(self.ngf),
        nn.Dropout(),
        nn.ReLU(True),
        # state size. (ngf) x 32 x 32
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 2, self.ngf, 4, 2, 1, bias=False)),
        nn.Dropout(),
        nn.ReLU(True),
        # If we added Dropout here, we would only generate noise, not realistic faces
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf, self.num_channels, 4, 2, 1, bias=False)),
        # state size. (num_channels) x 128 x 128
        nn.Tanh())
    # if we want to generate 64x64 images:
    if self.image_size == 64:
        self.netG = nn.Sequential(
            SpectralNorm(
                nn.ConvTranspose2d(self.latent_dim, self.ngf * 8, 4, 1, 0, bias=False)),
            nn.Dropout(),
            # nn.BatchNorm2d(self.ngf * 4),
            nn.ReLU(True),
            self.netG)
    # if we want to generate 128x128 images:
    if self.image_size == 128:
        self.netG = nn.Sequential(
            SpectralNorm(
                nn.ConvTranspose2d(self.latent_dim, self.ngf * 16, 4, 1, 0, bias=False)),
            nn.Dropout(),
            nn.ReLU(True),
            SpectralNorm(
                nn.ConvTranspose2d(self.ngf * 16, self.ngf * 8, 4, 2, 1, bias=False)),
            nn.Dropout(),
            # nn.BatchNorm2d(self.ngf * 4),
            nn.ReLU(True),
            self.netG)
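# Hedged shape trace (assumption, illustrative only): each ConvTranspose2d(k=4, s=2,
# p=1) doubles the spatial size, and the stem ConvTranspose2d(k=4, s=1, p=0) maps
# 1x1 -> 4x4, so for a (B, 128, 1, 1) latent:
#   image_size == 64 : 1 -> 4 -> 8 -> 16 -> 32 -> 64, channels ngf*8 ... 3
#   image_size == 128: 1 -> 4 -> 8 -> 16 -> 32 -> 64 -> 128, starting from ngf*16
out_size = lambda s: (s - 1) * 2 - 2 * 1 + 4  # ConvTranspose2d output-size formula
assert out_size(32) == 64 and out_size(64) == 128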
def __init__(self, dataset='youtubers'):
    super(generator, self).__init__()
    self.image_size = 64
    self.num_channels = 3
    self.noise_dim = 100
    self.embed_dim = 62
    self.projected_embed_dim = 128
    self.latent_dim = self.noise_dim + self.projected_embed_dim
    self.ngf = 64
    self.dataset_name = dataset
    self.projection = nn.Sequential(
        nn.Linear(in_features=self.embed_dim,
                  out_features=self.projected_embed_dim),
        nn.BatchNorm1d(num_features=self.projected_embed_dim),
        nn.LeakyReLU(negative_slope=0.2, inplace=True))
    # based on: https://github.com/pytorch/examples/blob/master/dcgan/main.py
    self.netG = nn.Sequential(
        SpectralNorm(
            nn.ConvTranspose2d(self.latent_dim, self.ngf * 8, 4, 1, 0, bias=False)),
        #nn.BatchNorm2d(self.ngf * 8),
        nn.Dropout2d(),
        nn.ReLU(True),
        # state size. (ngf*8) x 4 x 4
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 8, self.ngf * 4, 4, 2, 1, bias=False)),
        nn.Dropout2d(),
        #nn.BatchNorm2d(self.ngf * 4),
        nn.ReLU(True),
        # state size. (ngf*4) x 8 x 8
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 4, self.ngf * 2, 4, 2, 1, bias=False)),
        nn.Dropout2d(),
        #nn.BatchNorm2d(self.ngf * 2),
        nn.ReLU(True),
        # state size. (ngf*2) x 16 x 16
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf * 2, self.ngf, 4, 2, 1, bias=False)),
        #nn.BatchNorm2d(self.ngf),
        nn.ReLU(True),
        # state size. (ngf) x 32 x 32
        SpectralNorm(
            nn.ConvTranspose2d(self.ngf, self.num_channels, 4, 2, 1, bias=False)),
        nn.Dropout2d(),
        nn.Tanh()
        # state size. (num_channels) x 64 x 64
    )
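# Hedged sketch (assumption): a plausible forward() for the conditioned generator
# above, in the usual embedding-conditioned DCGAN style (project the embedding, then
# concatenate it with the noise along the channel dimension):
def forward(self, embed_vector, z):
    # embed_vector: (B, embed_dim); z: (B, noise_dim, 1, 1)
    projected = self.projection(embed_vector)        # (B, projected_embed_dim)
    projected = projected.unsqueeze(2).unsqueeze(3)  # (B, 128, 1, 1)
    latent = torch.cat([projected, z], dim=1)        # (B, latent_dim = 228, 1, 1)
    return self.netG(latent)                         # (B, 3, 64, 64)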