def __init__(self, nz=32, ngf=64, output_size=16384, nc=1, num_measurements=1000, cuda=True): super(DCGAN_Audio_Straight, self).__init__() self.nc = nc self.output_size = output_size self.CUDA = cuda # Deconv Layers: (in_channels, out_channels, kernel_size, stride, padding, bias=False) # Inputs: R^(N x Cin x Lin), Outputs: R^(N x Cout x Lout) s.t. Lout = (Lin - 1)*stride - 2*padding + kernel_size self.conv1 = nn.ConvTranspose1d(nz, ngf, 4, 1, 0, bias=False) self.bn1 = nn.BatchNorm1d(ngf) # LAYER 1: input: (random) z∈R^(nzx1), output: x1∈R^(64x4) (channels x length) self.conv2 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn2 = nn.BatchNorm1d(ngf) # LAYER 2: input: x1∈R^(64x4), output: x2∈R^(64x8) (channels x length) self.conv3 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn3 = nn.BatchNorm1d(ngf) # LAYER 3: input: x2∈R^(64x8), output: x3∈R^(64x16) (channels x length) self.conv4 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn4 = nn.BatchNorm1d(ngf) # LAYER 4: input: x3∈R^(64x16), output: x4∈R^(64x32) (channels x length) self.conv5 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn5 = nn.BatchNorm1d(ngf) # LAYER 5: input: x4∈R^(64x32), output: x5∈R^(64x64) (channels x length) self.conv6 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn6 = nn.BatchNorm1d(ngf) # LAYER 6: input: x5∈R^(64x64), output: x6∈R^(64x128) (channels x length) self.conv7 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn7 = nn.BatchNorm1d(ngf) # LAYER 7: input: x6∈R^(64x128), output: x7∈R^(64x256) (channels x length) self.conv8 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn8 = nn.BatchNorm1d(ngf) # LAYER 8: input: x7∈R^(64x256), output: x8∈R^(64x512) (channels x length) self.conv9 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn9 = nn.BatchNorm1d(ngf) # LAYER 9: input: x8∈R^(64x512), output: x9∈R^(64x1024) (channels x length) self.conv10 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn10 = nn.BatchNorm1d(ngf) # LAYER 10: input: x9∈R^(64x1024), output: x10∈R^(64x2048) (channels x length) self.conv11 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn11 = nn.BatchNorm1d(ngf) # LAYER 11: input: x10∈R^(64x2048), output: x11∈R^(64x4096) (channels x length) self.conv12 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False) self.bn12 = nn.BatchNorm1d(ngf) # LAYER 12: input: x11∈R^(64x4096), output: x12∈R^(64x8192) (channels x length) self.conv13 = nn.ConvTranspose1d(ngf, nc, 4, 2, 1, bias=False) # output is the generated signal # LAYER 13: input: x12∈R^(64x8192), output: (sinusoid) G(z,w)∈R^(ncx16384) (channels x length) self.fc = nn.Linear(output_size * nc, num_measurements, bias=False) # output is A; measurement matrix
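# A minimal, hedged sketch (not part of the original model) checking the length rule quoted above,
# Lout = (Lin - 1)*stride - 2*padding + kernel_size, on the first two layer configurations;
# the dummy latent and batch size are illustrative assumptions.
import torch
import torch.nn as nn

z = torch.randn(1, 32, 1)                                  # (N, nz, Lin) with Lin = 1
layer1 = nn.ConvTranspose1d(32, 64, 4, 1, 0, bias=False)   # (1 - 1)*1 - 2*0 + 4 = 4
layer2 = nn.ConvTranspose1d(64, 64, 6, 2, 2, bias=False)   # (4 - 1)*2 - 2*2 + 6 = 8
x1 = layer1(z)
x2 = layer2(x1)
print(x1.shape, x2.shape)                                  # (1, 64, 4) and (1, 64, 8)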
def __init__(self, ngf=16): super().__init__() # size notations = [batch_size x feature_maps x width] (height omitted - 1D convolutions) # encoder gets a noisy signal as input self.enc1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=32, stride=2, padding=15) # out : [B x 16 x 8192] self.enc1_nl = nn.PReLU() # non-linear transformation after encoder layer 1 self.enc2 = nn.Conv1d(16, 32, 32, 2, 15) # [B x 32 x 4096] self.enc2_nl = nn.PReLU() self.enc3 = nn.Conv1d(32, 32, 32, 2, 15) # [B x 32 x 2048] self.enc3_nl = nn.PReLU() self.enc4 = nn.Conv1d(32, 64, 32, 2, 15) # [B x 64 x 1024] self.enc4_nl = nn.PReLU() self.enc5 = nn.Conv1d(64, 64, 32, 2, 15) # [B x 64 x 512] self.enc5_nl = nn.PReLU() self.enc6 = nn.Conv1d(64, 128, 32, 2, 15) # [B x 128 x 256] self.enc6_nl = nn.PReLU() self.enc7 = nn.Conv1d(128, 128, 32, 2, 15) # [B x 128 x 128] self.enc7_nl = nn.PReLU() self.enc8 = nn.Conv1d(128, 256, 32, 2, 15) # [B x 256 x 64] self.enc8_nl = nn.PReLU() self.enc9 = nn.Conv1d(256, 256, 32, 2, 15) # [B x 256 x 32] self.enc9_nl = nn.PReLU() self.enc10 = nn.Conv1d(256, 512, 32, 2, 15) # [B x 512 x 16] self.enc10_nl = nn.PReLU() self.enc11 = nn.Conv1d(512, 1024, 32, 2, 15) # output : [B x 1024 x 8] self.enc11_nl = nn.PReLU() # decoder generates an enhanced signal # each decoder output is concatenated with the homologous encoder output, # so the feature map sizes are doubled self.dec10 = nn.ConvTranspose1d(in_channels=2048, out_channels=512, kernel_size=32, stride=2, padding=15) self.dec10_nl = nn.PReLU() # out : [B x 512 x 16] -> (concat) [B x 1024 x 16] self.dec9 = nn.ConvTranspose1d(1024, 256, 32, 2, 15) # [B x 256 x 32] self.dec9_nl = nn.PReLU() self.dec8 = nn.ConvTranspose1d(512, 256, 32, 2, 15) # [B x 256 x 64] self.dec8_nl = nn.PReLU() self.dec7 = nn.ConvTranspose1d(512, 128, 32, 2, 15) # [B x 128 x 128] self.dec7_nl = nn.PReLU() self.dec6 = nn.ConvTranspose1d(256, 128, 32, 2, 15) # [B x 128 x 256] self.dec6_nl = nn.PReLU() self.dec5 = nn.ConvTranspose1d(256, 64, 32, 2, 15) # [B x 64 x 512] self.dec5_nl = nn.PReLU() self.dec4 = nn.ConvTranspose1d(128, 64, 32, 2, 15) # [B x 64 x 1024] self.dec4_nl = nn.PReLU() self.dec3 = nn.ConvTranspose1d(128, 32, 32, 2, 15) # [B x 32 x 2048] self.dec3_nl = nn.PReLU() self.dec2 = nn.ConvTranspose1d(64, 32, 32, 2, 15) # [B x 32 x 4096] self.dec2_nl = nn.PReLU() self.dec1 = nn.ConvTranspose1d(64, 16, 32, 2, 15) # [B x 16 x 8192] self.dec1_nl = nn.PReLU() self.dec_final = nn.ConvTranspose1d(32, 1, 32, 2, 15) # [B x 1 x 16384] self.dec_tanh = nn.Tanh() # initialize weights self.init_weights()
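# A hedged sketch (dummy tensors, not the trained model) of the skip-connection pattern the comment
# above describes: each decoder output is concatenated channel-wise with the homologous encoder
# output, so the next transposed conv sees twice the channels (here dec9 takes 512 + 512 = 1024).
import torch
import torch.nn as nn

enc10_out = torch.randn(1, 512, 16)                  # stand-in for the enc10 feature map
dec10_out = torch.randn(1, 512, 16)                  # stand-in for the dec10 output
dec9 = nn.ConvTranspose1d(1024, 256, 32, 2, 15)
y = dec9(torch.cat([dec10_out, enc10_out], dim=1))   # -> [1, 256, 32]
print(y.shape)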
def conv_transpose(*args, **kwargs): return nn.ConvTranspose1d(*args, **kwargs)
def __init__(self, ninputs, fmaps, kwidth, activation, padding=None, lnorm=False, dropout=0., pooling=2, enc=True, bias=False, aal_h=None, linterp=False, snorm=False, convblock=False): # linterp: do linear interpolation instead of simple conv transpose # snorm: spectral norm super(GBlock, self).__init__() self.pooling = pooling self.linterp = linterp self.enc = enc self.kwidth = kwidth self.convblock = convblock if padding is None: padding = 0 if enc: if aal_h is not None: self.aal_conv = nn.Conv1d(ninputs, ninputs, aal_h.shape[0], stride=1, padding=aal_h.shape[0] // 2 - 1, bias=False) if snorm: self.aal_conv = SpectralNorm(self.aal_conv) # apply AAL weights, reshaping impulse response to match # in channels and out channels aal_t = torch.FloatTensor(aal_h).view(1, 1, -1) aal_t = aal_t.repeat(ninputs, ninputs, 1) self.aal_conv.weight.data = aal_t if convblock: self.conv = Conv1DResBlock(ninputs, fmaps, kwidth, stride=pooling, bias=bias) else: self.conv = nn.Conv1d(ninputs, fmaps, kwidth, stride=pooling, padding=padding, bias=bias) if snorm: self.conv = SpectralNorm(self.conv) if activation == 'glu': # TODO: REVIEW raise NotImplementedError self.glu_conv = nn.Conv1d(ninputs, fmaps, kwidth, stride=pooling, padding=padding, bias=bias) if snorm: self.glu_conv = spectral_norm(self.glu_conv) else: if linterp: # pre-conv prior to upsampling self.pre_conv = nn.Conv1d(ninputs, ninputs // 8, kwidth, stride=1, padding=kwidth // 2, bias=bias) self.conv = nn.Conv1d(ninputs // 8, fmaps, kwidth, stride=1, padding=kwidth // 2, bias=bias) if snorm: self.conv = SpectralNorm(self.conv) if activation == 'glu': self.glu_conv = nn.Conv1d(ninputs, fmaps, kwidth, stride=1, padding=kwidth // 2, bias=bias) if snorm: self.glu_conv = SpectralNorm(self.glu_conv) else: if convblock: self.conv = Conv1DResBlock(ninputs, fmaps, kwidth, stride=pooling, bias=bias, transpose=True) else: # decoder like with transposed conv # compute padding required based on pooling pad = (2 * pooling - pooling - kwidth) // -2 self.conv = nn.ConvTranspose1d(ninputs, fmaps, kwidth, stride=pooling, padding=pad, output_padding=0, bias=bias) if snorm: self.conv = SpectralNorm(self.conv) if activation == 'glu': # TODO: REVIEW raise NotImplementedError self.glu_conv = nn.ConvTranspose1d(ninputs, fmaps, kwidth, stride=pooling, padding=padding, output_padding=pooling - 1, bias=bias) if snorm: self.glu_conv = spectral_norm(self.glu_conv) if activation is not None: self.act = activation if lnorm: self.ln = LayerNorm() if dropout > 0: self.dout = nn.Dropout(dropout)
def encoder_sequential(input_size, output_size, *args, **kwargs): return nn.Sequential( nn.LeakyReLU(0.2), weight_norm((nn.ConvTranspose1d(input_size, output_size, *args, **kwargs))))
def __init__(self): super(Autoencoder, self).__init__() n_channels_base = 4 self.encoder = nn.Sequential( nn.Conv1d(in_channels=1, out_channels=n_channels_base, kernel_size=5, stride=2, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.LeakyReLU(0.2, inplace=True), nn.Conv1d(in_channels=n_channels_base, out_channels=2 * n_channels_base, kernel_size=5, stride=2, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.BatchNorm1d(2 * n_channels_base), nn.LeakyReLU(0.2, inplace=True), nn.Conv1d(in_channels=2 * n_channels_base, out_channels=4 * n_channels_base, kernel_size=5, stride=3, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.BatchNorm1d(4 * n_channels_base), nn.LeakyReLU(0.2, inplace=True), nn.Conv1d(in_channels=4 * n_channels_base, out_channels=8 * n_channels_base, kernel_size=5, stride=3, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.BatchNorm1d(8 * n_channels_base), nn.LeakyReLU(0.2, inplace=True), nn.Conv1d(in_channels=8 * n_channels_base, out_channels=16 * n_channels_base, kernel_size=5, stride=3, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.BatchNorm1d(16 * n_channels_base), nn.LeakyReLU(0.2, inplace=True), nn.Conv1d(in_channels=16 * n_channels_base, out_channels=32 * n_channels_base, kernel_size=8, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.Tanh(), ) self.decoder = nn.Sequential( nn.ConvTranspose1d(in_channels=32 * n_channels_base, out_channels=16 * n_channels_base, kernel_size=5, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.ReLU(), nn.ConvTranspose1d(in_channels=16 * n_channels_base, out_channels=8 * n_channels_base, kernel_size=5, stride=4, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.BatchNorm1d(8 * n_channels_base), nn.ReLU(), nn.ConvTranspose1d(in_channels=8 * n_channels_base, out_channels=4 * n_channels_base, kernel_size=7, stride=4, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.BatchNorm1d(4 * n_channels_base), nn.ReLU(), nn.ConvTranspose1d(in_channels=4 * n_channels_base, out_channels=2 * n_channels_base, kernel_size=7, stride=3, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.BatchNorm1d(2 * n_channels_base), nn.ReLU(), nn.ConvTranspose1d(in_channels=2 * n_channels_base, out_channels=n_channels_base, kernel_size=7, stride=2, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.BatchNorm1d(n_channels_base), nn.ReLU(), nn.ConvTranspose1d(in_channels=n_channels_base, out_channels=1, kernel_size=3, stride=2, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'), nn.Sigmoid(), )
def __init__( self, out_channels, kernel_size, input_shape=None, in_channels=None, stride=1, dilation=1, padding=0, output_padding=0, groups=1, bias=True, skip_transpose=False, ): super().__init__() self.kernel_size = kernel_size self.stride = stride self.dilation = dilation self.padding = padding self.unsqueeze = False self.skip_transpose = skip_transpose if input_shape is None and in_channels is None: raise ValueError("Must provide one of input_shape or in_channels") if in_channels is None: in_channels = self._check_input_shape(input_shape) if self.padding == "same": L_in = input_shape[-1] if skip_transpose else input_shape[1] padding_value = get_padding_elem_transposed( L_in, L_in, stride=stride, kernel_size=kernel_size, dilation=dilation, output_padding=output_padding, ) elif self.padding == "factor": L_in = input_shape[-1] if skip_transpose else input_shape[1] padding_value = get_padding_elem_transposed( L_in * stride, L_in, stride=stride, kernel_size=kernel_size, dilation=dilation, output_padding=output_padding, ) elif self.padding == "valid": padding_value = 0 elif type(self.padding) is int: padding_value = padding else: raise ValueError("Not supported padding type") self.conv = nn.ConvTranspose1d( in_channels, out_channels, self.kernel_size, stride=self.stride, dilation=self.dilation, padding=padding_value, groups=groups, bias=bias, )
def __init__(self, num_classes, cls_in_channels=256, reg_in_channels=256, roi_feat_size=7, reg_feat_up_ratio=2, reg_pre_kernel=3, reg_post_kernel=3, reg_pre_num=2, reg_post_num=1, cls_out_channels=1024, reg_offset_out_channels=256, reg_cls_out_channels=256, num_cls_fcs=1, num_reg_fcs=0, reg_class_agnostic=True, norm_cfg=None, bbox_coder=dict(type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7), loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, loss_weight=1.0)): super(SABLHead, self).__init__() self.cls_in_channels = cls_in_channels self.reg_in_channels = reg_in_channels self.roi_feat_size = roi_feat_size self.reg_feat_up_ratio = int(reg_feat_up_ratio) self.num_buckets = bbox_coder['num_buckets'] assert self.reg_feat_up_ratio // 2 >= 1 self.up_reg_feat_size = roi_feat_size * self.reg_feat_up_ratio assert self.up_reg_feat_size == bbox_coder['num_buckets'] self.reg_pre_kernel = reg_pre_kernel self.reg_post_kernel = reg_post_kernel self.reg_pre_num = reg_pre_num self.reg_post_num = reg_post_num self.num_classes = num_classes self.cls_out_channels = cls_out_channels self.reg_offset_out_channels = reg_offset_out_channels self.reg_cls_out_channels = reg_cls_out_channels self.num_cls_fcs = num_cls_fcs self.num_reg_fcs = num_reg_fcs self.reg_class_agnostic = reg_class_agnostic assert self.reg_class_agnostic self.norm_cfg = norm_cfg self.bbox_coder = build_bbox_coder(bbox_coder) self.loss_cls = build_loss(loss_cls) self.loss_bbox_cls = build_loss(loss_bbox_cls) self.loss_bbox_reg = build_loss(loss_bbox_reg) self.cls_fcs = self._add_fc_branch(self.num_cls_fcs, self.cls_in_channels, self.roi_feat_size, self.cls_out_channels) self.side_num = int(np.ceil(self.num_buckets / 2)) if self.reg_feat_up_ratio > 1: self.upsample_x = nn.ConvTranspose1d(reg_in_channels, reg_in_channels, self.reg_feat_up_ratio, stride=self.reg_feat_up_ratio) self.upsample_y = nn.ConvTranspose1d(reg_in_channels, reg_in_channels, self.reg_feat_up_ratio, stride=self.reg_feat_up_ratio) self.reg_pre_convs = nn.ModuleList() for i in range(self.reg_pre_num): reg_pre_conv = ConvModule(reg_in_channels, reg_in_channels, kernel_size=reg_pre_kernel, padding=reg_pre_kernel // 2, norm_cfg=norm_cfg, act_cfg=dict(type='ReLU')) self.reg_pre_convs.append(reg_pre_conv) self.reg_post_conv_xs = nn.ModuleList() for i in range(self.reg_post_num): reg_post_conv_x = ConvModule(reg_in_channels, reg_in_channels, kernel_size=(1, reg_post_kernel), padding=(0, reg_post_kernel // 2), norm_cfg=norm_cfg, act_cfg=dict(type='ReLU')) self.reg_post_conv_xs.append(reg_post_conv_x) self.reg_post_conv_ys = nn.ModuleList() for i in range(self.reg_post_num): reg_post_conv_y = ConvModule(reg_in_channels, reg_in_channels, kernel_size=(reg_post_kernel, 1), padding=(reg_post_kernel // 2, 0), norm_cfg=norm_cfg, act_cfg=dict(type='ReLU')) self.reg_post_conv_ys.append(reg_post_conv_y) self.reg_conv_att_x = nn.Conv2d(reg_in_channels, 1, 1) self.reg_conv_att_y = nn.Conv2d(reg_in_channels, 1, 1) self.fc_cls = nn.Linear(self.cls_out_channels, self.num_classes + 1) self.relu = nn.ReLU(inplace=True) self.reg_cls_fcs = self._add_fc_branch(self.num_reg_fcs, self.reg_in_channels, 1, self.reg_cls_out_channels) self.reg_offset_fcs = self._add_fc_branch(self.num_reg_fcs, self.reg_in_channels, 1, self.reg_offset_out_channels) self.fc_reg_cls = nn.Linear(self.reg_cls_out_channels, 1) self.fc_reg_offset = 
nn.Linear(self.reg_offset_out_channels, 1)
def WNConvTranspose1d(*args, **kwargs): return weight_norm(nn.ConvTranspose1d(*args, **kwargs))
def __init__(self, sources, audio_channels=2, channels=64, depth=6, rewrite=True, glu=True, rescale=0.1, resample=True, kernel_size=8, stride=4, growth=2., lstm_layers=2, context=3, normalize=False, samplerate=44100, segment_length=4 * 10 * 44100): """ Args: sources (list[str]): list of source names audio_channels (int): stereo or mono channels (int): first convolution channels depth (int): number of encoder/decoder layers rewrite (bool): add 1x1 convolution to each encoder layer and a convolution to each decoder layer. For the decoder layer, `context` gives the kernel size. glu (bool): use glu instead of ReLU resample_input (bool): upsample x2 the input and downsample /2 the output. rescale (int): rescale initial weights of convolutions to get their standard deviation closer to `rescale` kernel_size (int): kernel size for convolutions stride (int): stride for convolutions growth (float): multiply (resp divide) number of channels by that for each layer of the encoder (resp decoder) lstm_layers (int): number of lstm layers, 0 = no lstm context (int): kernel size of the convolution in the decoder before the transposed convolution. If > 1, will provide some context from neighboring time steps. samplerate (int): stored as meta information for easing future evaluations of the model. segment_length (int): stored as meta information for easing future evaluations of the model. Length of the segments on which the model was trained. """ super().__init__() self.audio_channels = audio_channels self.sources = sources self.kernel_size = kernel_size self.context = context self.stride = stride self.depth = depth self.resample = resample self.channels = channels self.normalize = normalize self.samplerate = samplerate self.segment_length = segment_length self.encoder = nn.ModuleList() self.decoder = nn.ModuleList() if glu: activation = nn.GLU(dim=1) ch_scale = 2 else: activation = nn.ReLU() ch_scale = 1 in_channels = audio_channels for index in range(depth): encode = [] encode += [ nn.Conv1d(in_channels, channels, kernel_size, stride), nn.ReLU() ] if rewrite: encode += [ nn.Conv1d(channels, ch_scale * channels, 1), activation ] self.encoder.append(nn.Sequential(*encode)) decode = [] if index > 0: out_channels = in_channels else: out_channels = len(self.sources) * audio_channels if rewrite: decode += [ nn.Conv1d(channels, ch_scale * channels, context), activation ] decode += [ nn.ConvTranspose1d(channels, out_channels, kernel_size, stride) ] if index > 0: decode.append(nn.ReLU()) self.decoder.insert(0, nn.Sequential(*decode)) in_channels = channels channels = int(growth * channels) channels = in_channels if lstm_layers: self.lstm = BLSTM(channels, lstm_layers) else: self.lstm = None if rescale: rescale_module(self, reference=rescale)
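# A hedged, standalone sketch (toy sizes) of the length bookkeeping behind the encoder/decoder pair
# above: an un-padded Conv1d with this kernel_size/stride and the matching ConvTranspose1d invert
# each other's length exactly when (Lin - kernel_size) % stride == 0, which is why such models
# typically pad inputs to a "valid" length before separation.
import torch
import torch.nn as nn

kernel_size, stride = 8, 4
down = nn.Conv1d(2, 64, kernel_size, stride)
up = nn.ConvTranspose1d(64, 2, kernel_size, stride)
x = torch.randn(1, 2, 1024)                          # (1024 - 8) % 4 == 0
print(up(down(x)).shape)                             # torch.Size([1, 2, 1024])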
def __init__(self, cfg): super(Vae, self).__init__(cfg) # encoder self.res0 = make_layers(cfg["n_channels"], cfg["conv1_ch"], cfg["conv0_ker"], n_layers=1, cardinality=1, dropRate=0) self.resx = ResNeXtBottleNeck(cfg["conv1_ch"], cfg["conv1_ker"], cardinality=cfg["cardinality"], dropRate=cfg["dropRate"]) self.res2 = nn.Sequential( nn.Conv1d(cfg["conv1_ch"], cfg["conv2_ch"], cfg["conv2_ker"], groups=1, padding=get_padding(cfg["conv2_ker"]), bias=False), nn.BatchNorm1d(cfg["conv2_ch"]), nn.Dropout(p=cfg["dropRate"])) self.enc_mu = nn.Linear(in_features=int( cfg["conv2_ch"] * cfg["spk_length"] / cfg["ds_ratio_tot"]), out_features=cfg["latent_dim"]) self.enc_log_var = nn.Linear(in_features=int( cfg["conv2_ch"] * cfg["spk_length"] / cfg["ds_ratio_tot"]), out_features=cfg["latent_dim"]) # decoder self.dec_linear = nn.Linear( in_features=cfg["latent_dim"], out_features=int(cfg["conv2_ch"] * cfg["spk_length"] / cfg["ds_ratio_tot"])) self.deres2 = make_layers(cfg["conv2_ch"], cfg["conv1_ch"], cfg["conv2_ker"], n_layers=1, decode=True, dropRate=cfg["dropRate"]) self.deres1 = BasicResBlock(cfg["conv1_ch"], cfg["conv1_ker"], n_layers=2, decode=True, dropRate=cfg["dropRate"]) self.deres0 = nn.ConvTranspose1d(cfg["conv1_ch"], cfg["n_channels"], cfg["conv0_ker"], padding=get_padding(cfg["conv0_ker"])) # down sampling layers self.ds1 = nn.MaxPool1d(cfg["ds_ratio_1"]) self.ds2 = nn.MaxPool1d(cfg["ds_ratio_2"]) # move model to GPU if torch.cuda.is_available(): self.cuda() # optimizer self.optimizer = optim.Adam(self.parameters(), lr=self.cfg["learn_rate"], weight_decay=self.cfg["weight_decay"], amsgrad=True) self.unique_labels = [] self.target_means = []
def __append_layer(self, net_style, args_dict): args_values_list = list(args_dict.values()) if net_style == "Conv2d": self.layers.append( nn.Conv2d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5], args_values_list[6], args_values_list[7])) elif net_style == "MaxPool2d": self.layers.append( nn.MaxPool2d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5])) elif net_style == "Linear": self.layers.append( nn.Linear(args_values_list[0], args_values_list[1], args_values_list[2])) elif net_style == "reshape": # special case: for a reshape layer, the target tensor size is passed straight through # print(type(args_values_list[0])) self.layers.append(args_values_list[0]) elif net_style == "Conv1d": self.layers.append( nn.Conv1d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5], args_values_list[6], args_values_list[7])) elif net_style == "Conv3d": self.layers.append( nn.Conv3d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5], args_values_list[6], args_values_list[7])) elif net_style == "ConvTranspose1d": self.layers.append( nn.ConvTranspose1d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5], args_values_list[6], args_values_list[7], args_values_list[8])) elif net_style == "ConvTranspose2d": self.layers.append( nn.ConvTranspose2d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5], args_values_list[6], args_values_list[7], args_values_list[8])) elif net_style == "ConvTranspose3d": self.layers.append( nn.ConvTranspose3d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5], args_values_list[6], args_values_list[7], args_values_list[8])) elif net_style == "Unfold": self.layers.append( nn.Unfold(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3])) elif net_style == "Fold": self.layers.append( nn.Fold(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "MaxPool1d": self.layers.append( nn.MaxPool1d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5])) elif net_style == "MaxPool3d": self.layers.append( nn.MaxPool3d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4], args_values_list[5])) elif net_style == "MaxUnpool1d": self.layers.append( nn.MaxUnpool1d(args_values_list[0], args_values_list[1], args_values_list[2])) elif net_style == "MaxUnpool2d": self.layers.append( nn.MaxUnpool2d(args_values_list[0], args_values_list[1], args_values_list[2])) elif net_style == "MaxUnpool3d": self.layers.append( nn.MaxUnpool3d(args_values_list[0], args_values_list[1], args_values_list[2])) elif net_style == "AvgPool1d": self.layers.append( nn.AvgPool1d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "AvgPool2d": self.layers.append( nn.AvgPool2d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "AvgPool3d": self.layers.append( nn.AvgPool3d(args_values_list[0], args_values_list[1], 
args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "FractionalMaxPool2d": self.layers.append( nn.FractionalMaxPool2d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "LPPool1d": self.layers.append( nn.LPPool1d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3])) elif net_style == "LPPool2d": self.layers.append( nn.LPPool2d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3])) elif net_style == "AdaptiveMaxPool1d": self.layers.append( nn.AdaptiveMaxPool1d(args_values_list[0], args_values_list[1])) elif net_style == "AdaptiveMaxPool2d": self.layers.append( nn.AdaptiveMaxPool2d(args_values_list[0], args_values_list[1])) elif net_style == "AdaptiveMaxPool3d": self.layers.append( nn.AdaptiveMaxPool3d(args_values_list[0], args_values_list[1])) elif net_style == "AdaptiveAvgPool1d": self.layers.append(nn.AdaptiveAvgPool1d(args_values_list[0])) elif net_style == "AdaptiveAvgPool2d": self.layers.append(nn.AdaptiveAvgPool2d(args_values_list[0])) elif net_style == "AdaptiveAvgPool3d": self.layers.append(nn.AdaptiveAvgPool3d(args_values_list[0])) elif net_style == "ReflectionPad1d": self.layers.append(nn.ReflectionPad1d(args_values_list[0])) elif net_style == "ReflectionPad2d": self.layers.append(nn.ReflectionPad2d(args_values_list[0])) elif net_style == "ReplicationPad1d": self.layers.append(nn.ReplicationPad1d(args_values_list[0])) elif net_style == "ReplicationPad2d": self.layers.append(nn.ReplicationPad2d(args_values_list[0])) elif net_style == "ReplicationPad3d": self.layers.append(nn.ReplicationPad3d(args_values_list[0])) elif net_style == "ZeroPad2d": self.layers.append(nn.ZeroPad2d(args_values_list[0])) elif net_style == "ConstantPad1d": self.layers.append( nn.ConstantPad1d(args_values_list[0], args_values_list[1])) elif net_style == "ConstantPad2d": self.layers.append( nn.ConstantPad2d(args_values_list[0], args_values_list[1])) elif net_style == "ConstantPad3d": self.layers.append( nn.ConstantPad3d(args_values_list[0], args_values_list[1])) elif net_style == "ELU": self.layers.append(nn.ELU(args_values_list[0], args_values_list[1])) elif net_style == "Hardshrink": self.layers.append(nn.Hardshrink(args_values_list[0])) elif net_style == "Hardtanh": self.layers.append( nn.Hardtanh(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "LeakyReLU": self.layers.append( nn.LeakyReLU(args_values_list[0], args_values_list[1])) elif net_style == "LogSigmoid": self.layers.append(nn.LogSigmoid()) elif net_style == "PReLU": self.layers.append( nn.PReLU(args_values_list[0], args_values_list[1])) elif net_style == "ReLU": self.layers.append(nn.ReLU(args_values_list[0])) elif net_style == "ReLU6": self.layers.append(nn.ReLU6(args_values_list[0])) elif net_style == "RReLU": self.layers.append( nn.RReLU(args_values_list[0], args_values_list[1], args_values_list[2])) elif net_style == "SELU": self.layers.append(nn.SELU(args_values_list[0])) elif net_style == "CELU": self.layers.append( nn.CELU(args_values_list[0], args_values_list[1])) elif net_style == "Sigmoid": self.layers.append(nn.Sigmoid()) elif net_style == "Softplus": self.layers.append( nn.Softplus(args_values_list[0], args_values_list[1])) elif net_style == "Softshrink": self.layers.append(nn.Softshrink(args_values_list[0])) elif net_style == "Softsign": self.layers.append(nn.Softsign()) elif net_style == "Tanh": 
self.layers.append(nn.Tanh()) elif net_style == "Tanhshrink": self.layers.append(nn.Tanhshrink()) elif net_style == "Threshold": self.layers.append( nn.Threshold(args_values_list[0], args_values_list[1], args_values_list[2])) elif net_style == "Softmin": self.layers.append(nn.Softmin(args_values_list[0])) elif net_style == "Softmax": self.layers.append(nn.Softmax(args_values_list[0])) elif net_style == "Softmax2d": self.layers.append(nn.Softmax2d()) elif net_style == "LogSoftmax": self.layers.append(nn.LogSoftmax(args_values_list[0])) elif net_style == "AdaptiveLogSoftmaxWithLoss": self.layers.append( nn.AdaptiveLogSoftmaxWithLoss(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "BatchNorm1d": self.layers.append( nn.BatchNorm1d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "BatchNorm2d": self.layers.append( nn.BatchNorm2d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "BatchNorm3d": self.layers.append( nn.BatchNorm3d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "GroupNorm": self.layers.append( nn.GroupNorm(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3])) elif net_style == "InstanceNorm1d": self.layers.append( nn.InstanceNorm1d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "InstanceNorm2d": self.layers.append( nn.InstanceNorm2d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "InstanceNorm3d": self.layers.append( nn.InstanceNorm3d(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3], args_values_list[4])) elif net_style == "LayerNorm": self.layers.append( nn.LayerNorm(args_values_list[0], args_values_list[1], args_values_list[2])) elif net_style == "LocalResponseNorm": self.layers.append( nn.LocalResponseNorm(args_values_list[0], args_values_list[1], args_values_list[2], args_values_list[3])) elif net_style == "Linear": self.layers.append( nn.Linear(args_values_list[0], args_values_list[1], args_values_list[2])) elif net_style == "Dropout": self.layers.append( nn.Dropout(args_values_list[0], args_values_list[1])) elif net_style == "Dropout2d": self.layers.append( nn.Dropout2d(args_values_list[0], args_values_list[1])) elif net_style == "Dropout3d": self.layers.append( nn.Dropout3d(args_values_list[0], args_values_list[1])) elif net_style == "AlphaDropout": self.layers.append( nn.AlphaDropout(args_values_list[0], args_values_list[1]))
def __init__(self, image_size=64, z_dim=100, conv_dim=64): super(Generator, self).__init__() self.imsize = image_size layer1 = [] layer2 = [] layer3 = [] # layern = [] last = [] repeat_num = int(np.log2(self.imsize)) - 3 mult = 2**repeat_num # 8 layer1.append( SpectralNorm(nn.ConvTranspose2d(z_dim, conv_dim * mult, 4))) layer1.append(nn.BatchNorm2d(conv_dim * mult)) layer1.append(nn.ReLU()) curr_dim = conv_dim * mult layer2.append( SpectralNorm( nn.ConvTranspose2d(curr_dim, int(curr_dim / 2), 3, 2, 2))) # 4,2,1 layer2.append(nn.BatchNorm2d(int(curr_dim / 2))) layer2.append(nn.ReLU()) curr_dim = int(curr_dim / 2) layer3.append( SpectralNorm( nn.ConvTranspose2d(curr_dim, int(curr_dim / 2), 3, 2, 2))) layer3.append(nn.BatchNorm2d(int(curr_dim / 2))) layer3.append(nn.ReLU()) # curr_dim = int(curr_dim / 2) # # layern.append(SpectralNorm(nn.ConvTranspose1d(curr_dim, int(curr_dim / 2), 4, 2, 1))) # layern.append(nn.BatchNorm2d(int(curr_dim / 2))) # layern.append(nn.ReLU()) if self.imsize == 64: layer4 = [] curr_dim = int(curr_dim / 2) layer4.append( SpectralNorm( nn.ConvTranspose2d(curr_dim, int(curr_dim / 2), 4, 2, 1))) layer4.append(nn.BatchNorm2d(int(curr_dim / 2))) layer4.append(nn.ReLU()) self.l4 = nn.Sequential(*layer4) curr_dim = int(curr_dim / 2) # self.ln = nn.Sequential(*layern) self.l1 = nn.Sequential(*layer1) self.l2 = nn.Sequential(*layer2) self.l3 = nn.Sequential(*layer3) last.append(nn.ConvTranspose2d(64, 1, 2, 2, 1)) # curr_dim last.append(nn.Tanh()) self.last = nn.Sequential(*last) self.attn1 = Self_Attn(64, 'relu') #128 self.attn2 = Self_Attn(64, 'relu') self.input1d2d = nn.ConvTranspose1d(144, 128, 1)
def __init__(self): super().__init__() # encoder gets a noisy signal as input [B x 1 x 16384] ''' class torch.nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True) in_channels (int) - number of channels in the input signal out_channels (int) - number of channels produced by the convolution; one 1-D kernel is needed per output channel kernel_size (int or tuple) - size of the convolving kernel; the kernel is (k,) and its second dimension is given by in_channels, so the effective kernel size is kernel_size * in_channels stride (int or tuple, optional) - stride of the convolution padding (int or tuple, optional) - number of zeros padded on each side of the input dilation (int or tuple, optional) - spacing between kernel elements groups (int, optional) - number of blocked connections from input channels to output channels bias (bool, optional) - if bias=True, adds a learnable bias Input: (N, C_in, L_in) Output: (N, C_out, L_out) where $$L_{out}=floor((L_{in}+2*padding-dilation*(kernel_size-1)-1)/stride+1)$$ ''' self.enc1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=32, stride=2, padding=15) # [B x 16 x 8192] 1->16 self.enc1_nl = nn.PReLU() # PReLU(x)=max(0,x)+a*min(0,x) Parametric ReLU torch.nn.PReLU(num_parameters=1, init=0.25) ''' torch.nn.PReLU(num_parameters=1, init=0.25): PReLU(x) = max(0,x) + a * min(0,x), where a is a learnable parameter. By default nn.PReLU() learns a single a shared across all inputs; nn.PReLU(nChannels) learns one a per channel. Note: do not use weight decay when learning a, for better performance. Parameters: num_parameters: number of a values to learn, default 1 init: initial value of a, default 0.25 ''' self.enc2 = nn.Conv1d(16, 32, 32, 2, 15) # [B x 32 x 4096] self.enc2_nl = nn.PReLU() self.enc3 = nn.Conv1d(32, 32, 32, 2, 15) # [B x 32 x 2048] self.enc3_nl = nn.PReLU() self.enc4 = nn.Conv1d(32, 64, 32, 2, 15) # [B x 64 x 1024] self.enc4_nl = nn.PReLU() self.enc5 = nn.Conv1d(64, 64, 32, 2, 15) # [B x 64 x 512] self.enc5_nl = nn.PReLU() self.enc6 = nn.Conv1d(64, 128, 32, 2, 15) # [B x 128 x 256] self.enc6_nl = nn.PReLU() self.enc7 = nn.Conv1d(128, 128, 32, 2, 15) # [B x 128 x 128] self.enc7_nl = nn.PReLU() self.enc8 = nn.Conv1d(128, 256, 32, 2, 15) # [B x 256 x 64] self.enc8_nl = nn.PReLU() self.enc9 = nn.Conv1d(256, 256, 32, 2, 15) # [B x 256 x 32] self.enc9_nl = nn.PReLU() self.enc10 = nn.Conv1d(256, 512, 32, 2, 15) # [B x 512 x 16] self.enc10_nl = nn.PReLU() self.enc11 = nn.Conv1d(512, 1024, 32, 2, 15) # [B x 1024 x 8] self.enc11_nl = nn.PReLU() # decoder generates an enhanced signal # each decoder output is concatenated with the homologous encoder output, # so the feature map sizes are doubled self.dec10 = nn.ConvTranspose1d(in_channels=2048, out_channels=512, kernel_size=32, stride=2, padding=15) # transposed (de-)convolution ''' shape: Input: (N, C_in, L_in) Output: (N, C_out, L_out) where $$L_{out}=(L_{in}-1)*stride-2*padding+kernel_size+output_padding$$ ''' self.dec10_nl = nn.PReLU() # out : [B x 512 x 16] -> (concat) [B x 1024 x 16] self.dec9 = nn.ConvTranspose1d(1024, 256, 32, 2, 15) # [B x 256 x 32] self.dec9_nl = nn.PReLU() self.dec8 = nn.ConvTranspose1d(512, 256, 32, 2, 15) # [B x 256 x 64] self.dec8_nl = nn.PReLU() self.dec7 = nn.ConvTranspose1d(512, 128, 32, 2, 15) # [B x 128 x 128] self.dec7_nl = nn.PReLU() self.dec6 = nn.ConvTranspose1d(256, 128, 32, 2, 15) # [B x 128 x 256] self.dec6_nl = nn.PReLU() self.dec5 = nn.ConvTranspose1d(256, 64, 32, 2, 15) # [B x 64 x 512] self.dec5_nl = nn.PReLU() self.dec4 = nn.ConvTranspose1d(128, 64, 32, 2, 15) # [B x 64 x 1024] self.dec4_nl = nn.PReLU() self.dec3 = nn.ConvTranspose1d(128, 32, 32, 2, 15) # [B x 32 x 2048] self.dec3_nl = nn.PReLU() self.dec2 = nn.ConvTranspose1d(64, 32, 32, 2, 15) # [B x 32 x 4096] self.dec2_nl = nn.PReLU() self.dec1 = nn.ConvTranspose1d(64, 16, 32, 2, 15) # [B x 16 x 8192] self.dec1_nl = nn.PReLU() self.dec_final = nn.ConvTranspose1d(32, 1, 32, 2, 15) # [B x 1 x 16384] self.dec_tanh = nn.Tanh() # initialize weights self.init_weights()
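# A minimal, hedged check (dummy input, not the trained weights) of the two length formulas
# translated above: with kernel_size=32, stride=2, padding=15, Conv1d exactly halves the length
# and ConvTranspose1d exactly doubles it, matching the shape comments.
import torch
import torch.nn as nn

x = torch.randn(1, 1, 16384)
enc = nn.Conv1d(1, 16, 32, 2, 15)
dec = nn.ConvTranspose1d(16, 1, 32, 2, 15)
h = enc(x)                       # floor((16384 + 2*15 - 1*(32 - 1) - 1)/2 + 1) = 8192
y = dec(h)                       # (8192 - 1)*2 - 2*15 + 32 = 16384
print(h.shape, y.shape)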
def __init__(self, args, temp=1., alpha0=10., training_flag=True): super(VAE_Concrete_Simulated, self).__init__() self.temp = temp self.dataset = args.dataset_type self.bottleneck_size = args.bottleneck_size self.learning_type = args.learning_type self.num_classes = args.num_classes self.kernel_size = 7 self.stride = 5 self.pad = 1 self.device = args.device self.model_type = args.model_type.lower() self.vae_dropout = 0.1 print("IMPORTANT: NEED TO PASS proper training_flag for ibp execution ...") if self.model_type == 'ibp': self.params_to_learn = 3 else: self.params_to_learn = 2 if self.model_type == 'ibp': self.training = training_flag self.beta_a = torch.Tensor([10]) self.beta_b = torch.Tensor([1]) self.beta_a = F.softplus(self.beta_a) + 0.01 self.beta_b = F.softplus(self.beta_b) + 0.01 # ENCODER ----------------------------------------------- # channels progression: 1 -> [32, 64, 128, 200, 200] self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=False) self.bn1 = nn.BatchNorm1d(num_features=32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True) self.bn2 = nn.BatchNorm1d(num_features=64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True) self.bn3 = nn.BatchNorm1d(num_features=128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) self.conv4 = nn.Conv1d(in_channels=128, out_channels=self.bottleneck_size*self.params_to_learn, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True) self.bn4 = nn.BatchNorm1d(num_features=self.bottleneck_size*self.params_to_learn, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) self.conv5 = nn.Conv1d(in_channels=self.bottleneck_size*self.params_to_learn, out_channels=self.bottleneck_size*self.params_to_learn, kernel_size=self.kernel_size, padding=self.pad, stride=1, bias=True) self.bn5 = nn.BatchNorm1d(num_features=self.bottleneck_size*self.params_to_learn) # Learns means self.conv_mean = nn.Conv1d(in_channels=self.bottleneck_size*self.params_to_learn, out_channels=self.bottleneck_size, kernel_size=1, padding=0, stride=1, groups=1, bias=True) # Learns logvar self.conv_logvar = nn.Conv1d(in_channels=self.bottleneck_size*self.params_to_learn, out_channels=self.bottleneck_size, kernel_size=1, padding=0, stride=1, groups=1, bias=True) if self.model_type == 'ibp': self.conv_bernoulli = nn.Conv1d(in_channels=self.bottleneck_size*self.params_to_learn, out_channels=self.bottleneck_size, kernel_size=1, padding=0, stride=1, groups=1, bias=True) # Classifier ---------------------------- self.full_conn1 = nn.Linear(in_features=self.bottleneck_size*self.params_to_learn, out_features=100) self.full_conn2 = nn.Linear(in_features=100, out_features=self.num_classes) # IBP ----------------------------------- if self.model_type == 'ibp': a_val = np.log(np.exp(alpha0) - 1) # inverse softplus b_val = np.log(np.exp(1.) 
- 1) self.beta_a = nn.Parameter(torch.Tensor(self.bottleneck_size).zero_() + a_val) self.beta_b = nn.Parameter(torch.Tensor(self.bottleneck_size).zero_() + b_val) # DECODER ----------------------------- if self.learning_type == 'supervised' or self.learning_type == 'baseline': self.unconv1 = nn.ConvTranspose1d(in_channels=self.bottleneck_size+10, out_channels=self.bottleneck_size*self.params_to_learn, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True) elif self.learning_type == 'unsupervised': self.unconv1 = nn.ConvTranspose1d(in_channels=self.bottleneck_size, out_channels=self.bottleneck_size*self.params_to_learn, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True) self.unbn1 = nn.BatchNorm1d(num_features=self.bottleneck_size*self.params_to_learn, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) self.unconv2 = nn.ConvTranspose1d(in_channels=self.bottleneck_size*self.params_to_learn, out_channels=128, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True) self.unbn2 = nn.BatchNorm1d(num_features=128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) self.unconv3 = nn.ConvTranspose1d(in_channels=128, out_channels=64, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True) self.unbn3 = nn.BatchNorm1d(num_features=64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) self.unconv4 = nn.ConvTranspose1d(in_channels=64, out_channels=32, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True) self.unbn4 = nn.BatchNorm1d(num_features=32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) self.unconv5 = nn.ConvTranspose1d(in_channels=32, out_channels=1, kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True)
def __init__( self, feat_in, feat_out, feat_hidden, stride_layers, non_stride_layers=0, kernel_size=11, init_mode="xavier_uniform", activation="relu", stride_transpose=True, ): super().__init__() if ((stride_layers + non_stride_layers) > 0) and (kernel_size < 3 or kernel_size % 2 == 0): raise ValueError("Kernel size in this decoder needs to be >= 3 and odd when using at least 1 conv layer.") activation = jasper_activations[activation]() self.feat_in = feat_in self.feat_out = feat_out self.feat_hidden = feat_hidden self.decoder_layers = [nn.Conv1d(self.feat_in, self.feat_hidden, kernel_size=1, bias=True)] for i in range(stride_layers): self.decoder_layers.append(activation) if stride_transpose: self.decoder_layers.append( nn.ConvTranspose1d( self.feat_hidden, self.feat_hidden, kernel_size, stride=2, padding=(kernel_size - 3) // 2 + 1, output_padding=1, bias=True, groups=self.feat_hidden, ) ) else: self.decoder_layers.append( nn.Conv1d( self.feat_hidden, self.feat_hidden, kernel_size, stride=2, padding=(kernel_size - 1) // 2, bias=True, groups=self.feat_hidden, ) ) self.decoder_layers.append(nn.Conv1d(self.feat_hidden, self.feat_hidden, kernel_size=1, bias=True)) self.decoder_layers.append(nn.BatchNorm1d(self.feat_hidden, eps=1e-3, momentum=0.1)) for i in range(non_stride_layers): self.decoder_layers.append(activation) self.decoder_layers.append( nn.Conv1d( self.feat_hidden, self.feat_hidden, kernel_size, bias=True, groups=self.feat_hidden, padding=kernel_size // 2, ) ) self.decoder_layers.append(nn.Conv1d(self.feat_hidden, self.feat_hidden, kernel_size=1, bias=True)) self.decoder_layers.append(nn.BatchNorm1d(self.feat_hidden, eps=1e-3, momentum=0.1)) self.decoder_layers.append(activation) self.decoder_layers.append(nn.Conv1d(self.feat_hidden, self.feat_out, kernel_size=1, bias=True)) self.decoder_layers = nn.Sequential(*self.decoder_layers) self.apply(lambda x: init_weights(x, mode=init_mode))
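# A hedged sketch (toy channel count, not the decoder's real feature sizes) verifying that the
# transposed-conv branch above upsamples by exactly 2x: with stride=2,
# padding=(kernel_size - 3)//2 + 1 and output_padding=1,
# Lout = (Lin - 1)*2 - 2*padding + kernel_size + 1 = 2*Lin for any odd kernel_size >= 3.
import torch
import torch.nn as nn

kernel_size, hidden = 11, 8
up = nn.ConvTranspose1d(hidden, hidden, kernel_size, stride=2,
                        padding=(kernel_size - 3) // 2 + 1,
                        output_padding=1, groups=hidden)
print(up(torch.randn(1, hidden, 50)).shape)          # torch.Size([1, 8, 100])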
def __init__(self, N, L, B, H, P, X, R, S=1): super(ResidualTN, self).__init__() # Number of sources to produce self.S, self.N, self.L, self.B, self.H, self.P = S, N, L, B, H, P self.X, self.R = X, R # Front end self.fe = nn.ModuleList([ nn.Conv1d(in_channels=1, out_channels=N, kernel_size=L, stride=L // 2, padding=L // 2), nn.ReLU(), ]) self.ln = nn.BatchNorm1d(N) self.l1 = nn.Conv1d(in_channels=N, out_channels=B, kernel_size=1) # Separation module # Residual connections self.residual_to_from = [[] for _ in range(R * X)] self.residual_to_from[8] = [-1] self.residual_to_from[16] = [-1, 8] self.residual_to_from[24] = [-1, 8, 16] self.residual_to_from[11] = [3] self.residual_to_from[19] = [3, 11] self.residual_to_from[27] = [3, 11, 19] self.layer_to_dense = {} j = 0 for i, res_connections in enumerate(self.residual_to_from): if len(res_connections): self.layer_to_dense[i] = j j += 1 self.residual_denses = nn.ModuleList([ nn.Conv1d(in_channels=len(res_connections) * B, out_channels=B, kernel_size=1) for res_connections in self.residual_to_from if len(res_connections) > 0 ]) self.prev_connections = {} self.residual_norms = [] k = 0 for res_from in self.residual_to_from: for res_ind in res_from: if res_ind not in self.prev_connections: self.prev_connections[res_ind] = k k += 1 self.residual_norms.append(CepstralNorm(B)) self.residual_norms = nn.ModuleList(self.residual_norms) self.sm = nn.ModuleList([ ResidualTN.TCN(B=B, H=H, P=P, D=2**d) for _ in range(R) for d in range(X) ]) if B != N: self.reshape_before_masks = nn.Conv1d(in_channels=B, out_channels=N, kernel_size=1) # Masks layer self.m = nn.Conv2d(in_channels=1, out_channels=S, kernel_size=(N + 1, 1), padding=(N - N // 2, 0)) # Back end self.be = nn.ConvTranspose1d(in_channels=N * S, out_channels=S, output_padding=(L // 2) - 1, kernel_size=L, stride=L // 2, padding=L // 2, groups=S) self.ln_mask_in = nn.BatchNorm1d(self.N)
def __init__(self, nwin=5, in_channels=4, out_channels=2, start_filts=128): super(stabnet, self).__init__() self.conv1 = nn.Conv1d(in_channels * (nwin - 1), start_filts, kernel_size=3, stride=1, dilation=1, padding=1) self.conv2 = nn.Conv1d(start_filts, start_filts * 2, kernel_size=4, stride=2, dilation=1, padding=1) self.conv3 = nn.Conv1d(start_filts * 2, start_filts * 2, kernel_size=3, stride=1, dilation=1, padding=1) self.conv4 = nn.Conv1d(start_filts * 2, start_filts * 4, kernel_size=4, stride=2, dilation=1, padding=1) self.conv5 = nn.Conv1d(start_filts * 4, start_filts * 4, kernel_size=3, stride=1, dilation=1, padding=1) self.conv6 = nn.Conv1d(start_filts * 4, start_filts * 4, kernel_size=3, stride=1, dilation=1, padding=1) self.conv7 = nn.Conv1d(start_filts * 4, start_filts * 8, kernel_size=4, stride=2, dilation=1, padding=1) self.conv8 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=2, padding=2) self.conv9 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=2, padding=2) self.conv10 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=2, padding=2) self.conv1_1 = nn.Conv1d(in_channels * (nwin - 1), start_filts, kernel_size=3, stride=1, dilation=1, padding=1) self.conv2_1 = nn.Conv1d(start_filts, start_filts * 2, kernel_size=4, stride=2, dilation=1, padding=1) self.conv3_1 = nn.Conv1d(start_filts * 2, start_filts * 2, kernel_size=3, stride=1, dilation=1, padding=1) self.conv4_1 = nn.Conv1d(start_filts * 2, start_filts * 4, kernel_size=4, stride=2, dilation=1, padding=1) self.conv5_1 = nn.Conv1d(start_filts * 4, start_filts * 4, kernel_size=3, stride=1, dilation=1, padding=1) self.conv6_1 = nn.Conv1d(start_filts * 4, start_filts * 4, kernel_size=3, stride=1, dilation=1, padding=1) self.conv7_1 = nn.Conv1d(start_filts * 4, start_filts * 8, kernel_size=4, stride=2, dilation=1, padding=1) self.conv8_1 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=2, padding=2) self.conv9_1 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=2, padding=2) self.conv10_1 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=2, padding=2) self.conv11 = nn.ConvTranspose1d(start_filts * 32, start_filts * 8, kernel_size=4, stride=2, dilation=1, padding=1) self.conv12 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=1, padding=1) self.conv13 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=1, padding=1) self.conv14 = nn.ConvTranspose1d(start_filts * 16, start_filts * 4, kernel_size=4, stride=2, dilation=1, padding=1) self.conv15 = nn.Conv1d(start_filts * 4, start_filts * 4, kernel_size=3, stride=1, dilation=1, padding=1) self.conv16 = nn.ConvTranspose1d(start_filts * 8, start_filts * 2, kernel_size=4, stride=2, dilation=1, padding=1) self.conv17 = nn.Conv1d(start_filts * 2, start_filts * 2, kernel_size=3, stride=1, dilation=1, padding=1) self.conv18 = nn.Conv1d(start_filts * 2, out_channels * (nwin - 2), kernel_size=1, stride=1, dilation=1, padding=0) self.conv11_1 = nn.ConvTranspose1d(start_filts * 32, start_filts * 8, kernel_size=4, stride=2, dilation=1, padding=1) self.conv12_1 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=1, padding=1) self.conv13_1 = nn.Conv1d(start_filts * 8, start_filts * 8, kernel_size=3, stride=1, dilation=1, padding=1) self.conv14_1 = nn.ConvTranspose1d(start_filts * 16, start_filts * 4, kernel_size=4, stride=2, 
dilation=1, padding=1) self.conv15_1 = nn.Conv1d(start_filts * 4, start_filts * 4, kernel_size=3, stride=1, dilation=1, padding=1) self.conv16_1 = nn.ConvTranspose1d(start_filts * 8, start_filts * 2, kernel_size=4, stride=2, dilation=1, padding=1) self.conv17_1 = nn.Conv1d(start_filts * 2, start_filts * 2, kernel_size=3, stride=1, dilation=1, padding=1) self.conv18_1 = nn.Conv1d(start_filts * 2, out_channels * (nwin - 2), kernel_size=1, stride=1, dilation=1, padding=0) self.bn2 = nn.BatchNorm1d(start_filts * 2) self.bn4 = nn.BatchNorm1d(start_filts * 4) self.bn8 = nn.BatchNorm1d(start_filts * 8) self.bn16 = nn.BatchNorm1d(start_filts * 16) self.linconv1 = nn.Conv1d(start_filts * 2, start_filts, kernel_size=3, stride=1, dilation=1, padding=1) self.linconv2 = nn.Conv1d(start_filts, 2, kernel_size=1, stride=1, dilation=1, padding=0) self.lin1 = nn.Linear(2 * 512, 512) self.lin2 = nn.Linear(512, 4 * (nwin - 2)) #self.batchnorm for m in self.modules(): if isinstance(m, nn.Conv1d): nn.init.kaiming_normal_(m.weight.data) #m.weight.data.fill_(0) if m.bias is not None: m.bias.data.zero_() elif isinstance(m, nn.Linear): nn.init.constant_(m.weight.data, 0)
def __init__(self, up_scale: int, in_channels: int, out_channels: int, filter_lengths: Union[Sequence[int], int], deconv_filter_length: Optional[int] = None, groups: int = 1, dropouts: Union[Sequence[float], float] = 0.0, mode: str = "deconv", **config) -> NoReturn: """ finished, NOT checked, Parameters ---------- up_scale: int, scale of up sampling in_channels: int, number of channels in the input out_channels: int, number of channels produced by the convolutional layers filter_lengths: int or sequence of int, length(s) of the filters (kernel size) of the convolutional layers deconv_filter_length: int, only used when `mode` == "deconv" length(s) of the filters (kernel size) of the deconvolutional upsampling layer groups: int, default 1, not used currently, connection pattern (of channels) of the inputs and outputs dropouts: float or sequence of float, default 0.0, dropout ratio after each `Conv_Bn_Activation` mode: str, default "deconv", case insensitive, mode of up sampling config: dict, other parameters, including activation choices, weight initializer, batch normalization choices, etc. for the deconvolutional layers """ super().__init__() self.__up_scale = up_scale self.__in_channels = in_channels self.__out_channels = out_channels self.__deconv_filter_length = deconv_filter_length self.__mode = mode.lower() assert self.__mode in self.__MODES__ self.config = ED(deepcopy(config)) if self.__DEBUG__: print( f"configuration of {self.__name__} is as follows\n{dict_to_str(self.config)}" ) # the following has to be checked # if bilinear, use the normal convolutions to reduce the number of channels if self.__mode == "deconv": self.__deconv_padding = max( 0, (self.__deconv_filter_length - self.__up_scale) // 2) self.up = nn.ConvTranspose1d( in_channels=self.__in_channels, out_channels=self.__in_channels, kernel_size=self.__deconv_filter_length, stride=self.__up_scale, padding=self.__deconv_padding, ) else: self.up = nn.Upsample( scale_factor=self.__up_scale, mode=mode, ) self.conv = TripleConv( # `+ self.__out_channels` corr. to the output of the corr. down layer in_channels=self.__in_channels + self.__out_channels[-1], out_channels=self.__out_channels, filter_lengths=filter_lengths, subsample_lengths=1, groups=groups, dropouts=dropouts, **(self.config), )
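# A small standalone sketch (not using the class above) of why the "deconv" branch's padding
# choice, max(0, (deconv_filter_length - up_scale)//2), yields an exact up_scale-fold upsampling
# whenever deconv_filter_length - up_scale is even and non-negative:
# Lout = (Lin - 1)*up_scale - 2*padding + deconv_filter_length = Lin*up_scale.
import torch
import torch.nn as nn

up_scale, deconv_filter_length, channels = 4, 8, 12   # illustrative values
padding = max(0, (deconv_filter_length - up_scale) // 2)   # = 2
up = nn.ConvTranspose1d(channels, channels, deconv_filter_length,
                        stride=up_scale, padding=padding)
print(up(torch.randn(1, channels, 125)).shape)        # torch.Size([1, 12, 500])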
def __init__(self): super().__init__() in_channels = 23 self.input_conv = nn.Sequential( nn.Conv1d(in_channels, in_channels, kernel_size=7, stride=1, padding=3), nn.BatchNorm1d(in_channels), nn.Tanh()) self.downsampler = nn.Sequential() self.downsampler.add_module( 'CONV_1', nn.Conv1d(in_channels, 8, kernel_size=7, stride=2, padding=3)) self.downsampler.add_module('BN_1', nn.BatchNorm1d(8)) self.downsampler.add_module('TANH_1', nn.Tanh()) self.downsampler.add_module( 'CONV_2', nn.Conv1d(8, 16, kernel_size=7, stride=2, padding=3)) self.downsampler.add_module('BN_2', nn.BatchNorm1d(16)) self.downsampler.add_module('TANH_2', nn.Tanh()) self.downsampler.add_module( 'CONV_3', nn.Conv1d(16, 32, kernel_size=7, stride=2, padding=3)) self.downsampler.add_module('BN_3', nn.BatchNorm1d(32)) self.downsampler.add_module('TANH_3', nn.Tanh()) self.downsampler.add_module( 'CONV_4', nn.Conv1d(32, 64, kernel_size=7, stride=2, padding=3)) self.downsampler.add_module('BN_4', nn.BatchNorm1d(64)) self.downsampler.add_module('TANH_4', nn.Tanh()) self.upsampler = nn.Sequential() self.upsampler.add_module( 'CONVTRANS_1', nn.ConvTranspose1d(64, 32, kernel_size=7, stride=2, padding=3, output_padding=1)) self.upsampler.add_module('BN_1', nn.BatchNorm1d(32)) self.upsampler.add_module('TANH_1', nn.Tanh()) self.upsampler.add_module( 'CONVTRANS_2', nn.ConvTranspose1d(32, 16, kernel_size=7, stride=2, padding=3, output_padding=1)) self.upsampler.add_module('BN_2', nn.BatchNorm1d(16)) self.upsampler.add_module('TANH_2', nn.Tanh()) self.upsampler.add_module( 'CONVTRANS_3', nn.ConvTranspose1d(16, 8, kernel_size=7, stride=2, padding=3, output_padding=1)) self.upsampler.add_module('BN_3', nn.BatchNorm1d(8)) self.upsampler.add_module('TANH_3', nn.Tanh()) self.upsampler.add_module( 'CONVTRANS_4', nn.ConvTranspose1d(8, in_channels, kernel_size=7, stride=2, padding=3, output_padding=1)) self.upsampler.add_module('BN_4', nn.BatchNorm1d(in_channels)) self.upsampler.add_module('TANH_4', nn.Tanh()) self.output_conv = nn.Conv1d(in_channels, 1, kernel_size=7, stride=1, padding=3)
def __init__(self, chs=(128, 64)): super().__init__() self.chs = chs self.upconvs = nn.ModuleList([nn.ConvTranspose1d(chs[i], chs[i+1], 2, 2) for i in range(len(chs)-1)]) self.dec_blocks = nn.ModuleList([Block(chs[i], chs[i+1]) for i in range(len(chs)-1)])
def __init__(self): super(Conv_Decoder, self).__init__() self.conv1tr = nn.ConvTranspose1d(emb2, emb1, 3) self.conv2tr = nn.ConvTranspose1d(emb1, emb0, 3) self.conv3tr = nn.ConvTranspose1d(emb0, 2 * num_channels, 3)
def __init__(self, input_shape, z_shape=20, output_shape=11): super(VAE_without_label, self).__init__() self.input_shape = input_shape self.z_shape = z_shape self.output_shape = output_shape # encoder self.encoder = nn.Sequential() self.encoder.add_module( 'enc_conv1', nn.Conv1d(in_channels=3, out_channels=9, kernel_size=16, stride=10, padding=6, padding_mode='zeros')) self.encoder.add_module('enc_relu1', nn.ReLU(True)) self.encoder.add_module( 'enc_conv2', nn.Conv1d(in_channels=9, out_channels=9, kernel_size=16, stride=10, padding=6, padding_mode='zeros')) self.encoder.add_module('enc_relu2', nn.ReLU(True)) self.encoder.add_module( 'enc_conv3', nn.Conv1d(in_channels=9, out_channels=9, kernel_size=16, stride=10, padding=6, padding_mode='zeros')) self.encoder.add_module('enc_relu3', nn.ReLU(True)) # z to mean self.encmean_fc11 = nn.Linear(int(input_shape / 10 / 10 / 10 * 9), z_shape) # z to var self.encvar_fc12 = nn.Linear(int(input_shape / 10 / 10 / 10 * 9), z_shape) # decoder self.dec_fc1 = nn.Linear(z_shape, int(input_shape / 10 / 10 / 10 * 9)) self.decoder = nn.Sequential() self.decoder.add_module( 'dec_deconv1', nn.ConvTranspose1d(in_channels=9, out_channels=9, kernel_size=16, stride=10, padding=3, padding_mode='zeros')) self.decoder.add_module('dec_relu1', nn.ReLU(True)) self.decoder.add_module( 'dec_deconv2', nn.ConvTranspose1d(in_channels=9, out_channels=9, kernel_size=16, stride=10, padding=3, padding_mode='zeros')) self.decoder.add_module('dec_relu2', nn.ReLU(True)) self.decoder.add_module( 'dec_deconv3', nn.ConvTranspose1d(in_channels=9, out_channels=3, kernel_size=16, stride=10, padding=3, padding_mode='zeros')) self.decoder.add_module('dec_sig1', nn.Sigmoid())
def wn_conv_transpose1d(*args, **kwargs): return nn.utils.weight_norm(nn.ConvTranspose1d(*args, **kwargs))
def __init__( self, sources: int = 2, n_audio_channels: int = 2, # pylint: disable=redefined-outer-name kernel_size: int = 8, stride: int = 4, context: int = 3, depth: int = 6, channels: int = 64, growth: float = 2.0, lstm_layers: int = 2, rescale: float = 0.1, upsample: bool = False, location_shifts=12): # pylint: disable=redefined-outer-name super().__init__() self.sources = sources self.n_audio_channels = n_audio_channels self.kernel_size = kernel_size self.stride = stride self.context = context self.depth = depth self.channels = channels self.growth = growth self.lstm_layers = lstm_layers self.rescale = rescale self.upsample = upsample self.location_shifts = location_shifts self.encoder = nn.ModuleList() # Source encoder self.decoder = nn.ModuleList() # Audio output decoder self.loc_decoder = nn.ModuleList() # Location decoder self.final = None if upsample: self.final = nn.Conv1d(channels + n_audio_channels, sources * n_audio_channels, 1) stride = 1 activation = nn.GLU(dim=1) in_channels = n_audio_channels # Number of input channels in_loc_channels = 3 # Number of input location channels # Wave U-Net structure for index in range(depth): encode = [] encode += [ nn.Conv1d(in_channels, channels, kernel_size, stride), nn.ReLU() ] encode += [nn.Conv1d(channels, 2 * channels, 1), activation] self.encoder.append(nn.Sequential(*encode)) decode = [] if index > 0: out_channels = in_channels out_loc_channels = 3 else: if upsample: out_channels = channels else: out_channels = sources * n_audio_channels out_loc_channels = sources * 3 decode += [nn.Conv1d(channels, 2 * channels, context), activation] if upsample: decode += [ nn.Conv1d(channels, out_channels, kernel_size, stride=1) ] else: decode += [ nn.ConvTranspose1d(channels, out_channels, kernel_size, stride) ] if index > 0: decode.append(nn.ReLU()) self.decoder.insert(0, nn.Sequential(*decode)) loc_decoder = [] loc_decoder += [ nn.ConvTranspose1d(in_loc_channels, out_loc_channels, kernel_size, stride) ] if index > 0: loc_decoder.append(nn.ReLU()) self.loc_decoder.insert(0, nn.Sequential(*loc_decoder)) in_channels = channels channels = int(growth * channels) # Bi-directional LSTM for the bottleneck layer channels = in_channels self.lstm = nn.LSTM(bidirectional=True, num_layers=lstm_layers, hidden_size=channels, input_size=channels) self.lstm_linear = nn.Linear(2 * channels, channels) self.loc_prediction = nn.Linear(2 * channels, 3) # self.loc_prediction = nn.Linear(18 * 2048, 2*self.location_shifts + 1) rescale_module(self, reference=rescale)
def __init__(self,
             sources=2,
             audio_channels=1,
             channels=80,
             depth=6,
             rewrite=True,
             glu=True,
             upsample=False,
             rescale=0.1,
             kernel_size=8,
             stride=4,
             growth=2.,
             lstm_layers=2,
             context=3):
    """
    Args:
        sources (int): number of sources to separate
        audio_channels (int): stereo or mono
        channels (int): first convolution channels
        depth (int): number of encoder/decoder layers
        rewrite (bool): add 1x1 convolution to each encoder layer and a
            convolution to each decoder layer. For the decoder layer,
            `context` gives the kernel size.
        glu (bool): use glu instead of ReLU
        upsample (bool): use linear upsampling with convolutions Wave-U-Net
            style, instead of transposed convolutions
        rescale (int): rescale initial weights of convolutions to get their
            standard deviation closer to `rescale`
        kernel_size (int): kernel size for convolutions
        stride (int): stride for convolutions
        growth (float): multiply (resp divide) number of channels by that
            for each layer of the encoder (resp decoder)
        lstm_layers (int): number of lstm layers, 0 = no lstm
        context (int): kernel size of the convolution in the decoder before
            the transposed convolution. If > 1, will provide some context
            from neighboring time steps.
    """
    super().__init__()
    self.audio_channels = audio_channels
    self.sources = sources
    self.kernel_size = kernel_size
    self.context = context
    self.stride = stride
    self.depth = depth
    self.upsample = upsample
    self.channels = channels

    self.encoder = nn.ModuleList()
    self.decoder = nn.ModuleList()

    self.final = None
    if upsample:
        self.final = nn.Conv1d(channels + audio_channels,
                               sources * audio_channels, 1)
        stride = 1

    if glu:
        activation = nn.GLU(dim=1)
        ch_scale = 2
    else:
        activation = nn.ReLU()
        ch_scale = 1

    in_channels = audio_channels
    for index in range(depth):
        encode = []
        encode += [
            nn.Conv1d(
                in_channels, channels, kernel_size, stride,
                # padding=(kernel_size - 1)//2
            ),
            nn.ReLU()
        ]
        if rewrite:
            encode += [
                nn.Conv1d(
                    channels, ch_scale * channels, 1,
                    # padding=0
                ),
                activation
            ]
        self.encoder.append(nn.Sequential(*encode))

        decode = []
        if index > 0:
            out_channels = in_channels
        else:
            if upsample:
                out_channels = channels
            else:
                out_channels = sources * audio_channels
        if rewrite:
            decode += [
                nn.Conv1d(
                    channels, ch_scale * channels, context,
                    # padding=(context - 1)//2
                ),
                activation
            ]
        if upsample:
            decode += [
                nn.Conv1d(
                    channels, out_channels, kernel_size,
                    # padding=(kernel_size - 1)//2,
                    stride=1),
            ]
        else:
            decode += [
                nn.ConvTranspose1d(
                    channels, out_channels, kernel_size, stride,
                    # padding=(kernel_size)//2-1,
                    # output_padding=stride // 2
                )
            ]
        if index > 0:
            decode.append(nn.ReLU())
        self.decoder.insert(0, nn.Sequential(*decode))

        in_channels = channels
        channels = int(growth * channels)

    channels = in_channels

    if lstm_layers:
        self.lstm = BLSTM(channels, lstm_layers)
    else:
        self.lstm = None
    if rescale:
        rescale_module(self, reference=rescale)
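# For reference, a small sketch (using the defaults above: audio_channels=1,
# channels=80, growth=2.0, depth=6) of how the encoder widths evolve; it mirrors the
# in_channels = channels; channels = int(growth * channels) update that closes each
# loop iteration, and the decoder mirrors these widths in reverse.
in_channels, channels, growth = 1, 80, 2.0
for index in range(6):
    print(f"encoder[{index}]: Conv1d({in_channels} -> {channels})")
    in_channels, channels = channels, int(growth * channels)
# encoder[0]: Conv1d(1 -> 80)
# encoder[1]: Conv1d(80 -> 160)
# ...
# encoder[5]: Conv1d(1280 -> 2560)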
def __init__(self,
             in_channels=80,
             out_channels=1,
             proj_kernel=7,
             base_channels=512,
             upsample_factors=(8, 8, 2, 2),
             res_kernel=3,
             num_res_blocks=3):
    super(MelganGenerator, self).__init__()

    # assert model parameters
    assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number."

    # setup additional model parameters
    base_padding = (proj_kernel - 1) // 2
    act_slope = 0.2
    self.inference_padding = 2

    # initial layer
    layers = []
    layers += [
        nn.ReflectionPad1d(base_padding),
        weight_norm(
            nn.Conv1d(in_channels, base_channels, kernel_size=proj_kernel,
                      stride=1, bias=True))
    ]

    # upsampling layers and residual stacks
    for idx, upsample_factor in enumerate(upsample_factors):
        layer_in_channels = base_channels // (2**idx)
        layer_out_channels = base_channels // (2**(idx + 1))
        layer_filter_size = upsample_factor * 2
        layer_stride = upsample_factor
        layer_output_padding = upsample_factor % 2
        layer_padding = upsample_factor // 2 + layer_output_padding
        layers += [
            nn.LeakyReLU(act_slope),
            weight_norm(
                nn.ConvTranspose1d(layer_in_channels,
                                   layer_out_channels,
                                   layer_filter_size,
                                   stride=layer_stride,
                                   padding=layer_padding,
                                   output_padding=layer_output_padding,
                                   bias=True)),
            ResidualStack(channels=layer_out_channels,
                          num_res_blocks=num_res_blocks,
                          kernel_size=res_kernel)
        ]

    layers += [nn.LeakyReLU(act_slope)]

    # final layer
    layers += [
        nn.ReflectionPad1d(base_padding),
        weight_norm(
            nn.Conv1d(layer_out_channels, out_channels, proj_kernel,
                      stride=1, bias=True)),
        nn.Tanh()
    ]
    self.layers = nn.Sequential(*layers)
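# Hedged check (not from the original generator) of the upsampling arithmetic above:
# with kernel = 2*f, stride = f, output_padding = f % 2 and padding = f//2 + f % 2,
#   L_out = (L_in - 1)*f - 2*(f//2 + f%2) + 2*f + f%2 = L_in * f,
# i.e. each transposed conv upsamples the time axis by exactly its factor.
# The input length 50 and channel width 4 below are assumed example values.
import torch
import torch.nn as nn

for f in (8, 8, 2, 2, 3):  # the default factors plus an odd one
    deconv = nn.ConvTranspose1d(4, 4, f * 2, stride=f,
                                padding=f // 2 + f % 2, output_padding=f % 2)
    assert deconv(torch.randn(1, 4, 50)).shape[-1] == 50 * f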
def test_builder_to_backend_stress(
    self,
    use_cpu_only,
    backend,
    conv_dim,
    padding,
    DHWKdKhKw,
    stride,
    dilation,
    has_bias,
    groups,
    test_symbolic,
    test_output_shape,
):
    if test_symbolic and test_output_shape:
        # conv_transpose output_shape can only be constant (non-symbolic)
        return

    if backend[0] == "mlprogram" and groups == 2:
        pytest.xfail(
            "rdar://81999134 (ConvTranspose with group > 1 crashing on both CPU and GPU backend)"
        )

    D, H, W, Kd, Kh, Kw = DHWKdKhKw
    N, C_in, C_out = 1, 1 * groups, 2 * groups

    import torch
    import torch.nn as nn

    isDeconv1d = conv_dim == "conv1d"
    isDeconv2d = conv_dim == "conv2d"

    if isDeconv1d:
        strides = [stride[0]]
        dilations = [dilation[0]]
        kernels = [Kh]
        m = nn.ConvTranspose1d(C_in, C_out, kernels, stride=strides,
                               dilation=dilations, bias=has_bias,
                               groups=groups, padding=padding[0])
        input_shape = [N, C_in, H]
        paddings = [padding[0], padding[0]]
    elif isDeconv2d:
        strides = [stride[0], stride[1]]
        dilations = [dilation[0], dilation[1]]
        kernels = [Kh, Kw]
        m = nn.ConvTranspose2d(C_in, C_out, kernels, stride=strides,
                               dilation=dilations, bias=has_bias,
                               groups=groups, padding=(padding[0], padding[1]))
        input_shape = [N, C_in, H, W]
        paddings = [padding[0], padding[0], padding[1], padding[1]]
    else:
        strides = [stride[0], stride[1], stride[2]]
        dilations = [dilation[0], dilation[1], dilation[2]]
        kernels = [Kd, Kh, Kw]
        m = nn.ConvTranspose3d(C_in, C_out, kernels, stride=strides,
                               dilation=dilations, bias=has_bias,
                               groups=groups, padding=padding)
        input_shape = [N, C_in, D, H, W]
        paddings = [
            padding[0], padding[0],
            padding[1], padding[1],
            padding[2], padding[2],
        ]

    wts = m.state_dict()
    weight = wts["weight"].detach().numpy()
    bias = wts["bias"].detach().numpy() if has_bias else None

    input = torch.randn(*input_shape)
    output = m(input)
    output = output.detach().numpy()
    input = input.detach().numpy()

    output_shape = list(output.shape)
    if test_symbolic:
        # For symbolic input test
        # Make Batch Size and input channel as symbolic
        symbolic_batch_size = get_new_symbol()
        input_shape[0] = symbolic_batch_size
        output_shape[0] = symbolic_batch_size

    expected_output_types = tuple(output_shape[:]) + (types.fp32,)
    expected_outputs = [output]

    input_placeholders = {"x": mb.placeholder(shape=input_shape)}
    input_values = {"x": input}

    def build(x):
        arguments = {
            "x": x,
            "weight": weight,
            "pad": paddings,
            "pad_type": "custom",
            "strides": strides,
            "dilations": dilations,
            "groups": groups,
        }
        if has_bias:
            arguments["bias"] = bias
        if test_output_shape:
            arguments["output_shape"] = output.shape
        return mb.conv_transpose(**arguments)

    run_compare_builder(
        build,
        input_placeholders,
        input_values,
        expected_output_types,
        expected_outputs,
        use_cpu_only=use_cpu_only,
        frontend_only=False,
        backend=backend,
    )
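# The test above sizes expected_output_types from the PyTorch output directly; for
# reference, a hedged one-line check of the transposed-conv length formula PyTorch
# uses (the example values are assumed):
#   L_out = (L_in - 1)*stride - 2*padding + dilation*(kernel - 1) + output_padding + 1
import torch
import torch.nn as nn

L_in, k, s, p, d = 12, 3, 2, 1, 2
m = nn.ConvTranspose1d(1, 1, k, stride=s, padding=p, dilation=d)
L_out = (L_in - 1) * s - 2 * p + d * (k - 1) + 1
assert m(torch.randn(1, 1, L_in)).shape[-1] == L_out  # 25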
def __init__(self, featureDim, latentDim):
    super(autoencoder_3conv_vect_vae_conditional, self).__init__()

    self.inputFeat_dim = 5  # +1
    self.input_frameLeng = 120
    self.encode_frameLeng = self.input_frameLeng // 8  # 15 (integer division so the Linear sizes stay ints)
    self.encodeDim_conv1 = 10
    self.encodeDim_conv2 = 20
    self.encodeDim_conv3 = 40
    self.encodeDim_lin1 = 100
    self.latentDim = latentDim  # 100

    self.encoder_conv_1 = nn.Sequential(
        # nn.Dropout(0.25),
        nn.Conv1d(self.inputFeat_dim + 1, self.encodeDim_conv1,
                  kernel_size=5, stride=2, padding=2),  # +1 for the label
        nn.ReLU(True),
        nn.BatchNorm1d(self.encodeDim_conv1))
    self.encoder_conv_2 = nn.Sequential(
        # nn.Dropout(0.25),
        nn.Conv1d(self.encodeDim_conv1, self.encodeDim_conv2,
                  kernel_size=5, stride=2, padding=2),
        nn.ReLU(True),
        nn.BatchNorm1d(self.encodeDim_conv2))
    self.encoder_conv_3 = nn.Sequential(
        # nn.Dropout(0.25),
        nn.Conv1d(self.encodeDim_conv2, self.encodeDim_conv3,
                  kernel_size=5, stride=2, padding=2),
        nn.ReLU(True),
        nn.BatchNorm1d(self.encodeDim_conv3))
    self.encoder_lin1 = nn.Sequential(
        nn.Linear(self.encodeDim_conv3 * self.encode_frameLeng,
                  self.encodeDim_lin1),  # 40x15 (600) -> 100
        nn.ReLU(True),
        nn.BatchNorm1d(self.encodeDim_lin1))
    self.encoder_lin21 = nn.Linear(self.encodeDim_lin1, self.latentDim)
    self.encoder_lin22 = nn.Linear(self.encodeDim_lin1, self.latentDim)

    self.decoder_lin1 = nn.Sequential(
        nn.Linear(self.latentDim + 1, self.encodeDim_lin1),  # +1 for the label
        nn.ReLU(True),
        nn.BatchNorm1d(self.encodeDim_lin1))
    self.decoder_lin2 = nn.Sequential(
        nn.Linear(self.encodeDim_lin1,
                  self.encodeDim_conv3 * self.encode_frameLeng),
        nn.ReLU(True),
        nn.BatchNorm1d(self.encodeDim_conv3 * self.encode_frameLeng))
    self.decoder_conv_1 = nn.Sequential(
        # nn.MaxUnpool1d(kernel_size=2, stride=2),
        # nn.Dropout(0.25),
        nn.ConvTranspose1d(self.encodeDim_conv3, self.encodeDim_conv2,
                           kernel_size=5, stride=2, padding=2,
                           output_padding=1), )
    self.decoder_conv_2 = nn.Sequential(
        # nn.MaxUnpool1d(kernel_size=2, stride=2),
        # nn.Dropout(0.25),
        nn.ConvTranspose1d(self.encodeDim_conv2, self.encodeDim_conv1,
                           kernel_size=5, stride=2, padding=2,
                           output_padding=1), )
    self.decoder_conv_3 = nn.Sequential(
        # nn.MaxUnpool1d(kernel_size=2, stride=2),
        # nn.Dropout(0.25),
        nn.ConvTranspose1d(self.encodeDim_conv1, self.inputFeat_dim,
                           kernel_size=5, stride=2, padding=2,
                           output_padding=1), )
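# Hedged shape check for the conditional VAE above (the channel width 8 is a
# placeholder; only the lengths matter here): three k=5, s=2, p=2 convolutions take
# the 120-frame input down to 15 frames, and each mirrored ConvTranspose1d with
# output_padding=1 doubles the length again, so decoder_conv_3 ends back at 120.
import torch
import torch.nn as nn

x = torch.randn(4, 8, 120)
down = nn.Sequential(*[nn.Conv1d(8, 8, 5, stride=2, padding=2) for _ in range(3)])
up = nn.ConvTranspose1d(8, 8, 5, stride=2, padding=2, output_padding=1)
h = down(x)
print(h.shape[-1], up(h).shape[-1])  # 15 30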
def _init_module(self, x):
    self._init_params(x)
    self.module = nn.ConvTranspose1d(*self.args, **self.kwargs)