def __init__(
    self,
    in_channels=1,
    out_channels=32,
    input_dim=312,
    hidden_dim=32,
    output_dim=10,
):
    super(cnn1d_ser, self).__init__()
    self.classifier = nn.Sequential(
        nn.Conv1d(in_channels, out_channels, 5, stride=1, padding=2),
        nn.BatchNorm1d(out_channels),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Conv1d(out_channels, out_channels, 5, stride=1, padding=2),
        nn.BatchNorm1d(out_channels),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Flatten(),
        nn.Linear(input_dim * out_channels, hidden_dim),
        nn.BatchNorm1d(hidden_dim),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(hidden_dim, output_dim),
    )
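# Hedged shape check for cnn1d_ser (assumption: the class above is a complete
# nn.Module in scope, so self.classifier can be called directly):
import torch
model = cnn1d_ser()
x = torch.randn(4, 1, 312)  # (batch, in_channels, input_dim)
# both convs use kernel 5 with padding 2, so the length stays 312 and
# Flatten sees input_dim * out_channels = 312 * 32 features
print(model.classifier(x).shape)  # torch.Size([4, 10])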
def __init__(
    self,
    c_in,
    c_cond,
    c_h,
    c_out,
    kernel_size,
    n_conv_blocks,
    upsample,
    act,
    sn,
    dropout_rate,
):
    super(Decoder, self).__init__()
    self.n_conv_blocks = n_conv_blocks
    self.upsample = upsample
    self.act = get_act(act)
    # `sn` selects spectral-norm wrapping; the source left `f` as a bare
    # identity, which silently ignored the `sn` argument.
    # (Assumes: from torch.nn.utils import spectral_norm)
    f = spectral_norm if sn else (lambda x: x)
    self.in_conv_layer = f(nn.Conv1d(c_in, c_h, kernel_size=1))
    self.first_conv_layers = nn.ModuleList([
        f(nn.Conv1d(c_h, c_h, kernel_size=kernel_size))
        for _ in range(n_conv_blocks)
    ])
    self.second_conv_layers = nn.ModuleList([
        f(nn.Conv1d(c_h, c_h * up, kernel_size=kernel_size))
        for _, up in zip(range(n_conv_blocks), self.upsample)
    ])
    self.norm_layer = nn.InstanceNorm1d(c_h, affine=False)
    self.conv_affine_layers = nn.ModuleList(
        [f(nn.Linear(c_cond, c_h * 2)) for _ in range(n_conv_blocks * 2)])
    self.out_conv_layer = f(nn.Conv1d(c_h, c_out, kernel_size=1))
    self.dropout_layer = nn.Dropout(p=dropout_rate)
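# Hedged sketch of how the conditional affine layers are typically used
# (AdaIN-style: the condition vector modulates instance-normalized activations;
# the scale/shift split order is an assumption, not the repo's forward()):
def apply_affine(self, x, cond, affine_layer):
    # x: (B, c_h, T), cond: (B, c_cond)
    scale, shift = affine_layer(cond).chunk(2, dim=-1)  # each (B, c_h)
    x = self.norm_layer(x)                              # per-channel normalize
    return x * scale.unsqueeze(-1) + shift.unsqueeze(-1)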
def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
    super(ResidualLayer, self).__init__()
    # Note: the `stride` argument is accepted but every conv below hard-codes
    # stride=1, so the parameter is effectively unused.
    self.conv1d_layer = nn.Sequential(
        nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
        ),
        nn.InstanceNorm1d(num_features=out_channels, affine=True),
    )
    self.conv_layer_gates = nn.Sequential(
        nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
        ),
        nn.InstanceNorm1d(num_features=out_channels, affine=True),
    )
    self.conv1d_out_layer = nn.Sequential(
        nn.Conv1d(
            in_channels=out_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
        ),
        nn.InstanceNorm1d(num_features=in_channels, affine=True),
    )
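# Hedged sketch of the gated residual forward this layer pairs with in
# CycleGAN-VC-style models (an assumption; the repo's forward() is not shown):
import torch

def residual_forward(self, x):
    h = self.conv1d_layer(x) * torch.sigmoid(self.conv_layer_gates(x))  # GLU-style gate
    return x + self.conv1d_out_layer(h)  # out conv maps back to in_channels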
def __init__(self, num_speakers=2) -> None:
    super(simple_CNN, self).__init__()
    self.convs = nn.Sequential(
        nn.Conv1d(1, 16, 100, stride=10),
        nn.BatchNorm1d(16),
        nn.ReLU(),
        nn.Conv1d(16, 64, 21, stride=10),
        nn.BatchNorm1d(64),
        nn.ReLU(),
        nn.Conv1d(64, 64, 5, stride=5),
        nn.BatchNorm1d(64),
        nn.ReLU(),
    )
    # The flattened feature count (1 * 6 * 64) pins the raw-audio input length;
    # see the shape check below.
    self.linears = nn.Sequential(
        nn.Linear(1 * 6 * 64, 128),
        nn.Linear(128, num_speakers),
    )
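# Working the conv arithmetic backwards (floor((L - kernel) / stride) + 1 per
# layer), one input length that fits the hard-coded Linear(1 * 6 * 64, 128) is
# 3200 samples (assumes the simple_CNN class above is in scope):
import torch
model = simple_CNN()
x = torch.randn(2, 1, 3200)
# 3200 -(k100,s10)-> 311 -(k21,s10)-> 30 -(k5,s5)-> 6, so 6 * 64 = 384 features
print(model.convs(x).shape)  # torch.Size([2, 64, 6])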
def _test_conv1d_bias_true(test_case, device):
    np_arr = np.array(
        [
            [
                [0.90499806, -1.11683071, 0.71605605, -0.56754625, 0.61944169],
                [-0.31317389, -0.26271924, 0.95579433, 0.52468461, 1.48926127],
            ]
        ]
    )
    input = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    weight = np.array(
        [
            [
                [0.01997352, 0.23834395, 0.00526353],
                [-0.04861857, -0.22751901, -0.06725175],
            ],
            [
                [0.13344523, -0.35202524, 0.15168799],
                [-0.25714493, -0.17459838, 0.28768948],
            ],
            [
                [0.10671382, -0.28205597, -0.39752254],
                [0.36393702, 0.07843742, -0.33898622],
            ],
            [
                [0.20485674, 0.04222689, -0.1898618],
                [0.22519711, -0.15910202, -0.35057363],
            ],
        ]
    )
    bias = np.array([0.01012857, 0.38912651, -0.01600273, -0.3883304])
    m = nn.Conv1d(2, 4, 3, stride=1, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array(
        [
            [
                [-0.22349545, -0.08447243, -0.37358052],
                [1.4130373, -0.04644597, 0.86949122],
                [-0.34765026, -0.31004351, -0.14158708],
                [-0.74985039, -0.87430149, -0.77354753],
            ]
        ]
    )
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [
            [
                [0.4649893, 0.11147892, -0.3189539, -0.78394318, -0.43043283],
                [0.28337064, -0.19941133, -0.66853344, -0.95190406, -0.46912211],
            ]
        ]
    )
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
def _test_conv1d_dilation(test_case, device):
    np_arr = np.array(
        [[[-0.43016902, 1.74619496, -0.57338119, 0.25563857, 0.12575546]]])
    input = flow.tensor(np_arr, dtype=flow.float32,
                        device=flow.device(device), requires_grad=True)
    weight = np.array([
        [[-0.35057205, -0.31304273, 0.46250814]],
        [[-0.40786612, 0.36518192, 0.46280444]],
        [[-0.00921835, -0.38710043, 0.47566161]],
    ])
    # NOTE: despite the test name, no dilation argument is passed, so this
    # exercises the default dilation=1 (the expected values below match that).
    m = nn.Conv1d(1, 3, 3, stride=1, bias=False)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m = m.to(device)
    output = m(input)
    np_out = np.array([[
        [-0.66102189, -0.31443936, 0.17914855],
        [0.54776692, -0.8032915, 0.38541752],
        [-0.94472277, 0.32745653, -0.03385513],
    ]])
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [[[-0.76765651, -1.10261774, 0.29835641, 1.06601286, 1.40097415]]])
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
def _test_conv1d_stride(test_case, device):
    np_arr = np.array(
        [[[-1.01312506, -0.40687919, 1.5985316, 0.53594196, -1.89935565]]])
    input = flow.tensor(np_arr, dtype=flow.float32,
                        device=flow.device(device), requires_grad=True)
    weight = np.array([
        [[0.5751484, 0.26589182, -0.026546]],
        [[-0.10313249, -0.20797005, -0.48268208]],
        [[-0.22216944, -0.14962578, 0.57433963]],
    ])
    m = nn.Conv1d(1, 3, 3, stride=2, bias=False)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m = m.to(device)
    output = m(input)
    np_out = np.array([[
        [-0.73331773, 1.11231577],
        [-0.58247775, 0.64046454],
        [1.20406508, -1.5262109],
    ]])
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [[[0.24984647, -0.09170401, 0.31495798, -0.09170401, 0.06511152]]])
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
def _test_conv1d_bias_false(test_case, device):
    np_arr = np.array(
        [[[1.28795946, -0.2921792, 0.20338029, 0.78604293, -1.89607573]]])
    input = flow.tensor(np_arr, dtype=flow.float32,
                        device=flow.device(device), requires_grad=True)
    weight = np.array([
        [[0.10197904, 0.3372305, -0.25743008]],
        [[0.27720425, -0.52435774, -0.38381988]],
        [[0.56016803, -0.10063095, -0.10760903]],
    ])
    m = nn.Conv1d(1, 3, 3, stride=1, bias=False)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m = m.to(device)
    output = m(input)
    np_out = np.array([[
        [-0.01954307, -0.16356121, 0.77392507],
        [0.43217283, -0.48933625, 0.37196174],
        [0.72899038, -0.2687211, 0.23886177],
    ]])
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [[[0.93935132, 0.65159315, -0.09726584, -1.03661716, -0.74885899]]])
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
def _test_conv1d_compilcate(test_case, device):
    np_arr = np.array([[
        [-1.00674784, 0.51784992, 0.39896572, 0.11018554, 0.91136694],
        [1.95886874, 0.89779067, 0.4748213, 0.33313531, -0.49350029],
        [-0.19280219, 0.04023677, 1.66438103, -0.83563608, 0.15925731],
        [1.49166429, 1.45189261, -1.86512125, 0.34329697, 0.20413807],
    ]])
    input = flow.tensor(np_arr, dtype=flow.float32,
                        device=flow.device(device), requires_grad=True)
    weight = np.array([
        [
            [-0.36045218, 0.37349278, 0.04565236],
            [0.0242328, -0.09459515, -0.30684742],
        ],
        [
            [-0.30345008, -0.1196513, -0.26765293],
            [0.09876197, 0.03346226, 0.2748405],
        ],
        [
            [-0.37798449, 0.00242459, -0.34125558],
            [-0.05174343, -0.10443231, 0.09526101],
        ],
        [
            [0.34196907, -0.32667893, 0.40264183],
            [0.38025281, 0.26807079, -0.09074812],
        ],
    ])
    bias = np.array([-0.03499984, -0.21616256, 0.13312563, -0.24104381])
    m = nn.Conv1d(4, 4, 3, groups=2, stride=2, padding=2, dilation=2, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array([[
        [-0.72379637, 0.67248386, 0.21977007],
        [-0.00643994, -0.1286152, -0.41589433],
        [-0.76877236, 0.29273134, -0.42040929],
        [1.0612179, -0.73787093, -0.37839717],
    ]])
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    # With stride=2, padding=2, and dilation=2, every output frame reads input
    # index 2*j - 2 + 2*k, which is always even; the odd input positions are
    # never touched, hence the exact zeros in the expected gradient.
    np_grad = np.array([[
        [-0.41006082, 0.0, -0.63206136, 0.0, 0.03184089],
        [0.06186188, 0.0, 0.02985496, 0.0, -0.09313981],
        [-0.36026976, 0.0, -0.2988835, 0.0, -0.26286808],
        [0.49214786, 0.0, 0.49666074, 0.0, 0.16815135],
    ]])
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
def __init__(
    self,
    c_in,
    c_h,
    c_out,
    kernel_size,
    bank_size,
    bank_scale,
    c_bank,
    n_conv_blocks,
    n_dense_blocks,
    subsample,
    act,
    dropout_rate,
):
    super(SpeakerEncoder, self).__init__()
    self.c_in = c_in
    self.c_h = c_h
    self.c_out = c_out
    self.kernel_size = kernel_size
    self.n_conv_blocks = n_conv_blocks
    self.n_dense_blocks = n_dense_blocks
    self.subsample = subsample
    self.act = get_act(act)
    self.conv_bank = nn.ModuleList([
        nn.Conv1d(c_in, c_bank, kernel_size=k)
        for k in range(bank_scale, bank_size + 1, bank_scale)
    ])
    in_channels = c_bank * (bank_size // bank_scale) + c_in
    self.in_conv_layer = nn.Conv1d(in_channels, c_h, kernel_size=1)
    self.first_conv_layers = nn.ModuleList([
        nn.Conv1d(c_h, c_h, kernel_size=kernel_size)
        for _ in range(n_conv_blocks)
    ])
    self.second_conv_layers = nn.ModuleList([
        nn.Conv1d(c_h, c_h, kernel_size=kernel_size, stride=sub)
        for sub, _ in zip(subsample, range(n_conv_blocks))
    ])
    self.pooling_layer = nn.AdaptiveAvgPool1d(1)
    self.first_dense_layers = nn.ModuleList(
        [nn.Linear(c_h, c_h) for _ in range(n_dense_blocks)])
    self.second_dense_layers = nn.ModuleList(
        [nn.Linear(c_h, c_h) for _ in range(n_dense_blocks)])
    self.output_layer = nn.Linear(c_h, c_out)
    self.dropout_layer = nn.Dropout(p=dropout_rate)
def _test_conv1d_group_large_out_bias_true(test_case, device):
    np_arr = np.array(
        [
            [
                [2.17964911, 0.91623521, 1.24746692, 0.73605931, -0.23738743],
                [-0.70412433, 0.10727754, 1.0207864, -0.09711888, -1.10814202],
            ]
        ]
    )
    input = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    weight = np.array(
        [
            [[-0.207307473, 0.12856324, 0.371991515]],
            [[-0.416422307, 3.26921181e-05, -0.385845661]],
            [[-0.182592362, 0.143281639, 0.419321984]],
            [[-0.27117458, 0.0421470925, 0.377335936]],
            [[0.546190619, -0.211819887, -0.29785803]],
            [[0.334832489, 0.255918801, -0.0556600206]],
        ]
    )
    bias = np.array(
        [-0.56865668, 0.17631066, -0.43992457, -0.24307285, -0.53672957, -0.52927947]
    )
    m = nn.Conv1d(2, 6, 3, groups=2, stride=1, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array(
        [
            [
                [-0.43867296, -0.32441288, -0.82094181],
                [-1.21264362, -0.48919463, -0.25154343],
                [-0.18354186, -0.11983716, -0.66178048],
                [0.33756858, -0.26578707, -0.9421193],
                [-1.2480886, -0.66543078, 0.37145507],
                [-0.79440582, -0.22671542, -0.15066233],
            ]
        ]
    )
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [
            [
                [-0.8063221, -0.53444451, -0.12897667, 0.6773454, 0.40546784],
                [0.6098485, 0.69609451, 0.71991241, 0.1100639, 0.02381789],
            ]
        ]
    )
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
def downsample(self, in_channels, out_channels, kernel_size, stride, padding):
    # Note: self.ConvLayer is overwritten on every call; callers should use
    # the returned module rather than the attribute.
    self.ConvLayer = nn.Sequential(
        nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
        ),
        nn.InstanceNorm1d(num_features=out_channels, affine=True),
        GLU(),
    )
    return self.ConvLayer
def _test_conv1d_group_large_in_bias_true(test_case, device):
    np_arr = np.array(
        [
            [
                [0.7382921, 0.3227571, -0.73204273, -0.01697334, 1.72585976],
                [0.52866709, 0.28417364, 1.12931311, 1.73048413, -0.60748184],
                [0.43222603, 0.7882517, -0.62105948, 0.10097823, 0.81639361],
                [0.36671457, 0.24468753, -0.5824874, -0.74464536, -0.38901371],
            ]
        ]
    )
    input = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    weight = np.array(
        [
            [
                [-0.29574063, -0.31176069, 0.17234495],
                [0.06092392, 0.30691007, -0.36685407],
            ],
            [
                [0.26149744, 0.07149458, 0.3209756],
                [0.18960869, -0.37148297, -0.13602243],
            ],
        ]
    )
    bias = np.array([-0.35048512, -0.0093792])
    m = nn.Conv1d(4, 2, 3, groups=2, stride=1, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array(
        [[[-1.09048378, -0.49156523, 0.99150705],
          [0.01852397, 0.54882324, 0.31657016]]]
    )
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [
            [
                [-0.29574063, -0.60750133, -0.43515638, -0.13941574, 0.17234495],
                [0.06092392, 0.36783397, 0.0009799, -0.059944, -0.36685407],
                [0.26149744, 0.33299202, 0.65396762, 0.39247018, 0.3209756],
                [0.18960869, -0.18187428, -0.31789672, -0.50750542, -0.13602243],
            ]
        ]
    )
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
def __init__(
    self,
    in_channels=256,
    conv_channels=512,
    kernel_size=3,
    dilation=1,
    norm="cLN",
    causal=False,
):
    super(Conv1DBlock, self).__init__()
    # 1x1 conv
    self.conv1x1 = Conv1D(in_channels, conv_channels, 1)
    self.prelu1 = nn.PReLU()
    self.lnorm1 = build_norm(norm, conv_channels)
    dconv_pad = (
        (dilation * (kernel_size - 1)) // 2
        if not causal
        else (dilation * (kernel_size - 1))
    )
    # depthwise conv
    self.dconv = nn.Conv1d(
        conv_channels,
        conv_channels,
        kernel_size,
        groups=conv_channels,
        padding=dconv_pad,
        dilation=dilation,
        bias=True,
    )
    self.prelu2 = nn.PReLU()
    self.lnorm2 = build_norm(norm, conv_channels)
    # 1x1 conv cross channel
    self.sconv = nn.Conv1d(conv_channels, in_channels, 1, bias=True)
    # different padding way
    self.causal = causal
    self.dconv_pad = dconv_pad
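# Hedged sketch of the causal trimming this padding choice implies: in causal
# mode the depthwise conv pads dilation * (kernel_size - 1) on both sides, so
# the forward is expected to drop the trailing frames that peek at the future
# (an assumption following Conv-TasNet-style blocks; names are local):
def depthwise_step(self, x):
    y = self.dconv(x)                    # (B, C, T + dconv_pad) when causal
    if self.causal:
        y = y[:, :, :-self.dconv_pad]    # keep only frames with no look-ahead
    return y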
def _test_conv1d_group_bias_true(test_case, device):
    np_arr = np.array(
        [
            [
                [1.48566079, 0.54937589, 0.62353903, -0.94114172, -0.60260266],
                [0.61150503, -0.50289607, 1.41735041, -1.85877609, -1.04875529],
            ]
        ]
    )
    input = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    weight = np.array(
        [
            [[0.25576305, 0.40814576, -0.05900212]],
            [[-0.24829513, 0.42756805, -0.01354307]],
            [[0.44658303, 0.46889144, 0.41060263]],
            [[0.30083328, -0.5221613, 0.12215579]],
        ]
    )
    bias = np.array([-0.03368823, -0.4212504, -0.42130581, -0.17434336])
    m = nn.Conv1d(2, 4, 3, groups=2, stride=1, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array(
        [
            [
                [0.53372419, 0.41684598, -0.22277816],
                [-0.56368178, -0.27830642, -0.97031319],
                [0.19794616, -0.74452549, -1.09052706],
                [0.44534814, -1.29277706, 1.09451222],
            ]
        ]
    )
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [
            [
                [0.00746793, 0.84318173, 0.77063656, 0.76316863, -0.07254519],
                [0.74741632, 0.69414645, 1.22690487, 0.47948855, 0.53275841],
            ]
        ]
    )
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
    # "same"-style padding for odd kernel sizes: length is preserved at stride 1
    padding = (kernel_size - 1) // 2
    super(ConvBNReLU, self).__init__(
        nn.Conv1d(
            in_planes,
            out_planes,
            kernel_size,
            stride,
            padding,
            groups=groups,
            bias=False,
        ),
        nn.BatchNorm1d(out_planes),
        nn.ReLU6(),
    )
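# Minimal shape check: padding = (kernel_size - 1) // 2 preserves length at
# stride 1 (ConvBNReLU subclasses nn.Sequential, so it is directly callable;
# assumes the class above is in scope):
import torch
block = ConvBNReLU(8, 16, kernel_size=3, stride=1)
x = torch.randn(2, 8, 100)
print(block(x).shape)  # torch.Size([2, 16, 100])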
def __init__(self, channels, kernel_size, bias=True, dropout=0.0):
    super(ConformerConvolutionModule, self).__init__()
    # odd kernel so the depthwise conv can pad symmetrically and keep T fixed
    assert kernel_size % 2 == 1
    self.pointwise_conv1 = nn.Linear(channels, 2 * channels, bias=bias)
    self.depthwise_conv = nn.Conv1d(
        channels,
        channels,
        kernel_size,
        stride=1,
        padding=(kernel_size - 1) // 2,
        groups=channels,
        bias=bias,
    )
    self.batch_norm = nn.BatchNorm1d(channels)
    self.pointwise_conv2 = nn.Linear(channels, channels, bias=bias)
    self.dropout = nn.Dropout(dropout)
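# Hedged sketch of the forward order these layers imply, following the
# Conformer paper (pointwise GLU -> depthwise conv -> BN -> Swish -> pointwise);
# the repo's actual forward() is not shown, so this is an assumption:
import torch.nn.functional as F

def conformer_conv_forward(self, x):
    # x: (B, T, C)
    x = F.glu(self.pointwise_conv1(x), dim=-1)      # (B, T, 2C) -> (B, T, C)
    x = self.depthwise_conv(x.transpose(1, 2))      # convs expect (B, C, T)
    x = F.silu(self.batch_norm(x)).transpose(1, 2)  # Swish, back to (B, T, C)
    return self.dropout(self.pointwise_conv2(x))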
def __init__(self, inp, oup, stride, expand_ratio):
    super(InvertedResidual, self).__init__()
    self.stride = stride
    assert stride in [1, 2]
    hidden_dim = int(round(inp * expand_ratio))
    self.use_res_connect = self.stride == 1 and inp == oup
    layers = []
    if expand_ratio != 1:
        # pw
        layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
    layers.extend([
        # dw
        ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
        # pw-linear
        nn.Conv1d(hidden_dim, oup, 1, 1, 0, bias=False),
        nn.BatchNorm1d(oup),
    ])
    self.conv = nn.Sequential(*layers)
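# Sketch of the standard MobileNetV2-style forward this block implies (the
# repo's forward() is not shown, but the pattern is conventional):
def inverted_residual_forward(self, x):
    if self.use_res_connect:
        return x + self.conv(x)  # identity shortcut: stride 1 and inp == oup
    return self.conv(x)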
def __init__(self, hidden_size, vocab_size, blank=BLK, lookahead_steps=-1):
    super(CTCAssistor, self).__init__()
    self.lookahead_steps = lookahead_steps
    if self.lookahead_steps > 0:
        self.apply_look_ahead = True
        self.lookahead_conv = nn.Conv1d(
            in_channels=hidden_size,
            out_channels=hidden_size,
            kernel_size=self.lookahead_steps + 1,
            padding=0,
            stride=1,
            bias=False,
            groups=hidden_size,
        )
        logger.info(
            "Apply Lookahead Step in CTCAssistor And Set it to %d" % lookahead_steps
        )
    else:
        self.apply_look_ahead = False
    self.output_layer = nn.Linear(hidden_size, vocab_size)
    self.ctc_crit = nn.CTCLoss(blank=blank, zero_infinity=True)
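# Hedged sketch of applying the lookahead conv (assumptions: enc_out is
# (B, T, H), and future frames are zero-padded so the depthwise kernel of size
# lookahead_steps + 1 preserves T while mixing in lookahead_steps future steps):
import torch.nn.functional as F

def apply_lookahead(self, enc_out):
    h = enc_out.transpose(1, 2)              # (B, H, T) for Conv1d
    h = F.pad(h, (0, self.lookahead_steps))  # right-pad the time axis
    return self.lookahead_conv(h).transpose(1, 2)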
def __init__(
    self,
    c_in,
    c_h,
    c_out,
    kernel_size,
    bank_size,
    bank_scale,
    c_bank,
    n_conv_blocks,
    subsample,
    act,
    dropout_rate,
):
    super(ContentEncoder, self).__init__()
    self.n_conv_blocks = n_conv_blocks
    self.subsample = subsample
    self.act = get_act(act)
    self.conv_bank = nn.ModuleList([
        nn.Conv1d(c_in, c_bank, kernel_size=k)
        for k in range(bank_scale, bank_size + 1, bank_scale)
    ])
    in_channels = c_bank * (bank_size // bank_scale) + c_in
    self.in_conv_layer = nn.Conv1d(in_channels, c_h, kernel_size=1)
    self.first_conv_layers = nn.ModuleList([
        nn.Conv1d(c_h, c_h, kernel_size=kernel_size)
        for _ in range(n_conv_blocks)
    ])
    self.second_conv_layers = nn.ModuleList([
        nn.Conv1d(c_h, c_h, kernel_size=kernel_size, stride=sub)
        for sub, _ in zip(subsample, range(n_conv_blocks))
    ])
    self.norm_layer = nn.InstanceNorm1d(c_h, affine=False)
    self.mean_layer = nn.Conv1d(c_h, c_out, kernel_size=1)
    self.std_layer = nn.Conv1d(c_h, c_out, kernel_size=1)
    self.dropout_layer = nn.Dropout(p=dropout_rate)
def __init__(self, options):
    super(SincNet, self).__init__()
    self.cnn_N_filt = options["cnn_N_filt"]
    self.cnn_len_filt = options["cnn_len_filt"]
    self.cnn_max_pool_len = options["cnn_max_pool_len"]
    self.cnn_act = options["cnn_act"]
    self.cnn_drop = options["cnn_drop"]
    self.cnn_use_laynorm = options["cnn_use_laynorm"]
    self.cnn_use_batchnorm = options["cnn_use_batchnorm"]
    self.cnn_use_laynorm_inp = options["cnn_use_laynorm_inp"]
    self.cnn_use_batchnorm_inp = options["cnn_use_batchnorm_inp"]
    self.input_dim = int(options["input_dim"])
    self.fs = options["fs"]
    self.N_cnn_lay = len(options["cnn_N_filt"])
    self.conv = nn.ModuleList([])
    self.bn = nn.ModuleList([])
    self.ln = nn.ModuleList([])
    self.act = nn.ModuleList([])
    self.drop = nn.ModuleList([])

    if self.cnn_use_laynorm_inp:
        self.ln0 = LayerNorm(self.input_dim)
    if self.cnn_use_batchnorm_inp:
        # num_features must be an int; the source wrapped it in a list
        self.bn0 = nn.BatchNorm1d(self.input_dim, momentum=0.05)

    current_input = self.input_dim
    for i in range(self.N_cnn_lay):
        N_filt = int(self.cnn_N_filt[i])
        len_filt = int(self.cnn_len_filt[i])
        # dropout
        self.drop.append(nn.Dropout(p=self.cnn_drop[i]))
        # activation
        self.act.append(act_fun(self.cnn_act[i]))
        # layer norm initialization
        self.ln.append(
            LayerNorm((
                N_filt,
                int((current_input - self.cnn_len_filt[i] + 1) /
                    self.cnn_max_pool_len[i]),
            )))
        # CAUTION: the second positional argument of nn.BatchNorm1d is `eps`,
        # so the pooled length computed here lands on eps, not on a size.
        # Kept as in the source, but this looks unintended.
        self.bn.append(
            nn.BatchNorm1d(
                N_filt,
                int((current_input - self.cnn_len_filt[i] + 1) /
                    self.cnn_max_pool_len[i]),
                momentum=0.05,
            ))
        if i == 0:
            self.conv.append(
                SincConv_fast(self.cnn_N_filt[0], self.cnn_len_filt[0], self.fs))
        else:
            self.conv.append(
                nn.Conv1d(self.cnn_N_filt[i - 1], self.cnn_N_filt[i],
                          self.cnn_len_filt[i]))
        current_input = int((current_input - self.cnn_len_filt[i] + 1) /
                            self.cnn_max_pool_len[i])

    self.out_dim = current_input * N_filt
def __init__(self, d_in, d_hid, dropout=0.1):
    super(PositionwiseFeedForwardUseConv, self).__init__()
    self.w_1 = nn.Conv1d(d_in, d_hid, 1)
    self.w_2 = nn.Conv1d(d_hid, d_in, 1)
    self.layer_norm = nn.LayerNorm(d_in)
    self.dropout = nn.Dropout(dropout)
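# Hedged sketch of the usual forward for this conv-based FFN: the 1x1 convs act
# as position-wise linears, so the time axis is swapped around them, with a
# residual connection and post-norm (an assumption matching common usage):
import torch

def ffn_forward(self, x):
    # x: (B, T, d_in)
    residual = x
    out = self.w_2(torch.relu(self.w_1(x.transpose(1, 2)))).transpose(1, 2)
    return self.layer_norm(self.dropout(out) + residual)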
def __init__(self):
    super(Generator, self).__init__()
    # 2D Conv Layer
    self.conv1 = nn.Conv2d(
        in_channels=1,
        out_channels=128,
        kernel_size=(5, 15),
        stride=(1, 1),
        padding=(2, 7),
    )
    self.conv1_gates = nn.Conv2d(
        in_channels=1,
        out_channels=128,
        kernel_size=(5, 15),
        stride=1,
        padding=(2, 7),
    )
    # 2D Downsample Layer
    self.downSample1 = downSample_Generator(
        in_channels=128, out_channels=256, kernel_size=5, stride=2, padding=2)
    self.downSample2 = downSample_Generator(
        in_channels=256, out_channels=256, kernel_size=5, stride=2, padding=2)
    # 2D -> 1D Conv
    # 2304 = 256 channels * (feature bins // 4 after the two stride-2
    # downsamples), i.e. this reshape assumes a 36-bin feature input.
    self.conv2dto1dLayer = nn.Sequential(
        nn.Conv1d(in_channels=2304, out_channels=256,
                  kernel_size=1, stride=1, padding=0),
        nn.InstanceNorm1d(num_features=256, affine=True),
    )
    # Residual Blocks
    self.residualLayer1 = ResidualLayer(
        in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
    self.residualLayer2 = ResidualLayer(
        in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
    self.residualLayer3 = ResidualLayer(
        in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
    self.residualLayer4 = ResidualLayer(
        in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
    self.residualLayer5 = ResidualLayer(
        in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
    self.residualLayer6 = ResidualLayer(
        in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1)
    # 1D -> 2D Conv
    self.conv1dto2dLayer = nn.Sequential(
        nn.Conv1d(in_channels=256, out_channels=2304,
                  kernel_size=1, stride=1, padding=0),
        nn.InstanceNorm1d(num_features=2304, affine=True),
    )
    # UpSample Layer
    self.upSample1 = self.upSample(
        in_channels=256, out_channels=1024, kernel_size=5, stride=1, padding=2)
    self.upSample2 = self.upSample(
        in_channels=256, out_channels=512, kernel_size=5, stride=1, padding=2)
    self.lastConvLayer = nn.Conv2d(
        in_channels=128,
        out_channels=1,
        kernel_size=(5, 15),
        stride=(1, 1),
        padding=(2, 7),
    )
def __init__(self, num_features, num_classes):
    super(Wav2Letter, self).__init__()
    self.layers = nn.Sequential(
        nn.Conv1d(num_features, 250, 48, 2),
        nn.ReLU(),
        nn.Conv1d(250, 250, 7),
        nn.ReLU(),
        nn.Conv1d(250, 250, 7),
        nn.ReLU(),
        nn.Conv1d(250, 250, 7),
        nn.ReLU(),
        nn.Conv1d(250, 250, 7),
        nn.ReLU(),
        nn.Conv1d(250, 250, 7),
        nn.ReLU(),
        nn.Conv1d(250, 250, 7),
        nn.ReLU(),
        nn.Conv1d(250, 250, 7),
        nn.ReLU(),
        nn.Conv1d(250, 2000, 32),
        nn.ReLU(),
        nn.Conv1d(2000, 2000, 1),
        nn.ReLU(),
        nn.Conv1d(2000, num_classes, 1),
    )
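# Rough output-length check for the stack above; with no padding each conv
# follows floor((L - kernel) / stride) + 1 (sizes below are example values,
# and the Wav2Letter class above is assumed to be in scope):
import torch
model = Wav2Letter(num_features=13, num_classes=29)
x = torch.randn(1, 13, 500)
# 500 -(k48,s2)-> 227, seven k7 convs -> 227 - 7 * 6 = 185,
# -(k32)-> 154, and the two 1x1 convs keep 154
print(model.layers(x).shape)  # torch.Size([1, 29, 154])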
def __init__(self, input_shape=(80, 64), residual_in_channels=256):
    super(Generator, self).__init__()
    Cx, Tx = input_shape
    self.flattened_channels = (Cx // 4) * residual_in_channels
    # 2D Conv Layer
    self.conv1 = nn.Conv2d(
        in_channels=2,
        out_channels=residual_in_channels // 2,
        kernel_size=(5, 15),
        stride=(1, 1),
        padding=(2, 7),
    )
    self.conv1_gates = nn.Conv2d(
        in_channels=2,
        out_channels=residual_in_channels // 2,
        kernel_size=(5, 15),
        stride=1,
        padding=(2, 7),
    )
    # 2D Downsampling Layers
    self.downSample1 = DownSampleGenerator(
        in_channels=residual_in_channels // 2,
        out_channels=residual_in_channels,
        kernel_size=5,
        stride=2,
        padding=2,
    )
    self.downSample2 = DownSampleGenerator(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels,
        kernel_size=5,
        stride=2,
        padding=2,
    )
    # 2D -> 1D Conv
    self.conv2dto1dLayer = nn.Conv1d(
        in_channels=self.flattened_channels,
        out_channels=residual_in_channels,
        kernel_size=1,
        stride=1,
        padding=0,
    )
    self.conv2dto1dLayer_tfan = nn.InstanceNorm1d(
        num_features=residual_in_channels, affine=True
    )
    # Residual Blocks
    self.residualLayer1 = ResidualLayer(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels * 2,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    self.residualLayer2 = ResidualLayer(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels * 2,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    self.residualLayer3 = ResidualLayer(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels * 2,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    self.residualLayer4 = ResidualLayer(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels * 2,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    self.residualLayer5 = ResidualLayer(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels * 2,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    self.residualLayer6 = ResidualLayer(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels * 2,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    # 1D -> 2D Conv
    self.conv1dto2dLayer = nn.Conv1d(
        in_channels=residual_in_channels,
        out_channels=self.flattened_channels,
        kernel_size=1,
        stride=1,
        padding=0,
    )
    self.conv1dto2dLayer_tfan = nn.InstanceNorm1d(
        num_features=self.flattened_channels, affine=True
    )
    # UpSampling Layers
    self.upSample1 = self.upsample(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels * 4,
        kernel_size=5,
        stride=1,
        padding=2,
    )
    self.glu = GLU()
    self.upSample2 = self.upsample(
        in_channels=residual_in_channels,
        out_channels=residual_in_channels * 2,
        kernel_size=5,
        stride=1,
        padding=2,
    )
    # 2D Conv Layer
    self.lastConvLayer = nn.Conv2d(
        in_channels=residual_in_channels // 2,
        out_channels=1,
        kernel_size=(5, 15),
        stride=(1, 1),
        padding=(2, 7),
    )