def __init__(self, n_z=256, ncoef=13, proj_size=0, sm_type='none'):
    super(lcnn_9layers, self).__init__()

    self.conv1 = nn.Conv2d(1, 16, kernel_size=(ncoef, 3), stride=(1, 1), padding=(0, 1), bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    self.activation = nn.ELU()

    # Max-Feature-Map (mfm) trunk with max pooling between groups
    self.features = nn.Sequential(
        mfm(16, 48, 5, 1, 2),
        nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
        group(48, 96, 3, 1, 1),
        nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
        group(96, 192, 3, 1, 1),
        nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
        group(192, 128, 3, 1, 1),
        group(128, 128, 3, 1, 1),
        nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))

    self.attention = SelfAttention(128)

    self.fc = nn.Linear(128, 128)
    self.fc1 = mfm(128, 128, type=0)
    self.fc2 = nn.Linear(128, n_z)

    # Optional classification head on top of the n_z-dimensional embedding
    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
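# The optional `out_proj` head above recurs in every constructor below: when
# `proj_size > 0` and `sm_type` is set, the n_z-dimensional embedding is projected
# onto `proj_size` class logits for training. The following is a minimal, hedged
# sketch of what such heads typically look like; the repo's actual Softmax/AMSoftmax
# classes may differ in details (margin, scale, initialization). Only their
# constructor/call interface is taken from the code in this file; the class names
# SoftmaxHead/AMSoftmaxHead and the margin/scale values are illustrative.

import torch
import torch.nn as nn
import torch.nn.functional as F

class SoftmaxHead(nn.Module):
    """Plain linear projection producing class logits (hypothetical reference version)."""
    def __init__(self, input_features, output_features):
        super().__init__()
        self.w = nn.Linear(input_features, output_features)

    def forward(self, embeddings):
        return self.w(embeddings)

class AMSoftmaxHead(nn.Module):
    """Additive-margin softmax logits: s * (cos(theta) - m) on the target class
    (hypothetical reference version)."""
    def __init__(self, input_features, output_features, m=0.35, s=30.0):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(output_features, input_features))
        self.m, self.s = m, s

    def forward(self, embeddings, labels):
        # Cosine similarity between L2-normalized embeddings and class weights
        cos = F.linear(F.normalize(embeddings), F.normalize(self.weight))
        # Subtract the margin only on the ground-truth class, then scale
        margin = F.one_hot(labels, cos.size(1)).float() * self.m
        return self.s * (cos - margin)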
def __init__(self, block=resblock, layers=[1, 2, 3, 4], n_z=256, ncoef=13, proj_size=0, sm_type='none'):
    super(lcnn_29layers_v2, self).__init__()

    self.conv1_ = nn.Conv2d(1, 16, kernel_size=(ncoef, 3), stride=(1, 1), padding=(0, 1), bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    self.activation = nn.ELU()

    self.conv1 = mfm(16, 48, 5, 1, 2)

    self.block1 = self._make_layer(block, layers[0], 48, 48)
    self.group1 = group(48, 96, 3, 1, 1)
    self.block2 = self._make_layer(block, layers[1], 96, 96)
    self.group2 = group(96, 192, 3, 1, 1)
    self.block3 = self._make_layer(block, layers[2], 192, 192)
    self.group3 = group(192, 128, 3, 1, 1)
    self.block4 = self._make_layer(block, layers[3], 128, 128)
    self.group4 = group(128, 128, 3, 1, 1)

    self.attention = SelfAttention(128)

    self.fc = nn.Linear(128, 128)
    self.fc1 = nn.Linear(128, n_z)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, layers=[3, 4, 6, 3], block=Bottleneck, proj_size=0, ncoef=23, sm_type='none'):
    self.inplanes = 16
    super(ResNet_lstm, self).__init__()

    self.conv1 = nn.Conv2d(1, 16, kernel_size=(ncoef, 3), stride=(1, 1), padding=(0, 1), bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    self.activation = nn.ELU()

    self.layer1 = self._make_layer(block, 16, layers[0], stride=1)
    self.layer2 = self._make_layer(block, 32, layers[1], stride=1)
    self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 128, layers[3], stride=2)

    self.lstm = nn.LSTM(512, 256, 2, bidirectional=True, batch_first=False)

    self.fc = nn.Linear(512 + 256, 512)
    self.lbn = nn.BatchNorm1d(512)

    self.fc_mu = nn.Linear(512, n_z)

    self.initialize_params()

    self.attention = SelfAttention(512)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, layers=[3, 4, 6, 3], block=PreActBlock, proj_size=0, ncoef=23, sm_type='none', delta=False):
    self.in_planes = 16
    super(ResNet_2d, self).__init__()

    self.conv1 = nn.Conv2d(3 if delta else 1, 16, kernel_size=3, stride=1, padding=1, bias=False)

    self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    self.conv_out = nn.Conv2d(block.expansion * 512, 512, kernel_size=(6, 1), stride=1, padding=0, bias=False)

    self.fc = nn.Linear(512 * 2, 512)
    self.lbn = nn.BatchNorm1d(512)

    self.fc_mu = nn.Linear(512, n_z)

    self.initialize_params()

    self.attention = SelfAttention(512)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none', delta=False):
    super(TDNN_logpool, self).__init__()

    self.delta = delta

    self.model = nn.Sequential(
        nn.Conv1d(3 * ncoef if delta else ncoef, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 3, dilation=2, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 3, dilation=3, padding=3),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 1500, 1),
        nn.BatchNorm1d(1500),
        nn.ReLU(inplace=True))

    self.pooling = StatisticalPooling()

    self.post_pooling_1_1 = nn.Linear(1500, 512)
    self.post_pooling_1_2 = nn.Sequential(
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True))

    self.post_pooling_2_1 = nn.Linear(512, 512)
    self.post_pooling_2_2 = nn.Sequential(
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True))
    self.post_pooling_2_3 = nn.Linear(512, proj_size)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=proj_size, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=proj_size, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, layers=[3, 4, 6, 3], block=PreActBottleneck, proj_size=0, ncoef=23, sm_type='none', delta=False):
    self.in_planes = 32
    super(ResNet_qrnn, self).__init__()

    self.conv1 = nn.Conv2d(3 if delta else 1, 32, kernel_size=(ncoef, 3), stride=(1, 1), padding=(0, 1), bias=False)

    self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Local import so torchqrnn is only required when this model is instantiated
    from torchqrnn import QRNN
    self.qrnn = QRNN(block.expansion * 512, 512, num_layers=2, dropout=0.3)

    self.fc = nn.Linear(1536, 512)
    self.lbn = nn.BatchNorm1d(512)

    self.fc_mu = nn.Linear(512, n_z)

    self.initialize_params()

    self.attention = SelfAttention(512)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, proj_size=0, ncoef=100, sm_type='none'):
    super(TDNN_mfcc, self).__init__()

    self.model = nn.Sequential(
        nn.BatchNorm1d(ncoef),
        nn.Conv1d(ncoef, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 3, dilation=2, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 3, dilation=3, padding=3),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 1500, 1),
        nn.BatchNorm1d(1500),
        nn.ReLU(inplace=True))

    self.pooling = StatisticalPooling()

    self.post_pooling = nn.Sequential(
        nn.Conv1d(3000, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
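# Several of these models pool frame-level activations with StatisticalPooling before
# the utterance-level layers. A minimal, hedged sketch is included here, assuming the
# usual x-vector definition (per-channel mean and standard deviation over time,
# concatenated), which is consistent with the 1500 -> 3000 channel jump between
# `self.model` and `self.post_pooling` in TDNN_mfcc above; the repo's own class may
# behave differently (e.g. TDNN_logpool feeds only 1500 channels into its post-pooling
# layers). The class name StatsPoolSketch is illustrative.

import torch
import torch.nn as nn

class StatsPoolSketch(nn.Module):
    """Mean + std pooling over the time axis of a (batch, channels, frames) tensor."""
    def forward(self, x):
        mean = x.mean(dim=-1, keepdim=True)
        std = x.std(dim=-1, keepdim=True)
        # keepdim=True keeps a trailing length-1 time axis so 1x1 Conv1d heads can follow
        return torch.cat([mean, std], dim=1)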
def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none'):
    super(cnn_lstm_mfcc, self).__init__()

    self.features = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=(ncoef, 3), padding=(0, 2), stride=(1, 1), bias=False),
        nn.BatchNorm2d(32),
        nn.ELU(),
        nn.Conv2d(32, 64, kernel_size=(1, 5), padding=(0, 1), stride=(1, 2), bias=False),
        nn.BatchNorm2d(64),
        nn.ELU(),
        nn.Conv2d(64, 128, kernel_size=(1, 5), padding=(0, 1), stride=(1, 2), bias=False),
        nn.BatchNorm2d(128),
        nn.ELU(),
        nn.Conv2d(128, 256, kernel_size=(1, 5), padding=(0, 1), stride=(1, 2), bias=False),
        nn.BatchNorm2d(256),
        nn.ELU())

    self.lstm = nn.LSTM(256, 512, 2, bidirectional=True, batch_first=False)

    self.fc_mu = nn.Sequential(nn.Linear(512 * 2, n_z))

    self.initialize_params()

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, ncoef=13, proj_size=0, sm_type='none'):
    '''
    The FTDNN architecture from
    "State-of-the-art speaker recognition with neural network embeddings in
    NIST SRE18 and Speakers in the Wild evaluations"
    https://www.sciencedirect.com/science/article/pii/S0885230819302700
    '''
    super(FTDNN, self).__init__()

    self.layer01 = TDNN_(input_dim=ncoef, output_dim=512, context_size=5, padding=2)
    self.layer02 = FTDNNLayer(512, 1024, 256, context_size=2, dilations=[2, 2, 2], paddings=[1, 1, 1])
    self.layer03 = FTDNNLayer(1024, 1024, 256, context_size=1, dilations=[1, 1, 1], paddings=[0, 0, 0])
    self.layer04 = FTDNNLayer(1024, 1024, 256, context_size=2, dilations=[3, 3, 2], paddings=[2, 1, 1])
    self.layer05 = FTDNNLayer(2048, 1024, 256, context_size=1, dilations=[1, 1, 1], paddings=[0, 0, 0])
    self.layer06 = FTDNNLayer(1024, 1024, 256, context_size=2, dilations=[3, 3, 2], paddings=[2, 1, 1])
    self.layer07 = FTDNNLayer(3072, 1024, 256, context_size=2, dilations=[3, 3, 2], paddings=[2, 1, 1])
    self.layer08 = FTDNNLayer(1024, 1024, 256, context_size=2, dilations=[3, 3, 2], paddings=[2, 1, 1])
    self.layer09 = FTDNNLayer(3072, 1024, 256, context_size=1, dilations=[1, 1, 1], paddings=[0, 0, 0])
    self.layer10 = DenseReLU(1024, 2048)
    self.layer11 = StatsPool()
    self.post_pooling_1 = DenseReLU(4096, 512)
    self.post_pooling_2 = DenseReLU(512, n_z)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
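# Hedged usage sketch, mirroring the architecture-check snippet at the end of this
# section: inputs are passed as (batch, 1, ncoef, frames) tensors and the model is
# assumed to return one n_z-dimensional embedding per utterance, which is what the
# AM-softmax head expects. proj_size, the frame count, and the label range are
# illustrative values only.

import torch

ftdnn = FTDNN(n_z=256, ncoef=13, proj_size=1000, sm_type='am_softmax')
feats = torch.rand(3, 1, 13, 400)                 # (batch, 1, ncoef, frames)
emb = ftdnn(feats)                                # assumed shape: (batch, n_z)
labels = torch.randint(low=0, high=1000, size=(emb.size(0),))
logits = ftdnn.out_proj(emb, labels)              # training-time class logits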
def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none', delta=False):
    super(transformer_enc, self).__init__()

    self.delta = delta

    self.pre_encoder = nn.Sequential(
        nn.Conv1d(3 * ncoef if delta else ncoef, 512, 7),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True))

    self.transformer_encoder = nn.TransformerEncoder(
        nn.TransformerEncoderLayer(d_model=512, nhead=8, dim_feedforward=768, dropout=0.1),
        num_layers=5,
        norm=nn.LayerNorm(512))

    self.pooling = StatisticalPooling()

    self.post_pooling_1 = nn.Sequential(
        nn.Conv1d(1024, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True))

    self.post_pooling_2 = nn.Sequential(
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, nh=1, n_h=512, layers=[3, 4, 23, 3], block=PreActBottleneck, proj_size=100, ncoef=23, dropout_prob=0.25, sm_type='softmax'):
    self.in_planes = 32
    super(ResNet_large, self).__init__()

    self.conv1 = nn.Conv2d(1, 32, kernel_size=(ncoef, 3), stride=(1, 1), padding=(0, 1), bias=False)
    self.bn1 = nn.BatchNorm2d(32)
    self.activation = nn.ReLU()

    self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    self.fc = nn.Linear(block.expansion * 512 * 2, 512)
    self.lbn = nn.BatchNorm1d(512)

    self.fc_mu = nn.Linear(512, n_z)

    self.classifier = self.make_bin_layers(n_in=2 * n_z, n_h_layers=nh, h_size=n_h, dropout_p=dropout_prob)

    self.initialize_params()

    self.attention = SelfAttention(block.expansion * 512)

    if sm_type == 'softmax':
        self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
    elif sm_type == 'am_softmax':
        self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
    else:
        raise NotImplementedError
def __init__(self, pase_cfg, pase_cp=None, n_z=256, proj_size=0, ncoef=100, sm_type='none'):
    super(global_MLP, self).__init__()

    self.encoder = wf_builder(pase_cfg)
    if pase_cp:
        self.encoder.load_pretrained(pase_cp, load_last=True, verbose=False)

    self.model = nn.Sequential(
        nn.Linear(ncoef, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Linear(512, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Linear(512, n_z))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, pase_cfg, pase_cp=None, n_z=256, layers=[2, 2, 2, 2], block=PreActBlock, proj_size=0, ncoef=23, sm_type='none'):
    self.in_planes = 16
    super(ResNet_18, self).__init__()

    self.model = nn.ModuleList()
    self.model.append(nn.Sequential(
        nn.Conv2d(1, 16, kernel_size=(2 * ncoef, 3), stride=(1, 1), padding=(0, 1), bias=False),
        nn.BatchNorm2d(16),
        nn.ReLU()))
    self.model.append(self._make_layer(block, 64, layers[0], stride=1))
    self.model.append(self._make_layer(block, 128, layers[1], stride=2))
    self.model.append(self._make_layer(block, 256, layers[2], stride=2))
    self.model.append(self._make_layer(block, 512, layers[3], stride=2))

    self.initialize_params()

    self.pooling = SelfAttention(block.expansion * 512)

    self.post_pooling = nn.Sequential(
        nn.Conv1d(block.expansion * 512 * 2, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError

    # Load the PASE encoder only after the main model parameters are initialized
    self.encoder = wf_builder(pase_cfg)
    if pase_cp:
        self.encoder.load_pretrained(pase_cp, load_last=True, verbose=False)
def __init__(self, pase_cfg, pase_cp=None, n_layers=4, n_z=256, proj_size=0, ncoef=23, sm_type='none'):
    super(pyr_rnn, self).__init__()

    self.model = nn.ModuleList([nn.LSTM(2 * ncoef, 256, 1, bidirectional=True, batch_first=True)])
    for i in range(1, n_layers):
        self.model.append(nn.LSTM(256 * 2 * 2, 256, 1, bidirectional=True, batch_first=True))

    self.pooling = StatisticalPooling()

    self.post_pooling = nn.Sequential(
        nn.Conv1d(256 * 2 * 2 * 2, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    self.initialize_params()

    self.attention = SelfAttention(512)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError

    self.encoder = wf_builder(pase_cfg)
    if pase_cp:
        self.encoder.load_pretrained(pase_cp, load_last=True, verbose=False)
def __init__(self, pase_cfg, pase_cp=None, n_z=256, proj_size=0, ncoef=100, sm_type='none'):
    super(TDNN, self).__init__()

    self.encoder = wf_builder(pase_cfg)
    if pase_cp:
        self.encoder.load_pretrained(pase_cp, load_last=True, verbose=False)

    self.model = nn.Sequential(
        nn.BatchNorm1d(2 * ncoef),
        nn.Conv1d(2 * ncoef, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 3, dilation=2, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 3, dilation=3, padding=3),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 1500, 1),
        nn.BatchNorm1d(1500),
        nn.ReLU(inplace=True))

    self.pooling = StatisticalPooling()

    self.post_pooling = nn.Sequential(
        nn.Conv1d(3000, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none', delta=False):
    super().__init__()

    self.delta = delta

    self.model = nn.Sequential(
        nn.Conv1d(3 * ncoef if delta else ncoef, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5, padding=3),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 7),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 1500, 1),
        nn.BatchNorm1d(1500),
        nn.ReLU(inplace=True))

    self.ASPP_block = ASPP(1500, 1500)

    self.post_pooling_1 = nn.Sequential(
        nn.Conv1d(1500, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True))

    self.post_pooling_2 = nn.Sequential(
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, proj_size=0, ncoef=23, n_heads=4, sm_type='none', delta=False):
    super(TDNN_multihead, self).__init__()

    self.delta = delta

    self.model = nn.Sequential(
        nn.Conv1d(3 * ncoef if delta else ncoef, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5, padding=3),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 7),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 1500, 1),
        nn.BatchNorm1d(1500),
        nn.ReLU(inplace=True))

    self.attention = nn.TransformerEncoderLayer(d_model=1500, nhead=n_heads, dim_feedforward=512, dropout=0.1)

    self.pooling = StatisticalPooling()

    self.post_pooling_1 = nn.Sequential(
        nn.Conv1d(1500 * 2, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True))

    self.post_pooling_2 = nn.Sequential(
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none', delta=False):
    super(TDNN_lstm, self).__init__()

    self.delta = delta

    self.model = nn.Sequential(
        nn.Conv1d(3 * ncoef if delta else ncoef, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5, padding=3),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 7),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 1500, 1),
        nn.BatchNorm1d(1500),
        nn.ReLU(inplace=True))

    self.pooling = nn.LSTM(1500, 512, 2, bidirectional=True, batch_first=False)

    self.attention = SelfAttention(1024)

    self.post_pooling_1 = nn.Sequential(
        nn.Conv1d(2560, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True))

    self.post_pooling_2 = nn.Sequential(
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, ncoef=13, proj_size=0, sm_type='none', n_heads=16):
    super().__init__()

    self.model_1 = nn.Sequential(
        nn.Conv1d(ncoef, 512, 5, padding=2, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(512))
    self.model_2 = nn.Sequential(
        nn.Conv1d(512, 512, 5, padding=2, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(512))
    self.model_3 = nn.Sequential(
        nn.Conv1d(512, 512, 5, padding=3, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(512))
    self.model_4 = nn.Sequential(
        nn.Conv1d(512, 512, 7, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(512))
    self.model_5 = nn.Sequential(
        nn.Conv1d(512, 512, 1, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(512))

    self.stats_pooling = StatisticalPooling()

    self.multihead_pooling = nn.TransformerEncoderLayer(d_model=1024, nhead=n_heads, dim_feedforward=512, dropout=0.1)

    self.post_pooling_1 = nn.Sequential(
        nn.Linear(1024, 512, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(512))

    self.post_pooling_2 = nn.Sequential(
        nn.Linear(512, 512, bias=False),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(512),
        nn.Linear(512, n_z))

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
def __init__(self, n_z=256, nh=1, n_h=512, layers=[3, 4, 23, 3], block=PreActBottleneck, proj_size=100, ncoef=23, dropout_prob=0.25, sm_type='softmax', ndiscriminators=1, r_proj_size=0):
    self.in_planes = 32
    super(ResNet_large, self).__init__()

    self.ndiscriminators = ndiscriminators
    self.r_proj_size = r_proj_size
    self.classifier = nn.ModuleList()
    self.dropout_prob = dropout_prob
    self.n_hidden = nh
    self.hidden_size = n_h
    self.latent_size = n_z
    self.sm_type = sm_type
    self.ncoef = ncoef

    self.conv1 = nn.Conv2d(1, 32, kernel_size=(ncoef, 3), stride=(1, 1), padding=(0, 1), bias=False)

    self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    self.fc = nn.Linear(block.expansion * 512 * 2, 512)
    self.lbn = nn.BatchNorm1d(512)

    self.fc_mu = nn.Linear(512, n_z)

    self.initialize_params()

    if ndiscriminators > 1:
        for i in range(self.ndiscriminators):
            self.classifier.append(self.make_bin_layers(n_in=2 * 512, n_h_layers=nh, h_size=n_h, dropout_p=dropout_prob))
    else:
        self.classifier = self.make_bin_layers(n_in=2 * 512, n_h_layers=nh, h_size=n_h, dropout_p=dropout_prob)

    self.attention = SelfAttention(block.expansion * 512)

    if sm_type == 'softmax':
        self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
    elif sm_type == 'am_softmax':
        self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
    else:
        raise NotImplementedError
def __init__(self, n_z=256, nh=1, n_h=512, proj_size=0, ncoef=23, sm_type='none', dropout_prob=0.25, ndiscriminators=1, r_proj_size=0):
    super(TDNN, self).__init__()

    self.ndiscriminators = ndiscriminators
    self.r_proj_size = r_proj_size
    self.classifier = nn.ModuleList()
    self.dropout_prob = dropout_prob
    self.n_hidden = nh
    self.hidden_size = n_h
    self.latent_size = n_z
    self.sm_type = sm_type
    self.ncoef = ncoef

    self.model = nn.Sequential(
        nn.Conv1d(ncoef, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5, padding=2),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 5, padding=3),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 512, 7),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, 1500, 1),
        nn.BatchNorm1d(1500),
        nn.ReLU(inplace=True))

    self.pooling = StatisticalPooling()

    self.post_pooling_1 = nn.Sequential(
        nn.Conv1d(3000, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True))

    self.post_pooling_2 = nn.Sequential(
        nn.Conv1d(512, 512, 1),
        nn.BatchNorm1d(512),
        nn.ReLU(inplace=True),
        nn.Conv1d(512, n_z, 1))

    if ndiscriminators > 1:
        for i in range(self.ndiscriminators):
            self.classifier.append(self.make_bin_layers(n_in=2 * 512, n_h_layers=nh, h_size=n_h, dropout_p=dropout_prob))
    else:
        self.classifier = self.make_bin_layers(n_in=2 * 512, n_h_layers=nh, h_size=n_h, dropout_p=dropout_prob)

    if proj_size > 0 and sm_type != 'none':
        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z, output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z, output_features=proj_size)
        else:
            raise NotImplementedError
if args.model == 'lcnn29_mfcc' or args.model == 'all':
    batch = torch.rand(3, 1, args.ncoef, 400)
    model = model_.lcnn_29layers_v2(n_z=args.latent_size, ncoef=args.ncoef)
    mu = model.forward(batch)
    print('lcnn29_mfcc', mu.size())

if args.model == 'TDNN' or args.model == 'all':
    batch = torch.rand(3, 1, args.ncoef, 400)
    model = model_.TDNN(n_z=args.latent_size, ncoef=args.ncoef)
    mu = model.forward(batch)
    print('TDNN', mu.size())

if args.model == 'TDNN_multipool' or args.model == 'all':
    batch = torch.rand(3, 1, args.ncoef, 400)
    model = model_.TDNN_multipool(n_z=args.latent_size, ncoef=args.ncoef)
    mu = model.forward(batch)
    print('TDNN_multipool', mu.size())

if args.model == 'FTDNN' or args.model == 'all':
    batch = torch.rand(3, 1, args.ncoef, 400)
    model = model_.FTDNN(n_z=args.latent_size, ncoef=args.ncoef)
    mu = model.forward(batch)
    print('FTDNN', mu.size())

if args.softmax:
    batch = torch.rand(3, mu.size(0))
    batch_labels = torch.randint(low=0, high=10, size=(mu.size(0),))
    amsm = AMSoftmax(input_features=batch.size(1), output_features=10)
    sm = Softmax(input_features=batch.size(1), output_features=10)
    print('amsm', amsm(batch, batch_labels).size())
    print('sm', sm(batch).size())
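# A possible extension of the checks above (hedged; not part of the original script):
# instantiate a model with an attached AM-softmax head and push the embedding through
# it, the way the optional `out_proj` members defined earlier are meant to be used at
# training time. This assumes the embedding `mu` has shape (batch, latent_size); the
# proj_size and label range of 10 are illustrative values only.
if args.model == 'TDNN' or args.model == 'all':
    model = model_.TDNN(n_z=args.latent_size, ncoef=args.ncoef, proj_size=10, sm_type='am_softmax')
    batch = torch.rand(3, 1, args.ncoef, 400)
    mu = model.forward(batch)
    batch_labels = torch.randint(low=0, high=10, size=(mu.size(0),))
    print('TDNN + am_softmax', model.out_proj(mu, batch_labels).size())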