def __init__(
    self,
    feat_in,
    num_classes,
    emb_sizes=[1024, 1024],
    pool_mode='xvector',
    init_mode="xavier_uniform",
):
    super().__init__()
    # Accept a comma-separated string ("1024,1024") or any iterable of sizes.
    if isinstance(emb_sizes, str):
        emb_sizes = emb_sizes.split(',')
    else:
        emb_sizes = list(emb_sizes)

    self._num_classes = num_classes
    self._pooling = StatsPoolLayer(feat_in=feat_in, pool_mode=pool_mode)
    self._feat_in = self._pooling.feat_in

    # Chain of layer sizes: pooled features -> emb_sizes[0] -> emb_sizes[1] -> ...
    shapes = [self._feat_in]
    for size in emb_sizes:
        shapes.append(int(size))

    emb_layers = []
    for shape_in, shape_out in zip(shapes[:-1], shapes[1:]):
        layer = self.affineLayer(shape_in, shape_out, learn_mean=False)
        emb_layers.append(layer)

    self.emb_layers = nn.ModuleList(emb_layers)
    self.final = nn.Linear(shapes[-1], self._num_classes)

    self.apply(lambda x: init_weights(x, mode=init_mode))
    self.to(self._device)
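# Aside, not part of the class above: the zip over `shapes` pairs consecutive
# sizes into (in, out) dims for each embedding layer. With a hypothetical
# pooled feature size of 3000 and the default emb_sizes:
shapes = [3000, 1024, 1024]
print(list(zip(shapes[:-1], shapes[1:])))  # [(3000, 1024), (1024, 1024)]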
def __init__(
    self,
    feat_in: int,
    num_classes: int,
    init_mode: Optional[str] = "xavier_uniform",
    return_logits: bool = True,
    pooling_type: str = 'avg',
):
    super().__init__()
    self._feat_in = feat_in
    self._return_logits = return_logits
    self._num_classes = num_classes

    # Collapse the time dimension to a single frame before classification.
    if pooling_type == 'avg':
        self.pooling = torch.nn.AdaptiveAvgPool1d(1)
    elif pooling_type == 'max':
        self.pooling = torch.nn.AdaptiveMaxPool1d(1)
    else:
        raise ValueError('Pooling type chosen is not valid. Must be either `avg` or `max`')

    self.decoder_layers = torch.nn.Sequential(torch.nn.Linear(self._feat_in, self._num_classes, bias=True))
    self.apply(lambda x: init_weights(x, mode=init_mode))
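# Standalone sketch (plain PyTorch, hypothetical shapes): how the pooling
# above collapses the time axis before the Linear classifier, assuming the
# encoder emits [batch, feat_in, time] tensors.
import torch

B, D, T, num_classes = 4, 256, 100, 10
encoder_output = torch.randn(B, D, T)

pool = torch.nn.AdaptiveAvgPool1d(1)                    # [B, D, T] -> [B, D, 1]
classifier = torch.nn.Linear(D, num_classes)

logits = classifier(pool(encoder_output).squeeze(-1))   # [B, num_classes]
print(logits.shape)  # torch.Size([4, 10])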
def __init__(
    self, *, feat_in, num_classes, init_mode="xavier_uniform", return_logits=True, pooling_type='avg', **kwargs
):
    TrainableNM.__init__(self, **kwargs)
    self._feat_in = feat_in
    self._return_logits = return_logits
    self._num_classes = num_classes

    if pooling_type == 'avg':
        self.pooling = nn.AdaptiveAvgPool1d(1)
    elif pooling_type == 'max':
        self.pooling = nn.AdaptiveMaxPool1d(1)
    else:
        raise ValueError('Pooling type chosen is not valid. Must be either `avg` or `max`')

    self.decoder_layers = nn.Sequential(nn.Linear(self._feat_in, self._num_classes, bias=True))
    self.apply(lambda x: init_weights(x, mode=init_mode))
    self.to(self._device)
def __init__(self, feat_in, num_classes, init_mode="xavier_uniform", vocabulary=None, quant_mode='none', quant_bit=8): super().__init__() self.quant_mode = quant_mode if vocabulary is not None: if num_classes != len(vocabulary): raise ValueError( f"If vocabulary is specified, it's length should be equal to the num_classes. Instead got: num_classes={num_classes} and len(vocabulary)={len(vocabulary)}" ) self.__vocabulary = vocabulary self._feat_in = feat_in # Add 1 for blank char self._num_classes = num_classes + 1 self.act = QuantAct(quant_bit, quant_mode=self.quant_mode, per_channel=False) conv = torch.nn.Conv1d(self._feat_in, self._num_classes, kernel_size=1, bias=True) qconv = QuantConv1d(quant_bit, bias_bit=32, quant_mode=self.quant_mode, per_channel=True) qconv.set_param(conv) self.decoder_layers = torch.nn.Sequential(qconv) self.apply(lambda x: init_weights(x, mode=init_mode))
def __init__(self, feat_in, num_classes, init_mode="xavier_uniform", vocabulary=None): super().__init__() if vocabulary is not None: if num_classes != len(vocabulary): raise ValueError( f"If vocabulary is specified, it's length should be equal to the num_classes. Instead got: num_classes={num_classes} and len(vocabulary)={len(vocabulary)}" ) self.__vocabulary = vocabulary self._feat_in = feat_in # Add 1 for blank char self._num_classes = num_classes + 1 self.decoder_layers = torch.nn.Sequential( torch.nn.Conv1d(self._feat_in, self._num_classes, kernel_size=1, bias=True) ) self.apply(lambda x: init_weights(x, mode=init_mode))
def __init__(
    self,
    feat_in,
    num_classes,
    emb_sizes=None,
    pool_mode='xvector',
    angular=False,
    init_mode="xavier_uniform",
):
    super().__init__()
    self.angular = angular
    self.emb_id = 2
    # Angular-margin losses operate on cosine similarities, so the final
    # linear layer must not carry a bias term.
    bias = not self.angular

    # Accept a comma-separated string, a single int, an iterable of sizes,
    # or None (which falls back to two 512-dim embedding layers).
    if emb_sizes is None:
        emb_sizes = [512, 512]
    elif isinstance(emb_sizes, str):
        emb_sizes = emb_sizes.split(',')
    elif isinstance(emb_sizes, int):
        emb_sizes = [emb_sizes]
    else:
        emb_sizes = list(emb_sizes)

    self.input_feat_in = feat_in
    self._num_classes = num_classes
    self._pooling = StatsPoolLayer(feat_in=feat_in, pool_mode=pool_mode)
    self._feat_in = self._pooling.feat_in

    shapes = [self._feat_in]
    for size in emb_sizes:
        shapes.append(int(size))

    emb_layers = []
    for shape_in, shape_out in zip(shapes[:-1], shapes[1:]):
        layer = self.affineLayer(shape_in, shape_out, learn_mean=False)
        emb_layers.append(layer)

    self.emb_layers = nn.ModuleList(emb_layers)
    self.final = nn.Linear(shapes[-1], self._num_classes, bias=bias)

    self.apply(lambda x: init_weights(x, mode=init_mode))
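# Standalone sketch: why `angular=True` drops the bias. Angular-margin
# losses treat logits as cosine similarities between the L2-normalized
# embedding and L2-normalized class weights, leaving no room for a bias.
import torch
import torch.nn.functional as F

emb = torch.randn(4, 512)
class_weights = torch.randn(10, 512)  # one row per class
cos_logits = F.linear(F.normalize(emb, dim=-1), F.normalize(class_weights, dim=-1))
print(cos_logits.shape, cos_logits.abs().max())  # [4, 10], values within [-1, 1]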
def __init__(self, feat_in, num_classes, emb_sizes=[1024, 1024], pool_mode='xvector', init_mode="xavier_uniform"):
    TrainableNM.__init__(self)
    self._feat_in = 0
    # Select the pooling statistics: 'gram' pools a DxD Gram matrix,
    # 'superVector' concatenates the Gram matrix with mean/std stats,
    # anything else ('xvector') uses mean/std stats only.
    if pool_mode == 'gram':
        gram = True
        super_vector = False
    elif pool_mode == 'superVector':
        gram = True
        super_vector = True
    else:
        gram = False
        super_vector = False

    if gram:
        self._feat_in += feat_in ** 2
    else:
        self._feat_in += 2 * feat_in

    if super_vector and gram:
        self._feat_in += 2 * feat_in

    self._midEmbd1 = int(emb_sizes[0])  # speaker embedding size of the first affine layer
    self._midEmbd2 = int(emb_sizes[1]) if len(emb_sizes) > 1 else 0  # and of the second

    self._num_classes = num_classes
    self._pooling = StatsPoolLayer(gram=gram, super_vector=super_vector)

    self.mid1 = self.affineLayer(self._feat_in, self._midEmbd1, learn_mean=False)
    self.mid2 = self.affineLayer(self._midEmbd1, self._midEmbd2, learn_mean=False)
    self.final = nn.Linear(self._midEmbd2, self._num_classes)

    self.apply(lambda x: init_weights(x, mode=init_mode))
    self.to(self._device)
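# Standalone sketch (plain torch, assuming [B, D, T] activations; the exact
# statistics StatsPoolLayer computes may differ): where the feature sizes
# above come from. Mean/std stats give 2*D features, a flattened DxD Gram
# matrix gives D**2, and the 'superVector' mode concatenates both.
import torch

B, D, T = 2, 64, 100
x = torch.randn(B, D, T)

stats = torch.cat([x.mean(dim=-1), x.std(dim=-1)], dim=-1)  # [B, 2*D]
gram = torch.bmm(x, x.transpose(1, 2)).flatten(1) / T       # [B, D*D]
super_vec = torch.cat([gram, stats], dim=-1)                # [B, D*D + 2*D]
print(stats.shape, gram.shape, super_vec.shape)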
def __init__(
    self,
    jasper,
    activation: str,
    feat_in: int,
    normalization_mode: str = "batch",
    residual_mode: str = "add",
    norm_groups: int = -1,
    conv_mask: bool = True,
    frame_splicing: int = 1,
    init_mode: Optional[str] = 'xavier_uniform',
    quantize: bool = False,
):
    super().__init__()
    if isinstance(jasper, ListConfig):
        jasper = OmegaConf.to_container(jasper)

    activation = jasper_activations[activation]()

    # Spliced frames multiply the effective input feature size.
    feat_in = feat_in * frame_splicing
    self._feat_in = feat_in

    residual_panes = []
    encoder_layers = []
    self.dense_residual = False
    for lcfg in jasper:
        dense_res = []
        if lcfg.get('residual_dense', False):
            residual_panes.append(feat_in)
            dense_res = residual_panes
            self.dense_residual = True
        groups = lcfg.get('groups', 1)
        separable = lcfg.get('separable', False)
        heads = lcfg.get('heads', -1)
        residual_mode = lcfg.get('residual_mode', residual_mode)
        se = lcfg.get('se', False)
        se_reduction_ratio = lcfg.get('se_reduction_ratio', 8)
        se_context_window = lcfg.get('se_context_size', -1)
        se_interpolation_mode = lcfg.get('se_interpolation_mode', 'nearest')
        kernel_size_factor = lcfg.get('kernel_size_factor', 1.0)
        stride_last = lcfg.get('stride_last', False)
        future_context = lcfg.get('future_context', -1)
        encoder_layers.append(
            JasperBlock(
                feat_in,
                lcfg['filters'],
                repeat=lcfg['repeat'],
                kernel_size=lcfg['kernel'],
                stride=lcfg['stride'],
                dilation=lcfg['dilation'],
                dropout=lcfg['dropout'],
                residual=lcfg['residual'],
                groups=groups,
                separable=separable,
                heads=heads,
                residual_mode=residual_mode,
                normalization=normalization_mode,
                norm_groups=norm_groups,
                activation=activation,
                residual_panes=dense_res,
                conv_mask=conv_mask,
                se=se,
                se_reduction_ratio=se_reduction_ratio,
                se_context_window=se_context_window,
                se_interpolation_mode=se_interpolation_mode,
                kernel_size_factor=kernel_size_factor,
                stride_last=stride_last,
                future_context=future_context,
                quantize=quantize,
            )
        )
        # The next block consumes this block's output channels.
        feat_in = lcfg['filters']

    self._feat_out = feat_in
    self.encoder = torch.nn.Sequential(*encoder_layers)
    self.apply(lambda x: init_weights(x, mode=init_mode))
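# Hypothetical single-entry `jasper` config, illustrating the dict keys the
# loop above consumes; real configs stack many such blocks, and the values
# here are made up.
jasper_example = [
    {
        'filters': 256,
        'repeat': 1,
        'kernel': [11],
        'stride': [1],
        'dilation': [1],
        'dropout': 0.2,
        'residual': False,
        'separable': True,
        'se': True,
        'se_context_size': -1,
    }
]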
def __init__(self, feat_in=128 * 8, emb_size=128, init_mode="xavier_uniform"):
    super().__init__()
    self.linear = nn.Linear(feat_in, emb_size)
    self.apply(lambda x: init_weights(x, mode=init_mode))
    self.to(self._device)
    self.emb_size = emb_size
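# Standalone sketch: the module above is a single Linear projection from
# flattened encoder features (feat_in) to a fixed-size embedding.
import torch

linear = torch.nn.Linear(128 * 8, 128)
emb = linear(torch.randn(4, 128 * 8))
print(emb.shape)  # torch.Size([4, 128])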
def __init__(
    self,
    jasper,
    activation,
    feat_in,
    normalization_mode="batch",
    residual_mode="add",
    norm_groups=-1,
    conv_mask=True,
    frame_splicing=1,
    init_mode='xavier_uniform',
):
    super().__init__()
    activation = jasper_activations[activation]()
    feat_in = feat_in * frame_splicing
    self.__feat_in = feat_in

    residual_panes = []
    encoder_layers = []
    self.dense_residual = False
    for lcfg in jasper:
        dense_res = []
        if lcfg.get('residual_dense', False):
            residual_panes.append(feat_in)
            dense_res = residual_panes
            self.dense_residual = True
        groups = lcfg.get('groups', 1)
        separable = lcfg.get('separable', False)
        heads = lcfg.get('heads', -1)
        residual_mode = lcfg.get('residual_mode', residual_mode)
        se = lcfg.get('se', False)
        se_reduction_ratio = lcfg.get('se_reduction_ratio', 8)
        se_context_window = lcfg.get('se_context_window', -1)
        se_interpolation_mode = lcfg.get('se_interpolation_mode', 'nearest')
        kernel_size_factor = lcfg.get('kernel_size_factor', 1.0)
        stride_last = lcfg.get('stride_last', False)
        encoder_layers.append(
            JasperBlock(
                feat_in,
                lcfg['filters'],
                repeat=lcfg['repeat'],
                kernel_size=lcfg['kernel'],
                stride=lcfg['stride'],
                dilation=lcfg['dilation'],
                dropout=lcfg['dropout'],
                residual=lcfg['residual'],
                groups=groups,
                separable=separable,
                heads=heads,
                residual_mode=residual_mode,
                normalization=normalization_mode,
                norm_groups=norm_groups,
                activation=activation,
                residual_panes=dense_res,
                conv_mask=conv_mask,
                se=se,
                se_reduction_ratio=se_reduction_ratio,
                se_context_window=se_context_window,
                se_interpolation_mode=se_interpolation_mode,
                kernel_size_factor=kernel_size_factor,
                stride_last=stride_last,
            )
        )
        feat_in = lcfg['filters']

    self.encoder = nn.Sequential(*encoder_layers)
    self.apply(lambda x: init_weights(x, mode=init_mode))
    self.to(self._device)