def __init__(
    self,
    hidden_size,
    num_classes,
    activation='relu',
    log_softmax=True,
    dropout=0.0,
    use_transformer_pretrained=True,
):
    super().__init__()
    if activation not in ACT2FN:
        raise ValueError(f'activation "{activation}" not found')
    self.dense = nn.Linear(hidden_size, hidden_size)
    self.act = ACT2FN[activation]
    self.norm = nn.LayerNorm(hidden_size, eps=1e-12)
    self.mlp = MultiLayerPerceptron(
        hidden_size, num_classes, self._device, num_layers=1, activation=activation, log_softmax=log_softmax
    )
    self.dropout = nn.Dropout(dropout)
    if use_transformer_pretrained:
        self.apply(lambda module: transformer_weights_init(module, xavier=False))
    self.to(self._device)
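# Illustrative sketch (not this module's actual forward pass): what a token-classification
# head of the shape above computes per token, rebuilt from plain PyTorch so it can run
# stand-alone. The class name is hypothetical, MultiLayerPerceptron with num_layers=1 is
# approximated by a single nn.Linear followed by log_softmax, and the exact placement of
# dropout is an assumption.
import torch
import torch.nn as nn
import torch.nn.functional as F


class _TokenHeadSketch(nn.Module):
    def __init__(self, hidden_size, num_classes, dropout=0.0):
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)     # per-token projection
        self.norm = nn.LayerNorm(hidden_size, eps=1e-12)      # same eps as above
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(hidden_size, num_classes)

    def forward(self, hidden_states):
        # hidden_states: [batch, seq_len, hidden_size] from a transformer encoder.
        x = self.norm(F.relu(self.dense(hidden_states)))
        return F.log_softmax(self.classifier(self.dropout(x)), dim=-1)


# usage: log_probs = _TokenHeadSketch(768, 9, dropout=0.1)(torch.randn(2, 16, 768))  # -> [2, 16, 9]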
def __init__(
    self,
    hidden_size,
    punct_num_classes,
    capit_num_classes,
    punct_num_layers=2,
    capit_num_layers=2,
    activation='relu',
    log_softmax=True,
    dropout=0.0,
    use_transformer_pretrained=True,
):
    # Pass name up the module class hierarchy.
    super().__init__()
    self.dropout = nn.Dropout(dropout)
    self.punct_mlp = MultiLayerPerceptron(
        hidden_size, punct_num_classes, self._device, punct_num_layers, activation, log_softmax
    )
    self.capit_mlp = MultiLayerPerceptron(
        hidden_size, capit_num_classes, self._device, capit_num_layers, activation, log_softmax
    )
    if use_transformer_pretrained:
        self.apply(lambda module: transformer_weights_init(module, xavier=False))
def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0, use_transformer_pretrained=True, **kwargs):
    super().__init__(**kwargs)
    self.dropout = nn.Dropout(dropout)
    self.slot_mlp = MultiLayerPerceptron(
        hidden_size, num_classes=num_slots, device=self._device, num_layers=2, activation='relu', log_softmax=False
    )
    self.intent_mlp = MultiLayerPerceptron(
        hidden_size, num_classes=num_intents, device=self._device, num_layers=2, activation='relu', log_softmax=False
    )
    if use_transformer_pretrained:
        self.apply(lambda module: transformer_weights_init(module, xavier=False))
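# Illustrative sketch (assumption, not this module's forward pass): a joint intent/slot
# head typically scores intents from a pooled representation and slots per token. The
# two-layer MLPs above are approximated with small nn.Sequential stacks; all names and
# the use of the first token as the pooled representation are assumptions.
import torch
import torch.nn as nn


def _two_layer_mlp(hidden_size, num_classes):
    return nn.Sequential(nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, num_classes))


class _JointIntentSlotSketch(nn.Module):
    def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.intent_mlp = _two_layer_mlp(hidden_size, num_intents)
        self.slot_mlp = _two_layer_mlp(hidden_size, num_slots)

    def forward(self, hidden_states):
        # hidden_states: [batch, seq_len, hidden_size]
        hidden_states = self.dropout(hidden_states)
        intent_logits = self.intent_mlp(hidden_states[:, 0])  # [batch, num_intents]
        slot_logits = self.slot_mlp(hidden_states)             # [batch, seq_len, num_slots]
        return intent_logits, slot_logits


# usage: intents, slots = _JointIntentSlotSketch(768, 7, 12, dropout=0.1)(torch.randn(2, 16, 768))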
def __init__(self, hidden_size, activation='tanh', dropout=0.0, use_transformer_pretrained=True):
    super().__init__()
    self.fc = nn.Linear(hidden_size, hidden_size).to(self._device)
    if activation not in ACTIVATIONS_F:
        raise ValueError(f'activation "{activation}" is not supported; choose one of {list(ACTIVATIONS_F.keys())}')
    self.activation = ACTIVATIONS_F[activation]()
    self.dropout1 = nn.Dropout(dropout)
    self.dropout2 = nn.Dropout(dropout)
    if use_transformer_pretrained:
        self.apply(lambda module: transformer_weights_init(module, xavier=False))
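# Illustrative sketch (assumption): a pooler-style head of this shape is usually applied
# to a single hidden state as dropout -> linear -> tanh -> dropout. The actual forward
# pass and the choice of pooled token are not shown above, so the class name, the
# first-token pooling, and the ordering below are for demonstration only.
import torch
import torch.nn as nn


class _PoolerSketch(nn.Module):
    def __init__(self, hidden_size, dropout=0.0):
        super().__init__()
        self.fc = nn.Linear(hidden_size, hidden_size)
        self.activation = nn.Tanh()
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, hidden_states):
        # hidden_states: [batch, seq_len, hidden_size]; pool the first token.
        pooled = self.activation(self.fc(self.dropout1(hidden_states[:, 0])))
        return self.dropout2(pooled)  # [batch, hidden_size]


# usage: pooled = _PoolerSketch(768, dropout=0.1)(torch.randn(2, 16, 768))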
def __init__(
    self,
    hidden_size,
    num_classes,
    num_layers=2,
    activation='relu',
    log_softmax=True,
    dropout=0.0,
    use_transformer_pretrained=True,
):
    super().__init__()
    self.mlp = MultiLayerPerceptron(hidden_size, num_classes, self._device, num_layers, activation, log_softmax)
    self.dropout = nn.Dropout(dropout)
    if use_transformer_pretrained:
        self.apply(lambda module: transformer_weights_init(module, xavier=False))
def __init__(
    self,
    vocab_size,
    d_model,
    d_inner,
    max_seq_length,
    num_layers,
    num_attn_heads,
    ffn_dropout=0.0,
    embedding_dropout=0.0,
    attn_score_dropout=0.0,
    attn_layer_dropout=0.0,
    learn_positional_encodings=False,
    hidden_act='relu',
    mask_future=False,
):
    super().__init__()

    self.embedding_layer = TransformerEmbedding(
        vocab_size=vocab_size,
        hidden_size=d_model,
        max_sequence_length=max_seq_length,
        embedding_dropout=embedding_dropout,
        learn_positional_encodings=learn_positional_encodings,
    )
    self.encoder = TransformerEncoder(
        num_layers=num_layers,
        hidden_size=d_model,
        mask_future=mask_future,
        num_attention_heads=num_attn_heads,
        inner_size=d_inner,
        ffn_dropout=ffn_dropout,
        hidden_act=hidden_act,
        attn_score_dropout=attn_score_dropout,
        attn_layer_dropout=attn_layer_dropout,
    )
    std_init_range = 1 / math.sqrt(d_model)
    self.apply(lambda module: transformer_weights_init(module, std_init_range))
    self.to(self._device)
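# Illustrative sketch (assumption): the call above scales the initialization range by
# 1 / sqrt(d_model). A common way to express such a scheme in plain PyTorch is shown
# below; the real transformer_weights_init may differ in detail, so treat this only as
# a sketch of the idea, applied the same way via module.apply.
import math

import torch
import torch.nn as nn


def _init_like_transformer(module, std):
    # Normal(0, std) for weight matrices and embeddings, zeros for biases,
    # standard ones/zeros init for LayerNorm parameters.
    if isinstance(module, (nn.Linear, nn.Embedding)):
        nn.init.normal_(module.weight, mean=0.0, std=std)
        if isinstance(module, nn.Linear) and module.bias is not None:
            nn.init.zeros_(module.bias)
    elif isinstance(module, nn.LayerNorm):
        nn.init.ones_(module.weight)
        nn.init.zeros_(module.bias)


# usage on a toy feed-forward block with d_model = 512:
# toy = nn.Sequential(nn.Linear(512, 2048), nn.ReLU(), nn.Linear(2048, 512))
# toy.apply(lambda m: _init_like_transformer(m, std=1 / math.sqrt(512)))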