def __init__(
    self,
    args,
    src_dict,
    dst_dict,
    embed_tokens,
    num_chars=50,
    char_embed_dim=32,
    char_cnn_params="[(128, 3), (128, 5)]",
    char_cnn_nonlinear_fn="tanh",
    char_cnn_num_highway_layers=0,
    use_pretrained_weights=False,
    finetune_pretrained_weights=False,
):
    """Run the parent initializer, then attach a character CNN and its layer norm.

    Args:
        args: Forwarded unchanged to the parent ``__init__``.
        src_dict: Forwarded unchanged to the parent ``__init__``.
        dst_dict: Forwarded to the parent ``__init__`` and also handed to
            ``CharCNNModel`` as its ``dictionary``.
        embed_tokens: Forwarded to the parent ``__init__``; its
            ``embedding_dim`` fixes the character-CNN output width and the
            size of the layer norm.
        num_chars: Passed through to ``CharCNNModel``.
        char_embed_dim: Passed through to ``CharCNNModel``.
        char_cnn_params: String containing a Python literal, parsed with
            ``literal_eval`` and passed as ``convolutions_params``. The
            default is a list of 2-tuples (presumably
            ``(out_channels, kernel_width)`` -- confirm in ``CharCNNModel``).
        char_cnn_nonlinear_fn: Passed as ``nonlinear_fn_type``.
        char_cnn_num_highway_layers: Passed as ``num_highway_layers``.
        use_pretrained_weights: Passed through to ``CharCNNModel``.
        finetune_pretrained_weights: Passed through to ``CharCNNModel``.
    """
    super().__init__(args, src_dict, dst_dict, embed_tokens)

    word_embed_dim = embed_tokens.embedding_dim
    conv_specs = literal_eval(char_cnn_params)

    self.char_cnn_encoder = char_encoder.CharCNNModel(
        dictionary=dst_dict,
        num_chars=num_chars,
        char_embed_dim=char_embed_dim,
        convolutions_params=conv_specs,
        nonlinear_fn_type=char_cnn_nonlinear_fn,
        num_highway_layers=char_cnn_num_highway_layers,
        # The CNN output width has to line up with the word embedding width.
        char_cnn_output_dim=word_embed_dim,
        use_pretrained_weights=use_pretrained_weights,
        finetune_pretrained_weights=finetune_pretrained_weights,
    )
    self.char_layer_norm = nn.LayerNorm(word_embed_dim)
def __init__(
    self,
    args,
    src_dict,
    dst_dict,
    embed_tokens,
    num_chars=50,
    char_embed_dim=32,
    char_cnn_params="[(128, 3), (128, 5)]",
    char_cnn_nonlinear_fn="tanh",
    char_cnn_num_highway_layers=0,
    use_pretrained_weights=False,
    finetune_pretrained_weights=False,
):
    """Run the parent initializer, then add the character CNN and a combined table.

    Besides the character CNN and its layer norm, this creates an
    ``nn.Embedding`` with the same shape as ``embed_tokens`` and a flag
    recording whether character representations have been precomputed.

    Args:
        args: Forwarded unchanged to the parent ``__init__``.
        src_dict: Forwarded unchanged to the parent ``__init__``.
        dst_dict: Forwarded to the parent ``__init__`` and also handed to
            ``CharCNNModel`` as its ``dictionary``.
        embed_tokens: Forwarded to the parent ``__init__``; its
            ``num_embeddings`` and ``embedding_dim`` size the combined
            embedding, and ``embedding_dim`` also fixes the character-CNN
            output width and the layer-norm size.
        num_chars: Passed through to ``CharCNNModel``.
        char_embed_dim: Passed through to ``CharCNNModel``.
        char_cnn_params: String containing a Python literal, parsed with
            ``literal_eval`` and passed as ``convolutions_params``. The
            default is a list of 2-tuples (presumably
            ``(out_channels, kernel_width)`` -- confirm in ``CharCNNModel``).
        char_cnn_nonlinear_fn: Passed as ``nonlinear_fn_type``.
        char_cnn_num_highway_layers: Passed as ``num_highway_layers``.
        use_pretrained_weights: Passed through to ``CharCNNModel``.
        finetune_pretrained_weights: Passed through to ``CharCNNModel``.
    """
    super().__init__(args, src_dict, dst_dict, embed_tokens)

    vocab_size = embed_tokens.num_embeddings
    word_embed_dim = embed_tokens.embedding_dim
    conv_specs = literal_eval(char_cnn_params)

    self.char_cnn_encoder = char_encoder.CharCNNModel(
        dictionary=dst_dict,
        num_chars=num_chars,
        char_embed_dim=char_embed_dim,
        convolutions_params=conv_specs,
        nonlinear_fn_type=char_cnn_nonlinear_fn,
        num_highway_layers=char_cnn_num_highway_layers,
        # The CNN output width has to line up with the word embedding width.
        char_cnn_output_dim=word_embed_dim,
        use_pretrained_weights=use_pretrained_weights,
        finetune_pretrained_weights=finetune_pretrained_weights,
    )
    self.char_layer_norm = nn.LayerNorm(word_embed_dim)

    # Character representations start out not precomputed (the state before
    # training ends). Once they have been precomputed, the combined table
    # below is meant to stand in for the two separate embeddings.
    self._is_precomputed = False
    self.combined_word_char_embed = nn.Embedding(vocab_size, word_embed_dim)
def __init__(
    self,
    args,
    dictionary,
    embed_tokens,
    num_chars=50,
    embed_dim=32,
    char_cnn_params="[(128, 3), (128, 5)]",
    char_cnn_nonlinear_fn="tanh",
    char_cnn_pool_type="max",
    char_cnn_num_highway_layers=0,
    char_cnn_output_dim=-1,
    use_pretrained_weights=False,
    finetune_pretrained_weights=False,
    weights_file=None,
):
    """Build a transformer encoder fed by word embeddings plus character-CNN features.

    Args:
        args: Configuration object; this method reads ``encoder_embed_dim``,
            ``dropout`` and ``encoder_learned_pos`` from it and passes the
            whole object to ``TransformerEncoderGivenEmbeddings``.
        dictionary: Passed to the parent ``__init__`` and to
            ``CharCNNModel``; ``dictionary.pad()`` supplies the padding index.
        embed_tokens: Word embedding module, stored as ``self.embed_tokens``;
            its ``embedding_dim`` is the word-level width.
        num_chars: Passed positionally to ``CharCNNModel``.
        embed_dim: Passed positionally to ``CharCNNModel``.
        char_cnn_params: String containing a Python literal, parsed with
            ``literal_eval``. Each entry must unpack into two items; the
            first items are summed to get the character width when
            ``char_cnn_output_dim`` is ``-1``.
        char_cnn_nonlinear_fn: Passed positionally to ``CharCNNModel``.
        char_cnn_pool_type: Passed positionally to ``CharCNNModel``.
        char_cnn_num_highway_layers: Passed positionally to ``CharCNNModel``.
        char_cnn_output_dim: Character feature width; ``-1`` means "use the
            summed convolution output sizes" when sizing the layer norm.
        use_pretrained_weights: Passed positionally to ``CharCNNModel``.
        finetune_pretrained_weights: Passed positionally to ``CharCNNModel``.
        weights_file: Passed positionally to ``CharCNNModel``.
    """
    super().__init__(dictionary)

    conv_specs = literal_eval(char_cnn_params)
    self.char_cnn_encoder = char_encoder.CharCNNModel(
        dictionary,
        num_chars,
        embed_dim,
        conv_specs,
        char_cnn_nonlinear_fn,
        char_cnn_pool_type,
        char_cnn_num_highway_layers,
        char_cnn_output_dim,
        use_pretrained_weights,
        finetune_pretrained_weights,
        weights_file,
    )

    # Word-level embedding and its normalisation.
    self.embed_tokens = embed_tokens
    token_embed_dim = embed_tokens.embedding_dim
    self.word_layer_norm = nn.LayerNorm(token_embed_dim)

    # Character-level width: explicit if given, otherwise the sum of the
    # per-convolution output sizes.
    if char_cnn_output_dim != -1:
        char_embed_dim = char_cnn_output_dim
    else:
        char_embed_dim = sum(out_dim for (out_dim, _) in conv_specs)
    self.char_layer_norm = nn.LayerNorm(char_embed_dim)

    # Each scale is the square root of that part's share of the total width.
    combined_dim = char_embed_dim + token_embed_dim
    self.word_dim = combined_dim
    self.char_scale = math.sqrt(char_embed_dim / combined_dim)
    self.word_scale = math.sqrt(token_embed_dim / combined_dim)

    # The projection attribute only exists when the widths disagree.
    encoder_dim = args.encoder_embed_dim
    if combined_dim != encoder_dim:
        self.word_to_transformer_embed = fairseq_transformer.Linear(
            combined_dim, encoder_dim
        )

    self.dropout = args.dropout
    self.padding_idx = dictionary.pad()

    self.embed_positions = fairseq_transformer.PositionalEmbedding(
        1024,
        encoder_dim,
        self.padding_idx,
        learned=args.encoder_learned_pos,
    )

    self.transformer_encoder_given_embeddings = TransformerEncoderGivenEmbeddings(
        args=args, proj_to_decoder=True
    )

    # Variable tracker
    self.tracker = VariableTracker()
    # Adversarial mode starts with both switches off.
    self.set_gradient_tracking_mode(False)
    self.set_embed_noising_mode(False)

    # When True, sorting and word-length thresholding are disabled
    # (enables ONNX tracing of length-sorted input with batch_size = 1).
    self.onnx_export_model = False
def __init__(
    self,
    dictionary,
    num_chars=50,
    embed_dim=32,
    token_embed_dim=256,
    freeze_embed=False,
    char_cnn_params="[(128, 3), (128, 5)]",
    char_cnn_output_dim=256,
    char_cnn_nonlinear_fn="tanh",
    char_cnn_pool_type="max",
    char_cnn_num_highway_layers=0,
    hidden_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    residual_level=None,
    bidirectional=False,
    word_dropout_params=None,
):
    """Build a stacked-LSTM encoder over character-CNN and optional word embeddings.

    Args:
        dictionary: Passed to the parent ``__init__``, ``CharCNNModel`` and
            ``WordDropout``; ``len(dictionary)`` and ``dictionary.pad()``
            size and configure the optional word embedding.
        num_chars: Passed positionally to ``CharCNNModel``.
        embed_dim: Passed positionally to ``CharCNNModel``.
        token_embed_dim: Width of the word embedding. A word embedding is
            created only when this is greater than zero; the value is
            always added into ``self.word_dim``.
        freeze_embed: Passed to ``rnn.Embedding``.
        char_cnn_params: String containing a Python literal, parsed with
            ``literal_eval``. Each entry must unpack into two items; the
            first items are summed into ``self.word_dim``.
        char_cnn_output_dim: Accepted but not read anywhere in this method.
        char_cnn_nonlinear_fn: Passed positionally to ``CharCNNModel``.
        char_cnn_pool_type: Passed positionally to ``CharCNNModel``.
        char_cnn_num_highway_layers: Passed positionally to ``CharCNNModel``.
        hidden_dim: LSTM hidden size. Must be even when ``bidirectional`` is
            set, because the first layer gets ``hidden_dim // 2``.
        num_layers: Number of single-layer LSTM modules to stack.
        dropout_in: Stored as ``self.dropout_in``.
        dropout_out: Stored as ``self.dropout_out`` and used as the
            ``dropout`` argument of every LSTM layer.
        residual_level: Stored as ``self.residual_level``.
        bidirectional: If true, only the first layer is bidirectional.
        word_dropout_params: Optional mapping. A ``WordDropout`` module is
            created only when it is truthy and its
            ``"word_dropout_freq_threshold"`` entry is not ``None`` and is
            greater than zero.

    Raises:
        AssertionError: If ``bidirectional`` is set, ``num_layers`` is at
            least 1 and ``hidden_dim`` is odd.
    """
    super().__init__(dictionary)

    # Plain configuration values.
    self.dictionary = dictionary
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.residual_level = residual_level
    self.hidden_dim = hidden_dim
    self.bidirectional = bidirectional

    conv_specs = literal_eval(char_cnn_params)
    self.char_cnn_encoder = char_encoder.CharCNNModel(
        dictionary,
        num_chars,
        embed_dim,
        conv_specs,
        char_cnn_nonlinear_fn,
        char_cnn_pool_type,
        char_cnn_num_highway_layers,
    )

    # The word embedding is optional; it stays None for non-positive widths.
    self.embed_tokens = None
    num_tokens = len(dictionary)
    self.padding_idx = dictionary.pad()
    if token_embed_dim > 0:
        self.embed_tokens = rnn.Embedding(
            num_embeddings=num_tokens,
            embedding_dim=token_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=freeze_embed,
        )

    char_feature_dim = sum(out_dim for (out_dim, _) in conv_specs)
    self.word_dim = char_feature_dim + token_embed_dim

    self.layers = nn.ModuleList([])
    for layer_idx in range(num_layers):
        first_layer = layer_idx == 0
        is_layer_bidirectional = self.bidirectional and first_layer
        if is_layer_bidirectional:
            assert hidden_dim % 2 == 0, (
                "encoder_hidden_dim must be even if encoder_bidirectional "
                "(to be divided evenly between directions)"
            )
        # The first layer reads the concatenated word features; later
        # layers read the previous layer's hidden states.
        input_dim = self.word_dim if first_layer else hidden_dim
        output_dim = hidden_dim // 2 if is_layer_bidirectional else hidden_dim
        self.layers.append(
            rnn.LSTMSequenceEncoder.LSTM(
                input_dim,
                output_dim,
                num_layers=1,
                dropout=self.dropout_out,
                bidirectional=is_layer_bidirectional,
            )
        )
    self.num_layers = len(self.layers)

    # Word dropout is only enabled for a positive frequency threshold.
    self.word_dropout_module = None
    if word_dropout_params:
        freq_threshold = word_dropout_params["word_dropout_freq_threshold"]
        if freq_threshold is not None and freq_threshold > 0:
            self.word_dropout_module = word_dropout.WordDropout(
                dictionary, word_dropout_params
            )
def __init__(
    self,
    dictionary,
    num_chars=50,
    unk_only_char_encoding=False,
    embed_dim=32,
    token_embed_dim=256,
    freeze_embed=False,
    normalize_embed=False,
    char_cnn_params="[(128, 3), (128, 5)]",
    char_cnn_nonlinear_fn="tanh",
    char_cnn_pool_type="max",
    char_cnn_num_highway_layers=0,
    char_cnn_output_dim=-1,
    hidden_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    residual_level=None,
    bidirectional=False,
    word_dropout_params=None,
    use_pretrained_weights=False,
    finetune_pretrained_weights=False,
    weights_file=None,
):
    """Build a BiLSTM encoder over character-CNN and optional word embeddings.

    Args:
        dictionary: Passed to the parent ``__init__`` and ``CharCNNModel``;
            also supplies ``len()``, ``pad()`` and ``unk()``.
        num_chars: Passed positionally to ``CharCNNModel``.
        unk_only_char_encoding: Stored on the instance. When true,
            ``self.word_dim`` is ``token_embed_dim`` alone and
            ``char_cnn_output_dim`` must equal ``token_embed_dim``; when
            false, ``self.word_dim`` is the character width plus
            ``token_embed_dim``.
        embed_dim: Passed positionally to ``CharCNNModel``.
        token_embed_dim: Width of the word embedding, stored on the
            instance. A word embedding is created only when it is greater
            than zero.
        freeze_embed: Passed to ``rnn.Embedding``.
        normalize_embed: Passed to ``rnn.Embedding``.
        char_cnn_params: String containing a Python literal, parsed with
            ``literal_eval``. Each entry must unpack into two items; the
            first items are summed when ``char_cnn_output_dim`` is ``-1``.
        char_cnn_nonlinear_fn: Passed positionally to ``CharCNNModel``.
        char_cnn_pool_type: Passed positionally to ``CharCNNModel``.
        char_cnn_num_highway_layers: Passed positionally to ``CharCNNModel``.
        char_cnn_output_dim: Character feature width; ``-1`` means "use the
            summed convolution output sizes".
        hidden_dim: Passed to ``rnn.BiLSTM``.
        num_layers: Passed to ``rnn.BiLSTM``.
        dropout_in: Stored as ``self.dropout_in``.
        dropout_out: Passed to ``rnn.BiLSTM`` as ``dropout``.
        residual_level: Passed to ``rnn.BiLSTM``.
        bidirectional: Passed to ``rnn.BiLSTM``.
        word_dropout_params: Accepted but not read anywhere in this method.
        use_pretrained_weights: Passed positionally to ``CharCNNModel``.
        finetune_pretrained_weights: Passed positionally to ``CharCNNModel``.
        weights_file: Passed positionally to ``CharCNNModel``.

    Raises:
        AssertionError: If ``unk_only_char_encoding`` is true and
            ``char_cnn_output_dim`` differs from ``token_embed_dim``.
    """
    super().__init__(dictionary)
    self.dropout_in = dropout_in

    conv_specs = literal_eval(char_cnn_params)
    self.char_cnn_encoder = char_encoder.CharCNNModel(
        dictionary,
        num_chars,
        embed_dim,
        conv_specs,
        char_cnn_nonlinear_fn,
        char_cnn_pool_type,
        char_cnn_num_highway_layers,
        char_cnn_output_dim,
        use_pretrained_weights,
        finetune_pretrained_weights,
        weights_file,
    )

    # The word embedding is optional; it stays None for non-positive widths.
    self.embed_tokens = None
    num_tokens = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.unk_idx = dictionary.unk()
    if token_embed_dim > 0:
        self.embed_tokens = rnn.Embedding(
            num_embeddings=num_tokens,
            embedding_dim=token_embed_dim,
            padding_idx=self.padding_idx,
            freeze_embed=freeze_embed,
            normalize_embed=normalize_embed,
        )

    # Character width: explicit if given, otherwise the sum of the
    # per-convolution output sizes.
    if char_cnn_output_dim != -1:
        char_feature_dim = char_cnn_output_dim
    else:
        char_feature_dim = sum(out_dim for (out_dim, _) in conv_specs)

    self.token_embed_dim = token_embed_dim
    self.unk_only_char_encoding = unk_only_char_encoding

    if unk_only_char_encoding:
        # In this mode the two representations must share one width.
        assert char_cnn_output_dim == token_embed_dim, (
            "char_cnn_output_dim (%d) must equal to token_embed_dim (%d)"
            % (char_cnn_output_dim, token_embed_dim)
        )
        self.word_dim = token_embed_dim
    else:
        self.word_dim = char_feature_dim + token_embed_dim

    self.bilstm = rnn.BiLSTM(
        num_layers=num_layers,
        bidirectional=bidirectional,
        embed_dim=self.word_dim,
        hidden_dim=hidden_dim,
        dropout=dropout_out,
        residual_level=residual_level,
    )

    # Variable tracker
    self.tracker = VariableTracker()
    # Adversarial mode starts with both switches off.
    self.set_gradient_tracking_mode(False)
    self.set_embed_noising_mode(False)