def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
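# Most of the variants in this section build the lookup table through an
# `Embedding` helper rather than calling nn.Embedding directly. For reference,
# a minimal sketch of such a helper (modeled on the transformer-style
# initializer in fairseq; individual forks may use a different init):
import torch.nn as nn

def Embedding(num_embeddings, embedding_dim, padding_idx):
    # plain lookup table with a scaled-normal init and a zeroed padding row
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5)
    nn.init.constant_(m.weight[padding_idx], 0)
    return m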
def build_embedding(dictionary, embed_dim, path=None):
    padding_idx = dictionary.pad()
    eos_index = dictionary.eos()
    emb = Embedding(len(dictionary), embed_dim, padding_idx, eos_index)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def build_embedding(dictionary, embed_dim, path=None, freeze=False):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = TransformerTokenEmbedding(num_embeddings, embed_dim, padding_idx, freeze)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def build_embedding(dictionary, embed_dim, path=None, sde=False):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if sde:
        emb = SDEembedding(char_vsize=num_embeddings, d_vec=embed_dim,
                           padding_idx=padding_idx)
    else:
        emb = Embedding(num_embeddings, embed_dim, padding_idx,
                        fix_norm=args.fix_norm)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def build_embedding(dictionary, embed_dim, args, mask_file=None, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if args.one_emb:
        emb = OneEmbedding(
            num_embeddings, embed_dim, padding_idx,
            args.one_emb, args.one_emb_dropout, args.one_emb_std,
            args.codenum, args.codebooknum, args.one_emb_layernum,
            args.one_emb_inter_dim, args.one_emb_relu_dropout, mask_file)
    else:
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def build_embedding(cls, args, dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    '''for i in range(0, 10):
        print(dictionary[i])
    print("********")'''
    return emb
def build_embedding(dictionary, embed_dim, path=None):
    # construct and return an embedding layer;
    # load pretrained embedding path if specified.
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
        logging.info('Loaded pretrained embeddings from {}'.format(path))
    return emb
def build_embedding(dictionary, embed_dim, path=None, feat=False):
    if feat:
        emb = Embeddings([Embedding(len(vocab), embed_dim, vocab.pad())
                          for _, vocab in dictionary.items()])
    else:
        padding_idx = dictionary.pad()
        num_embeddings = len(dictionary)
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        # if provided, load from preloaded dictionaries
        if path:
            embed_dict = utils.parse_embedding(path)
            utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def build_embedding(dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    # if not path and args.disable_training_embeddings:
    #     raise ValueError('Do not set --disable_training_embeddings when '
    #                      'pretrained embeddings are not provided.')
    # if args.disable_training_embeddings:
    #     emb.weight.requires_grad = False
    return emb
def build_embedding(dictionary, embed_dim, is_encoder, path=None):
    if path is not None:
        if path.startswith('elmo:'):
            lm_path = path[5:]
            task = LanguageModelingTask(args, dictionary, dictionary)
            models, _ = utils.load_ensemble_for_inference(
                [lm_path], task, {'remove_head': True})
            assert len(models) == 1, 'ensembles are currently not supported for elmo embeddings'
            embedder = ElmoTokenEmbedder(
                models[0], dictionary.eos(), dictionary.pad(),
                add_bos=is_encoder,
                remove_bos=is_encoder,
                combine_tower_states=is_encoder,
                projection_dim=embed_dim,
                add_final_predictive=is_encoder,
                add_final_context=is_encoder)
            return embedder, 1
        elif path.startswith('bilm:'):
            lm_path = path[5:]
            task = LanguageModelingTask(args, dictionary, dictionary)
            models, _ = utils.load_ensemble_for_inference(
                [lm_path], task, {
                    'remove_head': True,
                    'dropout': args.bilm_model_dropout,
                    'attention_dropout': args.bilm_attention_dropout,
                    'relu_dropout': args.bilm_relu_dropout,
                })
            assert len(models) == 1, 'ensembles are currently not supported for elmo embeddings'
            return BILMEmbedder(models[0], args, args.encoder_embed_dim) if is_encoder \
                else LMEmbedder(models[0], args.decoder_embed_dim)

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = nn.Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def __init__(self, dictionary, encoder_embed_dim=512, embed_dim=512,
             embed_dict=None, out_embed_dim=512, num_layers=1,
             dropout_in=0.1, dropout_out=0.1, attention=True):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.layers = nn.ModuleList([
        LSTMCell(
            encoder_embed_dim + embed_dim if layer == 0 else embed_dim,
            embed_dim)
        for layer in range(num_layers)
    ])
    self.attention = AttentionLayer(encoder_embed_dim, embed_dim) if attention else None
    if embed_dim != out_embed_dim:
        self.additional_fc = Linear(embed_dim, out_embed_dim)
    self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
def __init__(self, dictionary, embed_dim=512, embed_dict=None, num_layers=1,
             dropout_in=0.1, dropout_out=0.1):
    super().__init__(dictionary)
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out

    num_embeddings = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.lstm = LSTM(
        input_size=embed_dim,
        hidden_size=embed_dim,
        num_layers=num_layers,
        dropout=self.dropout_out,
        bidirectional=False,
    )
def __init__(self, dictionary, embed_dim=512, embed_dict=None, max_positions=1024,
             convolutions=((512, 3),) * 20, dropout=0.1):
    super().__init__(dictionary)
    self.dropout = dropout
    self.num_attention_layers = None

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        embed_dim,
        padding_idx,
        left_pad=LanguagePairDataset.LEFT_PAD_SOURCE,
    )

    in_channels = convolutions[0][0]
    self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    for (out_channels, kernel_size) in convolutions:
        self.projections.append(Linear(in_channels, out_channels)
                                if in_channels != out_channels else None)
        if kernel_size % 2 == 1:
            padding = kernel_size // 2
        else:
            padding = 0
        self.convolutions.append(
            ConvTBC(in_channels, out_channels * 2, kernel_size,
                    dropout=dropout, padding=padding)
        )
        in_channels = out_channels
    self.fc2 = Linear(in_channels, embed_dim)
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)
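# For context: fairseq's `utils.parse_embedding` reads a whitespace-separated
# text file whose first line is skipped (typically "<vocab_size> <dim>") and
# whose remaining lines are "<word> <float> <float> ...". A hypothetical toy
# file matching embed_dim=4 could be produced like this (file name and words
# are illustrative only, not taken from the source):
with open('toy.vec', 'w') as f:
    f.write('3 4\n')                    # header line: vocab size, dimension
    f.write('the 0.1 0.2 0.3 0.4\n')
    f.write('cat -0.5 0.1 0.0 0.2\n')
    f.write('sat 0.3 -0.1 0.7 0.0\n')
# embed_tokens = load_pretrained_embedding_from_file('toy.vec', dictionary, embed_dim=4)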
def __init__(
    self, dictionary, embed_dim=512, embed_dict=None, max_positions=1024,
    convolutions=((512, 3),) * 20, dropout=0.1,
):
    super().__init__(dictionary)
    self.dropout = dropout
    self.num_attention_layers = None
    self.pad = dictionary.pad

    num_embeddings = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        embed_dim,
        self.padding_idx,
    )

    convolutions = extend_conv_spec(convolutions)
    in_channels = convolutions[0][0]
    self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    self.residuals = []

    layer_in_channels = [in_channels]
    for _, (out_channels, kernel_size, residual) in enumerate(convolutions):
        if residual == 0:
            residual_dim = out_channels
        else:
            residual_dim = layer_in_channels[-residual]
        self.projections.append(Linear(residual_dim, out_channels)
                                if residual_dim != out_channels else None)
        if kernel_size % 2 == 1:
            padding = kernel_size // 2
        else:
            padding = 0
        self.convolutions.append(
            ConvTBC(in_channels, out_channels * 2, kernel_size,
                    dropout=dropout, padding=padding)
        )
        self.residuals.append(residual)
        in_channels = out_channels
        layer_in_channels.append(out_channels)
    self.fc2 = Linear(in_channels, embed_dim)
def build_embedding(dictionary, embed_dim, path=None):
    """
    Copied from fairseq.models.transformer
    :param dictionary:
    :param embed_dim:
    :param path:
    :return:
    """
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def build_embedding(dictionary, embed_dim, path=None):
    # The dictionary may include additional items that can be used in
    # place of the normal OOV token and that all map to the same
    # embedding. Using a different token for each input position allows
    # one to restore the word identities from the original source text.
    num_embeddings = len(dictionary) - args.source_position_markers
    padding_idx = dictionary.pad()
    unk_idx = dictionary.unk()
    logger.info(
        "dictionary indices from {0} to {1} will be mapped to {2}".format(
            num_embeddings, len(dictionary) - 1, unk_idx))
    emb = Embedding(num_embeddings, embed_dim, padding_idx, unk_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
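# The variant above passes an extra `unk_idx` into `Embedding`, which suggests
# a subclass that folds the source-position markers (indices >= num_embeddings)
# back onto the UNK row. A hedged sketch of what such a subclass could look
# like (an assumption based on the comment above, not taken from the source):
import torch
import torch.nn as nn

class PositionMarkerEmbedding(nn.Embedding):
    def __init__(self, num_embeddings, embedding_dim, padding_idx, unk_idx):
        super().__init__(num_embeddings, embedding_dim, padding_idx=padding_idx)
        self.unk_idx = unk_idx
        nn.init.normal_(self.weight, mean=0, std=embedding_dim ** -0.5)
        nn.init.constant_(self.weight[padding_idx], 0)

    def forward(self, input):
        # map any out-of-range index (a position marker) to the UNK embedding
        input = torch.where(input >= self.num_embeddings,
                            torch.full_like(input, self.unk_idx), input)
        return super().forward(input)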
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
    num_embedding = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = Embedding(num_embedding, embed_dim, padding_idx)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    # embed_keys = set(embed_dict.keys())
    # vocab_keys = set(dictionary.symbols)
    # print(vocab_keys - embed_keys)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens), embed_dict
def build_embedding(cls, args, dictionary, embed_dim, path=None):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if args.no_embed:
        one_hot_matrix = F.one_hot(torch.arange(num_embeddings)).float()
        one_hot_embed = torch.cat(
            (one_hot_matrix,
             torch.zeros((num_embeddings, embed_dim - num_embeddings))), dim=1)
        one_hot_embed[padding_idx] = torch.zeros(embed_dim).unsqueeze(0)
        emb = nn.Embedding(num_embeddings, embed_dim, padding_idx=padding_idx)
        emb.weight = torch.nn.parameter.Parameter(one_hot_embed, requires_grad=False)
    else:
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
    # if provided, load from preloaded dictionaries
    if path:
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    return emb
def __init__(
    self, dictionary, args, embed_dim=512, embed_dict=None, max_positions=1024,
    convolutions=((512, 3),) * 20, dropout=0.1, normalization_constant=0.5,
    left_pad=True,
):
    super().__init__(dictionary)
    self.args = args
    self.dropout = dropout
    self.normalization_constant = normalization_constant
    self.left_pad = left_pad
    self.num_attention_layers = None

    num_embeddings = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        embed_dim,
        self.padding_idx,
        left_pad=self.left_pad
        # left_pad=False,  # TODO: check LearnedPositionalEmbedding.forward() for the case of True
    )

    convolutions = extend_conv_spec(convolutions)
    in_channels = convolutions[0][0]
    self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    self.residuals = []

    layer_in_channels = [in_channels]
    for i, (out_channels, kernel_size, residual) in enumerate(convolutions):
        if residual == 0:
            residual_dim = out_channels
        else:
            residual_dim = layer_in_channels[-residual]
        self.projections.append(Linear(residual_dim, out_channels)
                                if residual_dim != out_channels else None)
        if kernel_size % 2 == 1:
            padding = kernel_size // 2
        else:
            padding = 0
        self.convolutions.append(
            ConvTBC(in_channels, out_channels * 2, kernel_size,
                    dropout=dropout, padding=padding)
        )
        self.residuals.append(residual)
        in_channels = out_channels
        layer_in_channels.append(out_channels)
    self.fc2 = Linear(in_channels, embed_dim)
def load_embedding(embedding, dictionary, pretrained_embed):
    """Loads pretrained embeddings.

    Loads pretrained embeddings into a nn.Embedding layer. pretrained_embed
    can either be a nn.Embedding layer, in which case the embedding is set
    to the pretrained_embed argument, or a path to an embedding file.

    Arguments:
        embedding (nn.Embedding): Embedding layer whose weights are to be set.
        dictionary (fairseq.data.dictionary.Dictionary): dictionary with the
            same vocabulary size as the embedding argument.
        pretrained_embed (Union(string, nn.Embedding)): source of the weights
            to be loaded.
    """
    if pretrained_embed is None:
        pass
    elif isinstance(pretrained_embed, torch.nn.Embedding):
        embedding.weight = pretrained_embed.weight
    else:
        embed_dict = utils.parse_embedding(pretrained_embed)
        utils.load_embedding(embed_dict, dictionary, embedding)
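# A short usage sketch of the wrapper above; `dictionary` is a fairseq
# Dictionary, and `pretrained_layer` and the vector-file path are illustrative
# names, not part of the original code:
import torch.nn as nn

embedding = nn.Embedding(len(dictionary), 300, padding_idx=dictionary.pad())
load_embedding(embedding, dictionary, None)                    # keep the random init
load_embedding(embedding, dictionary, pretrained_layer)        # copy weights from an nn.Embedding
load_embedding(embedding, dictionary, '/path/to/vectors.txt')  # parse a text file of word vectors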
def __init__(self, dictionary, embed_dim=512, embed_dict=None, out_embed_dim=256,
             max_positions=1024, convolutions=((512, 3),) * 20, attention=True,
             dropout=0.1, share_embed=False):
    super().__init__(dictionary)
    self.register_buffer('version', torch.Tensor([2]))
    self.dropout = dropout

    in_channels = convolutions[0][0]
    if isinstance(attention, bool):
        # expand True into [True, True, ...] and do the same with False
        attention = [attention] * len(convolutions)
    if not isinstance(attention, list) or len(attention) != len(convolutions):
        raise ValueError('Attention is expected to be a list of booleans of '
                         'length equal to the number of layers.')

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        embed_dim,
        padding_idx,
        left_pad=LanguagePairDataset.LEFT_PAD_TARGET,
    )

    self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    self.attention = nn.ModuleList()
    for i, (out_channels, kernel_size) in enumerate(convolutions):
        self.projections.append(Linear(in_channels, out_channels)
                                if in_channels != out_channels else None)
        self.convolutions.append(
            LinearizedConv1d(in_channels, out_channels * 2, kernel_size,
                             padding=(kernel_size - 1), dropout=dropout)
        )
        self.attention.append(AttentionLayer(out_channels, embed_dim)
                              if attention[i] else None)
        in_channels = out_channels
    self.fc2 = Linear(in_channels, out_embed_dim)
    if share_embed:
        assert out_embed_dim == embed_dim, \
            "Shared embed weights implies same dimensions " \
            " out_embed_dim={} vs embed_dim={}".format(out_embed_dim, embed_dim)
        self.fc3 = nn.Linear(out_embed_dim, num_embeddings)
        self.fc3.weight = self.embed_tokens.weight
    else:
        self.fc3 = Linear(out_embed_dim, num_embeddings, dropout=dropout)
def copy_prev_embedding(embed_path, dictionary, embed_dim, prev_embedded_tokens_path, prev_dict):
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    embed_tokens = nn.Embedding(num_embeddings, embed_dim, padding_idx)
    prev_embedded_tokens = load_random_embedding(prev_embedded_tokens_path)
    for i in range(5, num_embeddings):
        if prev_dict.index(dictionary.symbols[i]) != prev_dict.unk() and i != dictionary.unk():
            embed_tokens.weight.data[i] = prev_embedded_tokens[prev_dict.index(dictionary.symbols[i])]
    # embed_tokens.weight = nn.Parameter(prev_embedded_tokens)
    embed_dict = utils.parse_embedding(embed_path)
    utils.print_embed_overlap(embed_dict, dictionary)
    return utils.load_embedding(embed_dict, dictionary, embed_tokens)
def build_embedding(dictionary, embed_dim, path=None, num_embed_chunks=1):
    assert embed_dim % num_embed_chunks == 0, (
        f"Number of embedding chunks = {num_embed_chunks} should be "
        + f"divisible by the embedding dimension = {embed_dim}"
    )
    assert path is None or num_embed_chunks == 1, (
        "Loading embedding from a path with number of embedding chunks > 1"
        + " is not yet supported"
    )
    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    # if provided, load from preloaded dictionaries
    if path:
        emb = Embedding(num_embeddings, embed_dim, padding_idx)
        embed_dict = utils.parse_embedding(path)
        utils.load_embedding(embed_dict, dictionary, emb)
    else:
        embed_chunk_dim = embed_dim // num_embed_chunks
        emb = nn.ModuleList()
        for i in range(num_embed_chunks):
            emb.append(Embedding(num_embeddings, embed_chunk_dim, padding_idx))
    return emb
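# The chunked branch above returns an nn.ModuleList rather than a single
# module, so the caller has to combine the chunk outputs itself. One plausible
# way to do that, concatenating along the feature dimension (a sketch under
# that assumption, not the consumer actually paired with this function):
import torch
import torch.nn as nn

def embed_with_chunks(emb, tokens):
    if isinstance(emb, nn.ModuleList):
        # each chunk maps tokens -> (batch, seq, embed_dim // num_embed_chunks)
        return torch.cat([chunk(tokens) for chunk in emb], dim=-1)
    return emb(tokens)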
def __init__(
    self, dictionary, embed_dim=512, embed_dict=None, out_embed_dim=256,
    max_positions=1024, convolutions=((512, 3),) * 20, attention=True,
    dropout=0.1, share_embed=False, positional_embeddings=True,
    adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0,
):
    super().__init__(dictionary)
    self.register_buffer('version', torch.Tensor([2]))
    self.dropout = dropout
    self.need_attn = True

    convolutions = extend_conv_spec(convolutions)
    in_channels = convolutions[0][0]
    if isinstance(attention, bool):
        # expand True into [True, True, ...] and do the same with False
        attention = [attention] * len(convolutions)
    if not isinstance(attention, list) or len(attention) != len(convolutions):
        raise ValueError('Attention is expected to be a list of booleans of '
                         'length equal to the number of layers.')

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        embed_dim,
        padding_idx,
    ) if positional_embeddings else None

    self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    self.attention = nn.ModuleList()
    self.residuals = []

    layer_in_channels = [in_channels]
    for i, (out_channels, kernel_size, residual) in enumerate(convolutions):
        if residual == 0:
            residual_dim = out_channels
        else:
            residual_dim = layer_in_channels[-residual]
        self.projections.append(Linear(residual_dim, out_channels)
                                if residual_dim != out_channels else None)
        self.convolutions.append(
            LinearizedConv1d(in_channels, out_channels * 2, kernel_size,
                             padding=(kernel_size - 1), dropout=dropout)
        )
        self.attention.append(AttentionLayer(out_channels, embed_dim)
                              if attention[i] else None)
        self.residuals.append(residual)
        in_channels = out_channels
        layer_in_channels.append(out_channels)

    self.adaptive_softmax = None
    self.fc2 = self.fc3 = None
    if adaptive_softmax_cutoff is not None:
        assert not share_embed
        self.adaptive_softmax = AdaptiveSoftmax(num_embeddings, in_channels,
                                                adaptive_softmax_cutoff,
                                                dropout=adaptive_softmax_dropout)
    else:
        self.fc2 = Linear(in_channels, out_embed_dim)
        if share_embed:
            assert out_embed_dim == embed_dim, \
                "Shared embed weights implies same dimensions " \
                " out_embed_dim={} vs embed_dim={}".format(out_embed_dim, embed_dim)
            self.fc3 = nn.Linear(out_embed_dim, num_embeddings)
            self.fc3.weight = self.embed_tokens.weight
        else:
            self.fc3 = Linear(out_embed_dim, num_embeddings, dropout=dropout)
def __init__(self, dictionary, embed_dim, embed_dict, max_positions, dropout,
             num_inputs, num_units, num_labels, num_layers=1,
             in_arcs=True, out_arcs=True, batch_first=False,
             residual='', use_gates=True, use_glus=False,
             # morph_embeddings=None,
             left_pad=True):
    super(GCNEncoder, self).__init__(dictionary)
    num_embeddings = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.left_pad = left_pad
    self.dropout = dropout
    self.batch_first = batch_first

    self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        embed_dim,
        self.padding_idx,
        left_pad=self.left_pad
        # left_pad=False,  # TODO: check LearnedPositionalEmbedding.forward() for the case of True
    )

    self.num_layers = num_layers
    self.num_inputs = num_inputs
    self.num_units = num_units
    self.residual = residual
    self.use_gates = use_gates
    self.use_glus = use_glus

    # if morph_embeddings is not None:
    #     self.morph_embeddings = morph_embeddings
    #     self.emb_morph_emb = nn.Linear(num_inputs + morph_embeddings.embedding_size, num_inputs)
    # self.H_1 = nn.parameter.Parameter(torch.Tensor(self.num_units, self.num_units))
    # nn.init.xavier_normal_(self.H_1)
    # self.H_2 = nn.parameter.Parameter(torch.Tensor(self.num_units, self.num_units))
    # nn.init.xavier_normal_(self.H_2)
    # self.H_3 = nn.parameter.Parameter(torch.Tensor(self.num_units, self.num_units))
    # nn.init.xavier_normal_(self.H_3)
    # self.H_4 = nn.parameter.Parameter(torch.Tensor(self.num_units, self.num_units))
    # nn.init.xavier_normal_(self.H_4)

    self.gcn_layers = []
    if residual == '' or residual == 'residual':
        for i in range(self.num_layers):
            gcn = GCNLayer(num_inputs, num_units, num_labels,
                           in_arcs=in_arcs, out_arcs=out_arcs,
                           batch_first=self.batch_first,
                           use_gates=self.use_gates,
                           use_glus=self.use_glus)
            self.gcn_layers.append(gcn)
        self.gcn_seq = nn.Sequential(*self.gcn_layers)
    elif residual == 'dense':
        for i in range(self.num_layers):
            input_size = num_inputs + (i * num_units)
            gcn = GCNLayer(input_size, num_units, num_labels,
                           in_arcs=in_arcs, out_arcs=out_arcs,
                           batch_first=self.batch_first,
                           use_gates=self.use_gates,
                           use_glus=self.use_glus)
            self.gcn_layers.append(gcn)
        self.gcn_seq = nn.Sequential(*self.gcn_layers)
def __init__(self, dictionary, args, encoder_embed_dim=512, embed_dict=None,
             max_positions=1024, convolutions=((512, 3),) * 20, dropout=0.1,
             left_pad=True):
    super().__init__(dictionary)
    self.elmo = Elmo(options_file, weight_file, args.num_output_repr,
                     dropout=args.elmo_dropout,
                     do_layer_norm=args.elmo_do_layer_norm)
    self.args = args
    if self.args.merge_mode == 'sum':
        # only used in `sum` mode
        self.elmo_projection = Linear(args.elmo_repr_dim, encoder_embed_dim)
    self.id2token = {v: k for k, v in dictionary.indices.items()}
    self.dropout = dropout
    self.left_pad = left_pad
    self.num_attention_layers = None

    num_embeddings = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, args.token_embed_dim, self.padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        args.token_embed_dim,
        self.padding_idx,
        left_pad=self.left_pad,
    )

    convolutions = extend_conv_spec(convolutions)
    in_channels = convolutions[0][0]
    self.fc1 = Linear(encoder_embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    self.residuals = []

    layer_in_channels = [in_channels]
    for _, (out_channels, kernel_size, residual) in enumerate(convolutions):
        if residual == 0:
            residual_dim = out_channels
        else:
            residual_dim = layer_in_channels[-residual]
        self.projections.append(Linear(residual_dim, out_channels)
                                if residual_dim != out_channels else None)
        if kernel_size % 2 == 1:
            padding = kernel_size // 2
        else:
            padding = 0
        self.convolutions.append(
            ConvTBC(in_channels, out_channels * 2, kernel_size,
                    dropout=dropout, padding=padding)
        )
        self.residuals.append(residual)
        in_channels = out_channels
        layer_in_channels.append(out_channels)

    if args.num_output_repr == 2 and args.merge_mode == 'concat':
        self.fc2 = Linear(in_channels + args.elmo_repr_dim, encoder_embed_dim)
    else:
        self.fc2 = Linear(in_channels, encoder_embed_dim)
def __init__(self, dictionary, embed_dim=512, embed_dict=None, max_positions=1024,
             convolutions=((512, 3),) * 20, dropout=0.1, batch_norm=False,
             use_linear_se=False):
    super().__init__(dictionary)
    self.dropout = dropout
    self.num_attention_layers = None
    self.batch_norm = batch_norm

    num_embeddings = len(dictionary)
    self.padding_idx = dictionary.pad()
    self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
    if embed_dict:
        self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)

    self.embed_positions = PositionalEmbedding(
        max_positions,
        embed_dim,
        self.padding_idx,
    )

    convolutions = extend_conv_spec_extended(convolutions)
    in_channels = convolutions[0][0]
    if use_linear_se:
        self.fc1 = LinearSE(embed_dim, in_channels, dropout=dropout)
    else:
        self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
    self.projections = nn.ModuleList()
    self.convolutions = nn.ModuleList()
    self.inner_convolutions = nn.ModuleList()
    # self.se_layers = nn.ModuleList()
    self.residuals = []
    self.kernel_sizes = 0

    layer_in_channels = [in_channels]
    for idx, (out_channels, kernel_sizes, residual) in enumerate(convolutions):
        self.kernel_sizes = len(kernel_sizes)
        self.inner_convolutions.append(nn.ModuleList())
        if residual == 0:
            residual_dim = out_channels
        else:
            residual_dim = layer_in_channels[-residual]
        if use_linear_se:
            self.projections.append(LinearSE(residual_dim, out_channels)
                                    if residual_dim != out_channels else None)
        else:
            self.projections.append(Linear(residual_dim, out_channels)
                                    if residual_dim != out_channels else None)
        for kernel_size in kernel_sizes:
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.inner_convolutions[idx].append(
                ConvTBC(in_channels, out_channels * 2, kernel_size,
                        dropout=dropout, padding=padding))
        # TODO(naetherm): Combine the outputs of the convolution to one single instance max_pooling
        # self.convolutions.append(torch.stack(self.inner_convolutions[idx], dim=0).sum(dim=0))
        # self.se_layers.append(SqueezeExcitationLayer(n_features=16))
        self.residuals.append(residual)
        in_channels = out_channels
        layer_in_channels.append(out_channels)

    self.mp2d = torch.nn.MaxPool2d(kernel_size=(self.kernel_sizes, 1))
    if use_linear_se:
        self.fc2 = LinearSE(in_channels, embed_dim)
    else:
        self.fc2 = Linear(in_channels, embed_dim)