def build_embedding(dictionary, embed_dim, path=None): num_embeddings = len(dictionary) padding_idx = dictionary.pad() emb = Embedding(num_embeddings, embed_dim, padding_idx) # if provided, load from preloaded dictionaries if path: embed_dict = utils.parse_embedding(path) utils.load_embedding(embed_dict, dictionary, emb) return emb
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim): num_embeddings = len(dictionary) padding_idx = dictionary.pad() embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx) embed_dict = utils.parse_embedding(embed_path) utils.print_embed_overlap(embed_dict, dictionary) return utils.load_embedding(embed_dict, dictionary, embed_tokens)
def __init__( self, dictionary, embed_dim=512, embed_dict=None, max_positions=1024, convolutions=((512, 3), ) * 20, dropout=0.1, ): super().__init__(dictionary) self.dropout = dropout self.num_attention_layers = None num_embeddings = len(dictionary) self.padding_idx = dictionary.pad() self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx) if embed_dict: self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens) self.embed_positions = PositionalEmbedding( max_positions, embed_dim, self.padding_idx, ) convolutions = extend_conv_spec(convolutions) in_channels = convolutions[0][0] self.fc1 = Linear(embed_dim, in_channels, dropout=dropout) self.projections = nn.ModuleList() self.convolutions = nn.ModuleList() self.residuals = [] layer_in_channels = [in_channels] for _, (out_channels, kernel_size, residual) in enumerate(convolutions): if residual == 0: residual_dim = out_channels else: residual_dim = layer_in_channels[-residual] self.projections.append( Linear(residual_dim, out_channels ) if residual_dim != out_channels else None) if kernel_size % 2 == 1: padding = kernel_size // 2 else: padding = 0 self.convolutions.append( ConvTBC(in_channels, out_channels * 2, kernel_size, dropout=dropout, padding=padding)) self.residuals.append(residual) in_channels = out_channels layer_in_channels.append(out_channels) self.fc2 = Linear(in_channels, embed_dim)
def __init__( self, dictionary, embed_dim=512, embed_dict=None, out_embed_dim=256, max_positions=1024, convolutions=((512, 3), ) * 20, attention=True, dropout=0.1, share_embed=False, positional_embeddings=True, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, ): super().__init__(dictionary) self.register_buffer('version', torch.Tensor([2])) self.dropout = dropout self.need_attn = True convolutions = extend_conv_spec(convolutions) in_channels = convolutions[0][0] if isinstance(attention, bool): # expand True into [True, True, ...] and do the same with False attention = [attention] * len(convolutions) if not isinstance(attention, list) or len(attention) != len(convolutions): raise ValueError( 'Attention is expected to be a list of booleans of ' 'length equal to the number of layers.') num_embeddings = len(dictionary) padding_idx = dictionary.pad() self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx) if embed_dict: self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens) self.embed_positions = PositionalEmbedding( max_positions, embed_dim, padding_idx, ) if positional_embeddings else None self.fc1 = Linear(embed_dim, in_channels, dropout=dropout) self.projections = nn.ModuleList() self.convolutions = nn.ModuleList() self.attention = nn.ModuleList() self.residuals = [] layer_in_channels = [in_channels] for i, (out_channels, kernel_size, residual) in enumerate(convolutions): if residual == 0: residual_dim = out_channels else: residual_dim = layer_in_channels[-residual] self.projections.append( Linear(residual_dim, out_channels ) if residual_dim != out_channels else None) self.convolutions.append( LinearizedConv1d(in_channels, out_channels * 2, kernel_size, padding=(kernel_size - 1), dropout=dropout)) self.attention.append( AttentionLayer(out_channels, embed_dim ) if attention[i] else None) self.residuals.append(residual) in_channels = out_channels layer_in_channels.append(out_channels) self.adaptive_softmax = None self.fc2 = self.fc3 = None if adaptive_softmax_cutoff is not None: assert not share_embed self.adaptive_softmax = AdaptiveSoftmax( num_embeddings, in_channels, adaptive_softmax_cutoff, dropout=adaptive_softmax_dropout) else: self.fc2 = Linear(in_channels, out_embed_dim) if share_embed: assert out_embed_dim == embed_dim, \ "Shared embed weights implies same dimensions " \ " out_embed_dim={} vs embed_dim={}".format(out_embed_dim, embed_dim) self.fc3 = nn.Linear(out_embed_dim, num_embeddings) self.fc3.weight = self.embed_tokens.weight else: self.fc3 = Linear(out_embed_dim, num_embeddings, dropout=dropout)