def __init__(self, config):
    super(Transformer_EncoderDecoder, self).__init__()
    c = copy.deepcopy
    # Shared building blocks, deep-copied into each layer.
    self.attn = MultiHeadedAttention(config['head'], config['emb_dim'])
    self.ff = PositionwiseFeedForward(config['emb_dim'], config['d_ff'], config['drop_out'])
    self.position = PositionalEncoding(config['emb_dim'], config['drop_out'])
    self.encoder = Encoder(
        EncoderLayer(config['emb_dim'], c(self.attn), c(self.ff), config['drop_out']),
        config['N_layers'])
    self.decoder = Decoder(
        DecoderLayer(config['emb_dim'], c(self.attn), c(self.attn), c(self.ff), config['drop_out']),
        config['N_layers'])
    # Token embedding followed by a copy of the positional encoding.
    self.src_embed = nn.Sequential(
        Embeddings(config['emb_dim'], config['vocab_size']), c(self.position))
    self.tgt_embed = nn.Sequential(
        Embeddings(config['emb_dim'], config['vocab_size']), c(self.position))
    self.generator = Generator(config['emb_dim'], config['vocab_size'])
    self.fc_out = nn.Linear(config['emb_dim'], config['vocab_size'])
    self.model = EncoderDecoder(self.encoder, self.decoder,
                                self.src_embed, self.tgt_embed, self.generator)
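# Usage sketch for the constructor above. The values are illustrative assumptions,
# not taken from the source; only the config keys read in __init__ are listed.
config = {
    'head': 8, 'emb_dim': 512, 'd_ff': 2048, 'drop_out': 0.1,
    'N_layers': 6, 'vocab_size': 32000,
}
model = Transformer_EncoderDecoder(config)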
def __init__(self, shared_embedding, d_model=512, d_ff=2048, num_heads=8, num_layers=6,
             max_len=150, dropout=0.1, pad_id=0, device=torch.device("cuda")):
    super(EncoderStack, self).__init__()
    # Build independent encoder layers; the original `[Encoder(...)] * num_layers`
    # registered the same layer num_layers times, so all layers shared one set of weights.
    self.layers = ModuleList(
        [Encoder(d_model, d_ff, num_heads, dropout) for _ in range(num_layers)])
    self.embedding = shared_embedding
    self.pe = PositionalEncoding(d_model, max_len, pad_id, device)
    self.dropout = Dropout(dropout)
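# Usage sketch for EncoderStack. Assumption: a plain nn.Embedding stands in for whatever
# shared embedding the surrounding code passes; sizes and device are illustrative.
shared_embedding = nn.Embedding(num_embeddings=32000, embedding_dim=512, padding_idx=0)
encoder = EncoderStack(shared_embedding, d_model=512, num_layers=6,
                       device=torch.device("cpu"))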
def __init__(self, embedding_size, hidden_size, dropout_ratio, feature_size=2048 + 4):
    super(ConfigurationDecoder, self).__init__()
    self.embedding_size = embedding_size
    self.feature_size = feature_size
    self.hidden_size = hidden_size
    self.embedding = nn.Sequential(
        nn.Linear(args.angle_feat_size, self.embedding_size),
        nn.Tanh())
    self.drop = nn.Dropout(p=dropout_ratio)
    self.drop_env = nn.Dropout(p=args.featdropout)
    self.lstm = nn.LSTMCell(embedding_size + feature_size + 3 * 300, hidden_size)
    self.feat_att_layer = SoftDotAttention(hidden_size, feature_size + 3 * 300)
    self.attention_layer = SoftDotAttention(hidden_size, hidden_size)
    self.candidate_att_layer = SoftDotAttention(hidden_size, feature_size + 3 * 300)
    self.similarity_att_layer = SoftDotAttention(hidden_size, hidden_size)
    self.object_att_layer = SoftDotAttention(hidden_size, hidden_size)
    self.state_attention = StateAttention()
    self.r_linear = nn.Linear(self.hidden_size, 2)
    self.sm = nn.Softmax(dim=-1)
    self.weight_linear = nn.Linear(2, 1)
    self.cos = torch.nn.CosineSimilarity(dim=-1)
    self.lang_position = PositionalEncoding(hidden_size, dropout=0.1, max_len=80)
def __init__(self, ntokens, embed_size, seq_len, attn_heads=4, pos_emb=False):
    """
    :param ntokens: No. of tokens (vocabulary size)
    :param embed_size: Embedding size
    :param seq_len: Length of the input sequence
    :param attn_heads: No. of attention heads
    :param pos_emb: If True, use learned positional embeddings; if False, use fixed positional encodings.
    """
    super(DecoderLayer, self).__init__()
    self.ntokens = ntokens
    self.embeddings = nn.Embedding(ntokens, embed_size)
    if pos_emb:
        self.pos_emb = nn.Embedding(seq_len, embed_size)
    else:
        self.pos_emb = PositionalEncoding(embed_size)
    # Transformer with masked self-attention
    self.masked_transformer = TransformerBlock(attn_heads=attn_heads, embed_size=embed_size)
    self.transformer = TransformerBlock(attn_heads=attn_heads, embed_size=embed_size)
def __init__(self, ntokens, embed_size, seq_len, attn_heads=4, depth=1, pos_emb=False):
    """
    :param ntokens: No. of tokens (vocabulary size)
    :param embed_size: Embedding size
    :param seq_len: Length of the input sequence
    :param attn_heads: No. of attention heads
    :param depth: No. of transformer blocks
    :param pos_emb: If True, use learned positional embeddings; if False, use fixed positional encodings.
    """
    super(Encoder, self).__init__()
    self.ntokens = ntokens
    self.embeddings = nn.Embedding(ntokens, embed_size)
    if pos_emb:
        self.pos_emb = nn.Embedding(seq_len, embed_size)
    else:
        self.pos_emb = PositionalEncoding(embed_size)
    # Stack `depth` transformer blocks.
    transformer_blocks = []
    for _ in range(depth):
        transformer_blocks.append(
            TransformerBlock(attn_heads=attn_heads, embed_size=embed_size))
    self.tblocks = nn.Sequential(*transformer_blocks)
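# Usage sketch for the Encoder above; the sizes are illustrative assumptions.
encoder = Encoder(ntokens=10000, embed_size=256, seq_len=128, attn_heads=4, depth=2)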
def __init__(self, d_model, d_ff, n_head, num_encoder_layers, label_vocab_size, dropout=0.1):
    super(LiteTransformerEncoder, self).__init__()
    self.padding = 1
    self.kernel_size = 3
    self.stride = 2
    self.dilation = 1
    # Two strided 1-D convolutions subsample the input and project it to d_model channels.
    self.src_embed = nn.Sequential(
        nn.Conv1d(in_channels=1, out_channels=d_model // 2, kernel_size=self.kernel_size,
                  stride=self.stride, padding=self.padding, dilation=self.dilation, bias=False),
        nn.BatchNorm1d(num_features=d_model // 2),
        nn.ReLU(inplace=True),
        nn.Conv1d(in_channels=d_model // 2, out_channels=d_model, kernel_size=self.kernel_size,
                  stride=self.stride, padding=self.padding, dilation=self.dilation, bias=False),
        nn.BatchNorm1d(num_features=d_model),
        nn.ReLU(inplace=True),
    )
    # TODO: why padding_idx=0
    self.position_encoding = PositionalEncoding(d_model=d_model, dropout=dropout)
    self.stack_layers = nn.ModuleList([
        EncoderLayer(index=i, d_model=d_model, d_ff=d_ff, n_head=n_head, dropout=dropout)
        for i in range(num_encoder_layers)
    ])  # need change
    self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
    self.final_proj = nn.Linear(d_model, label_vocab_size)
def make_transformer_model(src_vocab, tgt_vocab, config):
    "Make a transformer model based on config."
    c = copy.deepcopy
    attn = MultiHeadedAttention(config['h'], config['d_model'])
    ff = PositionwiseFeedForward(config['d_model'], config['d_ff'], config['dropout'])
    position = PositionalEncoding(config['d_model'], config['dropout'])
    # word_embed = nn.Sequential(Embeddings(config['d_model'], src_vocab), c(position))
    embed, position = Embeddings(config['d_model'], src_vocab), c(position)
    model = EncoderDecoder(
        Encoder(EncoderLayer(config['d_model'], c(attn), c(ff), config['dropout']),
                config['num_layer']),
        Decoder(DecoderLayer(config['d_model'], c(attn), c(attn), c(ff), config['dropout']),
                config['num_layer'], config['d_model'], tgt_vocab, config['pointer_gen']),
        embed, position,
        embed, position,
        config['tie_weights']
    )
    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
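# Usage sketch for make_transformer_model. The values and vocabulary sizes are illustrative
# assumptions; only the config keys read inside the function are listed.
config = {
    'h': 8, 'd_model': 512, 'd_ff': 2048, 'dropout': 0.1,
    'num_layer': 6, 'pointer_gen': False, 'tie_weights': True,
}
model = make_transformer_model(src_vocab=32000, tgt_vocab=32000, config=config)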