    def __init__(self,
                 num_layers,
                 d_model,
                 heads,
                 d_ff,
                 dropout,
                 embeddings,
                 tag_embeddings=None):
        super(TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers
        self.embeddings = embeddings
        self.tag_embeddings = tag_embeddings
        self.pos_emb = PositionalEncoding(dropout,
                                          self.embeddings.embedding_dim)

        # Build TransformerDecoder.
        self.transformer_layers = nn.ModuleList([
            TransformerDecoderLayer(d_model, heads, d_ff, dropout)
            for _ in range(num_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
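Every constructor in these examples builds a PositionalEncoding(dropout, dim) module alongside its layer stack. For reference, a minimal sinusoidal positional-encoding module matching that call signature is sketched below; batch-first inputs and the max_len default are assumptions, and the actual implementation in the originating codebases may differ in details.

import math
import torch
import torch.nn as nn

class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding followed by dropout (sketch)."""

    def __init__(self, dropout, dim, max_len=5000):
        super(PositionalEncoding, self).__init__()
        # Precompute a (1, max_len, dim) table of sin/cos encodings.
        pe = torch.zeros(max_len, dim)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, dim, 2).float()
                             * -(math.log(10000.0) / dim))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe.unsqueeze(0))
        self.dropout = nn.Dropout(p=dropout)
        self.dim = dim

    def forward(self, emb, step=None):
        # emb: (batch, seq_len, dim); `step` selects a single position at decode time.
        emb = emb * math.sqrt(self.dim)
        if step is None:
            emb = emb + self.pe[:, :emb.size(1)]
        else:
            emb = emb + self.pe[:, step]
        return self.dropout(emb)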
Example #2
    def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings):
        super(TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers
        self.embeddings = embeddings
        self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)
        # Extra attention sub-block over the graph context, with its own
        # feed-forward, dropout, and layer norm.
        self.context_attn_graph = MultiHeadedAttention(
            heads, d_model, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
        self.drop_3 = nn.Dropout(dropout)
        self.layer_norm_3 = nn.LayerNorm(d_model, eps=1e-6)
        # Build TransformerDecoder.
        self.transformer_layers = nn.ModuleList(
            [TransformerDecoderLayer(d_model, heads, d_ff, dropout)
             for _ in range(num_layers)])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
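        # Per-token score projections named after BiDAF's trilinear similarity terms
        # (context, query, context*query); their exact use is defined in forward().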
        self.att_weight_c = nn.Linear(self.embeddings.embedding_dim, 1)
        self.att_weight_q = nn.Linear(self.embeddings.embedding_dim, 1)
        self.att_weight_cq = nn.Linear(self.embeddings.embedding_dim, 1)
        self.graph_act = gelu
        self.graph_aware = nn.Linear(self.embeddings.embedding_dim*3, self.embeddings.embedding_dim)
        self.graph_drop = nn.Dropout(dropout)

        self.linear_filter = nn.Linear(d_model*2, 1)
        # Learnable bias initialised as a linearly decaying ramp over 512 positions,
        # broadcast to shape (8 heads, 512, 512). Assigning an nn.Parameter attribute
        # registers it automatically, and it moves to the module's device with .to().
        ramp = torch.arange(512, 0, -1, dtype=torch.float) / 512
        self.fix_top = nn.Parameter(
            ramp.unsqueeze(0).unsqueeze(0).expand(8, 512, -1).contiguous(),
            requires_grad=True)
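Several of the constructors above and below also instantiate PositionwiseFeedForward(d_model, d_ff, dropout). A minimal sketch matching that signature follows; the pre-norm residual layout and GELU activation are assumptions (other codebases use ReLU and/or post-norm instead).

import torch.nn as nn
import torch.nn.functional as F

class PositionwiseFeedForward(nn.Module):
    """Two-layer position-wise feed-forward block with a residual connection (sketch)."""

    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        # Pre-norm, expand to d_ff, project back to d_model, then add the residual.
        inter = self.dropout_1(F.gelu(self.w_1(self.layer_norm(x))))
        return self.dropout_2(self.w_2(inter)) + x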
Example #3
    def __init__(self,
                 num_layers,
                 d_model,
                 heads,
                 d_ff,
                 dropout,
                 embeddings,
                 cond_type=''):
        super(TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers
        self.embeddings = embeddings
        self.pos_emb = PositionalEncoding(dropout,
                                          self.embeddings.embedding_dim)

        # Build TransformerDecoder.
        self.transformer_layers = nn.ModuleList([
            TransformerDecoderLayer(d_model,
                                    heads,
                                    d_ff,
                                    dropout,
                                    cond_type=cond_type)
            for _ in range(num_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

        self.position_generator = nn.Sequential(nn.Linear(d_model * 2, 1),
                                                #nn.Softmax(dim=0)
                                                )
Example #4
    def __init__(self,
                 num_layers,
                 d_model,
                 heads,
                 d_ff,
                 dropout,
                 embeddings=None,
                 topic=False,
                 topic_dim=300,
                 split_noise=False):
        super(TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers

        if embeddings is not None:
            self.embeddings = embeddings
            self.pos_emb = PositionalEncoding(dropout,
                                              self.embeddings.embedding_dim)

        # Build TransformerDecoder.
        self.transformer_layers = nn.ModuleList([
            TransformerDecoderLayer(d_model,
                                    heads,
                                    d_ff,
                                    dropout,
                                    topic=topic,
                                    topic_dim=topic_dim,
                                    split_noise=split_noise)
            for _ in range(num_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
Example #5
    def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings,
                 use_universal_transformer):
        super(TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers
        self.embeddings = embeddings
        self.pos_emb = PositionalEncoding(dropout,
                                          self.embeddings.embedding_dim)

        # Build TransformerDecoder.
        self.dim_mismatch = d_model != 768

        if self.dim_mismatch:
            self.linear_custom = nn.Linear(768, d_model)
            self.linear_custom_reverse = nn.Linear(d_model, 768)
            print(
                "TransformerDecoder: input dimension is 768 but d_model is {}; "
                "adding up- and down-projection layers.".format(d_model))
        self.common_ff = None
        if use_universal_transformer:
            print("Using Universal Transformer in Decoder")
            self.common_ff = PositionwiseFeedForward(d_model, d_ff, dropout)
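            # Sharing this single feed-forward instance across all decoder layers ties
            # its weights across depth (Universal Transformer-style parameter sharing).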
        self.transformer_layers = nn.ModuleList([
            TransformerDecoderLayer(d_model, heads, d_ff, dropout,
                                    self.common_ff) for _ in range(num_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
Example #6
    def __init__(self, d_model, d_ff, heads, dropout, num_inter_layers=0, args=None):
        super(SentTransformerEncoder, self).__init__()
        self.d_model = d_model
        self.num_inter_layers = num_inter_layers
        sent_pos_emb = args.sent_pos_emb
        if sent_pos_emb:
            self.pos_emb = PositionalEncoding(dropout, d_model)
        else:
            self.pos_emb = None
        self.transformer_inter = nn.ModuleList(
            [TransformerEncoderLayer(d_model, heads, d_ff, dropout)
             for _ in range(num_inter_layers)])
        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
Example #7
    def __init__(self, args):
        super(SentTransformerEncoder, self).__init__()
        self.encoder_layers = args.encoder_layers
        if args.sent_pos_emb_enc:  # whether to use positional embeddings for inter-sentence encoding
            self.pos_emb = PositionalEncoding(args.dropout,
                                              args.encoder_embed_dim)
        else:
            self.pos_emb = None
        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerEncoderLayer(args=args)
            for _ in range(args.encoder_layers)
        ])
        self.dropout = nn.Dropout(args.dropout)
        self.normalize = args.encoder_normalize_before
        if self.normalize:
            self.layer_norm = LayerNorm(args.encoder_embed_dim)
Example #8
    def __init__(self,
                 d_model,
                 d_ff,
                 heads,
                 dropout,
                 num_inter_layers=0,
                 use_pos_emb=False):
        super(TransformerDecoder, self).__init__()
        self.use_pos_emb = use_pos_emb
        self.d_model = d_model
        self.num_inter_layers = num_inter_layers
        if self.use_pos_emb:
            self.pos_emb = PositionalEncoding(dropout, d_model)
        self.transformer_inter = nn.ModuleList([
            TransformerDecoderLayer(d_model, heads, d_ff, dropout)
            for _ in range(num_inter_layers)
        ])
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
Example #9
    def __init__(self, num_layers, d_model, heads, d_ff, dropout, embeddings, vocab_size):
        super(Z_TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers
        self.embeddings = embeddings
        self.pos_emb = PositionalEncoding(dropout, self.embeddings.embedding_dim)
        self.vocab_size = vocab_size

        if COPY:
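            # Optional single-head attention used for a copy/pointer mechanism,
            # enabled by the module-level COPY flag.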
            self.copy_attn = MultiHeadedAttention(
                1, d_model, dropout=dropout)

        # Build TransformerDecoder.
        self.transformer_layers = nn.ModuleList(
            [Z_TransformerDecoderLayer(d_model, heads, d_ff, dropout)
             for _ in range(num_layers)])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
Example #10
    def __init__(self,
                 num_layers,
                 d_model,
                 heads,
                 d_ff,
                 dropout,
                 embeddings,
                 sep_dec=False):
        super(TransformerDecoder, self).__init__()

        # print("dmodel = ", d_model)
        d_sep_model = d_model
        # (d_model * 2) // 3
        # Basic attributes.
        self.decoder_type = 'transformer'
        self.num_layers = num_layers
        self.embeddings = embeddings
        self.pos_emb = PositionalEncoding(dropout,
                                          self.embeddings.embedding_dim)

        # Build TransformerDecoder.
        # self.transformer_layers = nn.ModuleList(
        #     [TransformerDecoderLayer(d_model, heads, d_ff, dropout)
        #      for _ in range(num_layers)])
        # self.transformer_layers = nn.ModuleList(
        #     [TransformerDecoderLayer(d_model, heads, d_ff, dropout, sep_dec=False)
        #      for _ in range(num_layers//2)] + [TransformerDecoderLayer(d_sep_model, heads, d_ff, dropout, sep_dec=sep_dec)
        #      for _ in range(num_layers - num_layers//2)] )
        self.transformer_layers = nn.ModuleList([
            TransformerDecoderLayer(d_sep_model,
                                    heads,
                                    d_ff,
                                    dropout,
                                    sep_dec=sep_dec) for _ in range(num_layers)
        ])

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
Example #11
    def __init__(self, args, device, vocab, checkpoint=None):
        super(RankAE, self).__init__()
        self.args = args
        self.device = device
        self.vocab = vocab
        self.vocab_size = len(vocab)
        self.beam_size = args.beam_size
        self.max_length = args.max_length
        self.min_length = args.min_length

        self.start_token = vocab['[unused1]']
        self.end_token = vocab['[unused2]']
        self.pad_token = vocab['[PAD]']
        self.mask_token = vocab['[MASK]']
        self.seg_token = vocab['[unused3]']
        self.cls_token = vocab['[CLS]']

        self.hidden_size = args.enc_hidden_size
        self.embeddings = nn.Embedding(self.vocab_size, self.hidden_size, padding_idx=0)

        if args.encoder == 'bert':
            self.encoder = Bert(args.bert_dir, args.finetune_bert)
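            # If max_pos exceeds BERT's 512 positions, extend the position-embedding
            # table by copying the pretrained rows and repeating the last one.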
            if args.max_pos > 512:
                my_pos_embeddings = nn.Embedding(args.max_pos, self.encoder.model.config.hidden_size)
                my_pos_embeddings.weight.data[:512] = self.encoder.model.embeddings.position_embeddings.weight.data
                my_pos_embeddings.weight.data[512:] = self.encoder.model.embeddings.position_embeddings.weight.data[-1][None, :].repeat(args.max_pos-512, 1)
                self.encoder.model.embeddings.position_embeddings = my_pos_embeddings
            tgt_embeddings = nn.Embedding(self.vocab_size, self.encoder.model.config.hidden_size, padding_idx=0)
        else:
            self.encoder = TransformerEncoder(self.hidden_size, args.enc_ff_size, args.enc_heads,
                                              args.enc_dropout, args.enc_layers)
            tgt_embeddings = nn.Embedding(self.vocab_size, self.hidden_size, padding_idx=0)

        self.hier_encoder = TransformerEncoder(self.hidden_size, args.hier_ff_size, args.hier_heads,
                                               args.hier_dropout, args.hier_layers)
        self.cup_bilinear = nn.Bilinear(self.hidden_size, self.hidden_size, 1)
        self.pos_emb = PositionalEncoding(0., self.hidden_size)

        self.decoder = TransformerDecoder(
            self.args.dec_layers,
            self.args.dec_hidden_size, heads=self.args.dec_heads,
            d_ff=self.args.dec_ff_size, dropout=self.args.dec_dropout,
            embeddings=tgt_embeddings)

        self.generator = Generator(self.vocab_size, self.args.dec_hidden_size, self.pad_token)

        self.generator.linear.weight = self.decoder.embeddings.weight

        if checkpoint is not None:
            self.load_state_dict(checkpoint['model'], strict=True)
        else:
            if args.encoder == "transformer":
                for module in self.encoder.modules():
                    self._set_parameter_tf(module)
                xavier_uniform_(self.embeddings.weight)
            for module in self.decoder.modules():
                self._set_parameter_tf(module)
            for module in self.hier_encoder.modules():
                self._set_parameter_tf(module)
            for p in self.generator.parameters():
                self._set_parameter_linear(p)
            for p in self.cup_bilinear.parameters():
                self._set_parameter_linear(p)
            if args.share_emb:
                if args.encoder == 'bert':
                    self.embeddings = self.encoder.model.embeddings.word_embeddings
                    tgt_embeddings = nn.Embedding(self.vocab_size, self.encoder.model.config.hidden_size, padding_idx=0)
                    tgt_embeddings.weight = copy.deepcopy(self.encoder.model.embeddings.word_embeddings.weight)
                else:
                    tgt_embeddings = self.embeddings
                self.decoder.embeddings = tgt_embeddings
                self.generator.linear.weight = self.decoder.embeddings.weight

        self.to(device)
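The RankAE constructor above calls self._set_parameter_tf and self._set_parameter_linear, which are not part of this excerpt. A plausible sketch of what such helpers typically do in comparable summarization codebases (BERT-style initialization for transformer submodules, Xavier for generator weights) follows; the actual bodies in the original repository may differ, and they are written here as standalone functions rather than as methods on RankAE.

import torch.nn as nn
from torch.nn.init import xavier_uniform_

def _set_parameter_tf(module):
    # Hypothetical: BERT-style initialization for transformer submodules.
    if isinstance(module, (nn.Linear, nn.Embedding)):
        module.weight.data.normal_(mean=0.0, std=0.02)
    elif isinstance(module, nn.LayerNorm):
        module.bias.data.zero_()
        module.weight.data.fill_(1.0)
    if isinstance(module, nn.Linear) and module.bias is not None:
        module.bias.data.zero_()

def _set_parameter_linear(p):
    # Hypothetical: Xavier init for weight matrices, zeros for biases.
    if p.dim() > 1:
        xavier_uniform_(p)
    else:
        p.data.zero_()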