Example #1
 def __init__(self,
              vocab_size: int,
              att_dim: int = 512,
              nhead: int = 8,
              feedforward_dim: int = 2048,
              scale_embed: bool = False,
              pos_dropout: float = 0,
              att_dropout: float = 0.1,
              ffn_dropout: float = 0.1,
              num_layers: int = 6,
              post_norm: bool = True) -> None:
     super(TorchTransformerDecoder, self).__init__()
     # nn.Embedding uses normal init (std=1) by default, so no extra scaling is needed
     self.vocab_embed = nn.Embedding(vocab_size, att_dim)
     # use absolute positional embedding here
     self.abs_pos_enc = get_xfmr_pose("xfmr_abs",
                                      att_dim,
                                      dropout=pos_dropout,
                                      scale_embed=scale_embed)
     decoder_layer = TransformerDecoderLayer(
         att_dim,
         nhead,
         dim_feedforward=feedforward_dim,
         att_dropout=att_dropout,
         ffn_dropout=ffn_dropout,
         pre_norm=not post_norm)
     final_norm = nn.LayerNorm(att_dim) if not post_norm else None
     self.decoder = TransformerDecoder(decoder_layer,
                                       num_layers,
                                       norm=final_norm)
     self.output = nn.Linear(att_dim, vocab_size, bias=False)
     self.vocab_size = vocab_size
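A minimal construction sketch for the decoder above, assuming the class from Example #1 is in scope; the vocabulary size is an illustrative value and only arguments shown in the constructor are used.
    # illustrative vocabulary size; remaining arguments keep their defaults
    decoder = TorchTransformerDecoder(vocab_size=4000,
                                      att_dim=512,
                                      nhead=8,
                                      num_layers=6,
                                      post_norm=True)
    # decoder.output maps att_dim features back to vocab_size logits (no bias)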
Example #2
 def __init__(self,
              vocab_size: int = 40,
              att_dim: int = 512,
              nhead: int = 8,
              feedforward_dim: int = 2048,
              scale_embed: bool = False,
              pos_dropout: float = 0.1,
              att_dropout: float = 0.1,
              ffn_dropout: float = 0.1,
              num_layers: int = 6) -> None:
     super(TorchXfmrLM, self).__init__()
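     # token embedding for the LM input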
     self.vocab_embed = nn.Embedding(vocab_size, att_dim)
     self.abs_pos_enc = get_xfmr_pose("xfmr_abs",
                                      att_dim,
                                      dropout=pos_dropout,
                                      scale_embed=scale_embed)
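     # transformer encoder stack used as the LM backbone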
     self.encoder = get_xfmr_encoder("xfmr_abs",
                                     num_layers,
                                     att_dim,
                                     nhead,
                                     dim_feedforward=feedforward_dim,
                                     att_dropout=att_dropout,
                                     ffn_dropout=ffn_dropout)
     # output distribution
     self.dist = nn.Linear(att_dim, vocab_size)
     self.vocab_size = vocab_size
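A usage sketch for the language model above, assuming the class from Example #2 is in scope; every argument has a default, so only the vocabulary size (illustrative) is passed explicitly.
    lm = TorchXfmrLM(vocab_size=1000)  # illustrative vocabulary size
    # lm.dist projects the att_dim encoder output to vocabulary logits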
Example #3
 def __init__(self,
              vocab_size: int,
              enc_dim: Optional[int] = None,
              jot_dim: int = 512,
              att_dim: int = 512,
              nhead: int = 8,
              feedforward_dim: int = 2048,
              scale_embed: bool = False,
              pos_dropout: float = 0.1,
              att_dropout: float = 0.1,
              ffn_dropout: float = 0.1,
              num_layers: int = 6,
              post_norm: bool = True,
              onehot_embed: bool = False) -> None:
     super(TorchTransformerDecoder,
           self).__init__(vocab_size,
                          enc_dim=enc_dim if enc_dim else att_dim,
                          dec_dim=att_dim,
                          jot_dim=jot_dim,
                          onehot_embed=onehot_embed)
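     # absolute positional encoding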
     self.abs_pos_enc = get_xfmr_pose("xfmr_abs",
                                      att_dim,
                                      dropout=pos_dropout,
                                      scale_embed=scale_embed)
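     # decoder stack built with the transformer encoder helper (self-attention layers)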
     self.decoder = get_xfmr_encoder("xfmr_abs",
                                     num_layers,
                                     att_dim,
                                     nhead,
                                     dim_feedforward=feedforward_dim,
                                     att_dropout=att_dropout,
                                     ffn_dropout=ffn_dropout,
                                     pre_norm=not post_norm)
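A construction sketch, assuming the class from Example #3 is in scope; as the super() call shows, enc_dim falls back to att_dim when left unset, and the vocabulary size is an illustrative value.
    # enc_dim is omitted, so it defaults to att_dim (512) via the super() call
    decoder = TorchTransformerDecoder(vocab_size=4000,
                                      jot_dim=512,
                                      att_dim=512,
                                      num_layers=6)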
Example #4
File: encoder.py Project: yt752/aps
 def __init__(self,
              enc_type: str,
              input_size: int,
              proj_layer: str = "conv2d",
              proj_kwargs: Optional[Dict] = None,
              att_dim: int = 512,
              nhead: int = 8,
              feedforward_dim: int = 2048,
              num_layers: int = 6,
              radius: int = 128,
              scale_embed: bool = False,
              pos_dropout: float = 0.1,
              att_dropout: float = 0.1,
              ffn_dropout: float = 0.1,
              kernel_size: int = 16,
              post_norm: bool = True,
              untie_rel: bool = True):
     super(TransformerEncoder, self).__init__()
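     # keep the variant suffix, e.g. "xfmr_abs" -> "abs"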
     self.type = enc_type.split("_")[-1]
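     # projection front-end (e.g. conv2d) mapping input features to att_dim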
     self.proj = get_xfmr_proj(proj_layer, input_size, att_dim, proj_kwargs)
     self.pose = get_xfmr_pose(enc_type,
                               att_dim,
                               nhead=nhead,
                               radius=radius,
                               dropout=pos_dropout,
                               scale_embed=scale_embed)
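     # transformer encoder stack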
     self.encoder = get_xfmr_encoder(enc_type,
                                     num_layers,
                                     att_dim,
                                     nhead,
                                     dim_feedforward=feedforward_dim,
                                     att_dropout=att_dropout,
                                     ffn_dropout=ffn_dropout,
                                     kernel_size=kernel_size,
                                     pre_norm=not post_norm,
                                     untie_rel=untie_rel)
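A construction sketch for the encoder above, assuming the class from Example #4 is in scope; "xfmr_abs" matches the enc_type used by the other examples, and input_size is an illustrative feature dimension.
    # 80-dim filterbank features with the default conv2d projection (illustrative)
    encoder = TransformerEncoder("xfmr_abs",
                                 input_size=80,
                                 proj_layer="conv2d",
                                 att_dim=512,
                                 nhead=8,
                                 num_layers=6)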