def __init__(self,
             vocab_size: int,
             att_dim: int = 512,
             nhead: int = 8,
             feedforward_dim: int = 2048,
             scale_embed: bool = False,
             pos_dropout: float = 0,
             att_dropout: float = 0.1,
             ffn_dropout: float = 0.1,
             num_layers: int = 6,
             post_norm: bool = True) -> None:
    super(TorchTransformerDecoder, self).__init__()
    # default normal init (std=1), no need to scale
    self.vocab_embed = nn.Embedding(vocab_size, att_dim)
    # use absolute positional embedding here
    self.abs_pos_enc = get_xfmr_pose("xfmr_abs",
                                     att_dim,
                                     dropout=pos_dropout,
                                     scale_embed=scale_embed)
    decoder_layer = TransformerDecoderLayer(att_dim,
                                            nhead,
                                            dim_feedforward=feedforward_dim,
                                            att_dropout=att_dropout,
                                            ffn_dropout=ffn_dropout,
                                            pre_norm=not post_norm)
    # final LayerNorm is only needed for the pre-norm variant
    final_norm = nn.LayerNorm(att_dim) if not post_norm else None
    self.decoder = TransformerDecoder(decoder_layer,
                                      num_layers,
                                      norm=final_norm)
    self.output = nn.Linear(att_dim, vocab_size, bias=False)
    self.vocab_size = vocab_size
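# Usage sketch (illustrative only): argument values below are assumptions, not
# defaults taken from the original project, and the import path is omitted
# because it is not part of this snippet.
#
#   decoder = TorchTransformerDecoder(vocab_size=4000,
#                                     att_dim=512,
#                                     nhead=8,
#                                     num_layers=6,
#                                     post_norm=True)
#
# With post_norm=True the layers use the original (post-LN) placement and no
# final LayerNorm is appended; with post_norm=False they run pre-LN and a
# trailing nn.LayerNorm(att_dim) is added by the decoder stack.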
def __init__(self,
             vocab_size: int = 40,
             att_dim: int = 512,
             nhead: int = 8,
             feedforward_dim: int = 2048,
             scale_embed: bool = False,
             pos_dropout: float = 0.1,
             att_dropout: float = 0.1,
             ffn_dropout: float = 0.1,
             num_layers: int = 6) -> None:
    super(TorchXfmrLM, self).__init__()
    self.vocab_embed = nn.Embedding(vocab_size, att_dim)
    self.abs_pos_enc = get_xfmr_pose("xfmr_abs",
                                     att_dim,
                                     dropout=pos_dropout,
                                     scale_embed=scale_embed)
    self.encoder = get_xfmr_encoder("xfmr_abs",
                                    num_layers,
                                    att_dim,
                                    nhead,
                                    dim_feedforward=feedforward_dim,
                                    att_dropout=att_dropout,
                                    ffn_dropout=ffn_dropout)
    # output distribution
    self.dist = nn.Linear(att_dim, vocab_size)
    self.vocab_size = vocab_size
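# Usage sketch (illustrative only; values other than the defaults visible in
# the signature above are assumptions):
#
#   lm = TorchXfmrLM(vocab_size=40, att_dim=512, nhead=8, num_layers=6)
#
# The modules built here embed token ids (vocab_embed), add absolute positional
# encoding (abs_pos_enc), run a self-attention stack (encoder) and map hidden
# states to vocabulary logits (dist); the forward pass is not shown in this
# snippet.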
def __init__(self,
             vocab_size: int,
             enc_dim: Optional[int] = None,
             jot_dim: int = 512,
             att_dim: int = 512,
             nhead: int = 8,
             feedforward_dim: int = 2048,
             scale_embed: bool = False,
             pos_dropout: float = 0.1,
             att_dropout: float = 0.1,
             ffn_dropout: float = 0.1,
             num_layers: int = 6,
             post_norm: bool = True,
             onehot_embed: bool = False) -> None:
    # enc_dim falls back to att_dim when not given
    super(TorchTransformerDecoder,
          self).__init__(vocab_size,
                         enc_dim=enc_dim if enc_dim else att_dim,
                         dec_dim=att_dim,
                         jot_dim=jot_dim,
                         onehot_embed=onehot_embed)
    # absolute positional encoding
    self.abs_pos_enc = get_xfmr_pose("xfmr_abs",
                                     att_dim,
                                     dropout=pos_dropout,
                                     scale_embed=scale_embed)
    # decoder built from self-attention (encoder) blocks
    self.decoder = get_xfmr_encoder("xfmr_abs",
                                    num_layers,
                                    att_dim,
                                    nhead,
                                    dim_feedforward=feedforward_dim,
                                    att_dropout=att_dropout,
                                    ffn_dropout=ffn_dropout,
                                    pre_norm=not post_norm)
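# Usage sketch (illustrative only): the joint dimension (jot_dim) and the
# enc_dim/dec_dim wiring in the base-class call suggest a transducer-style
# decoder, but that is an inference; the values below are assumptions.
#
#   decoder = TorchTransformerDecoder(vocab_size=4000,
#                                     enc_dim=None,   # falls back to att_dim
#                                     jot_dim=512,
#                                     att_dim=512,
#                                     onehot_embed=False)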
def __init__(self,
             enc_type: str,
             input_size: int,
             proj_layer: str = "conv2d",
             proj_kwargs: Optional[Dict] = None,
             att_dim: int = 512,
             nhead: int = 8,
             feedforward_dim: int = 2048,
             num_layers: int = 6,
             radius: int = 128,
             scale_embed: bool = False,
             pos_dropout: float = 0.1,
             att_dropout: float = 0.1,
             ffn_dropout: float = 0.1,
             kernel_size: int = 16,
             post_norm: bool = True,
             untie_rel: bool = True) -> None:
    super(TransformerEncoder, self).__init__()
    # keep the suffix of enc_type, e.g., "abs" for "xfmr_abs"
    self.type = enc_type.split("_")[-1]
    # input feature projection (default: conv2d)
    self.proj = get_xfmr_proj(proj_layer, input_size, att_dim, proj_kwargs)
    # positional encoding, chosen by enc_type
    self.pose = get_xfmr_pose(enc_type,
                              att_dim,
                              nhead=nhead,
                              radius=radius,
                              dropout=pos_dropout,
                              scale_embed=scale_embed)
    # stack of transformer encoder layers
    self.encoder = get_xfmr_encoder(enc_type,
                                    num_layers,
                                    att_dim,
                                    nhead,
                                    dim_feedforward=feedforward_dim,
                                    att_dropout=att_dropout,
                                    ffn_dropout=ffn_dropout,
                                    kernel_size=kernel_size,
                                    pre_norm=not post_norm,
                                    untie_rel=untie_rel)
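# Usage sketch (illustrative only): "xfmr_abs" is reused from the snippets
# above; input_size=80 (e.g., 80-dim fbank features) is an assumption.
#
#   encoder = TransformerEncoder("xfmr_abs",
#                                input_size=80,
#                                proj_layer="conv2d",
#                                att_dim=512,
#                                nhead=8,
#                                num_layers=6)
#
# enc_type selects both the positional encoding (get_xfmr_pose) and the
# encoder-layer variant (get_xfmr_encoder); self.type keeps the suffix after
# the last "_" ("abs" in this case).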