Example #1
    def __init__(self, head_count, model_dim, p=0.1):
        """
        Args:
            head_count (int): number of parallel attention heads.
            model_dim (int): dimension of keys/values/queries in this
                MultiHeadedAttention; must be divisible by head_count.
            p (float): dropout probability.
        """
        assert model_dim % head_count == 0
        super(MultiHeadedAttention, self).__init__()

        self.dim_per_head = model_dim // head_count
        self.model_dim = model_dim
        self.head_count = head_count

        # One projection each for keys, values and queries; the output is
        # later split into head_count chunks of size dim_per_head.
        self.linear_keys = BottleLinear(model_dim,
                                        head_count * self.dim_per_head,
                                        bias=False)
        self.linear_values = BottleLinear(model_dim,
                                          head_count * self.dim_per_head,
                                          bias=False)
        self.linear_query = BottleLinear(model_dim,
                                         head_count * self.dim_per_head,
                                         bias=False)
        self.sm = BottleSoftmax()
        self.activation = nn.ReLU()
        self.layer_norm = BottleLayerNorm(model_dim)
        self.dropout = nn.Dropout(p)
        self.res_dropout = nn.Dropout(p)
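
For orientation, here is a minimal, self-contained sketch of how projections shaped like the ones above are typically consumed: the key/value/query outputs of size head_count * dim_per_head are reshaped into separate heads and fed to scaled dot-product attention. Plain nn.Linear stands in for BottleLinear, and the toy sizes and forward logic are illustrative assumptions, not this module's actual forward pass.

import math
import torch
import torch.nn as nn

# Toy sizes; BottleLinear is replaced by nn.Linear for this sketch.
batch, seq_len, model_dim, head_count = 2, 5, 512, 8
dim_per_head = model_dim // head_count

linear_query = nn.Linear(model_dim, head_count * dim_per_head, bias=False)
linear_keys = nn.Linear(model_dim, head_count * dim_per_head, bias=False)
linear_values = nn.Linear(model_dim, head_count * dim_per_head, bias=False)

x = torch.randn(batch, seq_len, model_dim)

def split_heads(t):
    # (batch, seq_len, heads * dim_per_head) -> (batch, heads, seq_len, dim_per_head)
    return t.view(batch, seq_len, head_count, dim_per_head).transpose(1, 2)

q = split_heads(linear_query(x))
k = split_heads(linear_keys(x))
v = split_heads(linear_values(x))

# Scaled dot-product attention, computed independently per head.
scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(dim_per_head)
attn = torch.softmax(scores, dim=-1)
context = torch.matmul(attn, v)            # (batch, heads, seq_len, dim_per_head)
context = context.transpose(1, 2).contiguous().view(batch, seq_len, model_dim)
print(context.shape)                       # torch.Size([2, 5, 512])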
Example #2
    def __init__(self, dim, coverage=False, attn_type="dotprod"):
        """
        Args:
            dim (int): dimension of the query and context vectors.
            coverage (bool): whether to add a coverage term to the attention input.
            attn_type (str): attention scoring function, "dotprod" or "mlp".
        """
        super(GlobalAttention, self).__init__()

        self.dim = dim
        self.attn_type = attn_type
        assert self.attn_type in ["dotprod", "mlp"], \
            "Please select a valid attention type."

        if self.attn_type == "dotprod":
            self.linear_in = nn.Linear(dim, dim, bias=False)
            self.linear_out = nn.Linear(dim * 2, dim, bias=False)
        elif self.attn_type == "mlp":
            self.linear_context = BottleLinear(dim, dim, bias=False)
            self.linear_query = nn.Linear(dim, dim, bias=True)
            self.mlp_tanh = nn.Tanh()
            self.v = BottleLinear(dim, 1, bias=False)
            self.linear_out = nn.Linear(dim * 2, dim, bias=True)

        self.sm = nn.Softmax()
        self.tanh = nn.Tanh()
        self.mask = None

        if coverage:
            self.linear_cover = nn.Linear(1, dim, bias=False)
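
The layer names above suggest the two standard scoring functions: "dotprod" computes score(h_t, h_s) = h_s^T (W h_t) via linear_in, while "mlp" computes v^T tanh(W_q h_t + W_c h_s) via linear_query, linear_context and v. The snippet below is a rough sketch of both under that assumption, with nn.Linear standing in for BottleLinear; it is not the module's actual forward code.

import torch
import torch.nn as nn

dim, batch, src_len = 4, 2, 6
query = torch.randn(batch, dim)              # decoder state h_t
context = torch.randn(batch, src_len, dim)   # encoder states h_s

# "dotprod" scoring: score(h_t, h_s) = h_s^T (W h_t)
linear_in = nn.Linear(dim, dim, bias=False)
dot_scores = torch.bmm(context, linear_in(query).unsqueeze(2)).squeeze(2)

# "mlp" scoring: score(h_t, h_s) = v^T tanh(W_q h_t + W_c h_s)
linear_query = nn.Linear(dim, dim, bias=True)
linear_context = nn.Linear(dim, dim, bias=False)
v = nn.Linear(dim, 1, bias=False)
hidden = torch.tanh(linear_query(query).unsqueeze(1) + linear_context(context))
mlp_scores = v(hidden).squeeze(2)

# Either score tensor has shape (batch, src_len); softmax turns it into weights.
align = torch.softmax(dot_scores, dim=-1)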
Example #3
    def __init__(self, n_head, d_model, p=0.1):
        """
        Args:
            n_head (int): number of parallel attention heads.
            d_model (int): model dimension; assumed divisible by n_head.
            p (float): dropout probability.
        """
        super(MultiHeadedAttention, self).__init__()
        self.d_k = d_model // n_head
        heads = self.heads = n_head
        self.linear_keys = BottleLinear(d_model, heads * self.d_k, bias=False)
        self.linear_values = BottleLinear(d_model,
                                          heads * self.d_k,
                                          bias=False)
        self.linear_query = BottleLinear(d_model, heads * self.d_k, bias=False)
        self.sm = BottleSoftmax()
        self.activation = nn.ReLU()
        self.layer_norm = BottleLayerNorm(d_model)
        self.dropout = nn.Dropout(p)
        self.res_dropout = nn.Dropout(p)
Example #4
    def __init__(self,
                 dim,
                 coverage=False,
                 attn_type="dotprod",
                 attn_transform="softmax",
                 c_attn=0.0):
        super(GlobalAttention, self).__init__()

        self.dim = dim
        self.attn_type = attn_type
        assert self.attn_type in ["dotprod", "mlp"], \
            "Please select a valid attention type."

        if self.attn_type == "dotprod":
            self.linear_in = nn.Linear(dim, dim, bias=False)
            self.linear_out = nn.Linear(dim * 2, dim, bias=False)
        elif self.attn_type == "mlp":
            self.linear_context = BottleLinear(dim, dim, bias=False)
            self.linear_query = nn.Linear(dim, dim, bias=False)
            self.v = BottleLinear(dim, 1, bias=False)
            # Modify initialization of self.v to have high variance
            # self.v.weight.data.normal_(0, 1000)
        # Select how raw attention scores are normalized into weights.
        if attn_transform == 'softmax':
            self.sm = nn.Softmax()
        elif attn_transform == 'sparsemax':
            self.sm = Sparsemax()
        elif attn_transform == 'constrained_softmax':
            self.sm = ConstrainedSoftmax()
        elif attn_transform == 'constrained_sparsemax':
            self.sm = ConstrainedSparsemax()
        else:
            raise NotImplementedError
        self.attn_transform = attn_transform

        self.tanh = nn.Tanh()
        self.mask = None
        self.c_attn = c_attn

        if coverage:
            self.linear_cover = nn.Linear(1, dim, bias=False)
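
The attn_transform argument selects how raw scores are turned into attention weights. Softmax always produces strictly positive weights, whereas sparsemax (Martins & Astudillo, 2016) projects the scores onto the probability simplex and can assign exactly zero to some positions; the constrained variants additionally cap individual weights. As a reference, below is a minimal sparsemax sketch under the standard formulation; the Sparsemax, ConstrainedSoftmax and ConstrainedSparsemax modules used above may differ in interface and details.

import torch

def sparsemax(scores, dim=-1):
    # Euclidean projection of the scores onto the probability simplex
    # (Martins & Astudillo, 2016); unlike softmax it can return exact zeros.
    z_sorted, _ = torch.sort(scores, dim=dim, descending=True)
    k_shape = [1] * scores.dim()
    k_shape[dim] = -1
    k = torch.arange(1, scores.size(dim) + 1,
                     dtype=scores.dtype, device=scores.device).view(k_shape)
    cumsum = z_sorted.cumsum(dim) - 1
    support = (k * z_sorted > cumsum).to(scores.dtype).sum(dim=dim, keepdim=True)
    tau = cumsum.gather(dim, support.long() - 1) / support
    return torch.clamp(scores - tau, min=0.0)

scores = torch.tensor([[1.5, 1.0, -1.0]])
print(torch.softmax(scores, dim=-1))   # every entry strictly positive
print(sparsemax(scores))               # tensor([[0.7500, 0.2500, 0.0000]])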