Example #1
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_layers: int,
                 use_position: bool = False,
                 dropout: float = 0.0):
        super(Bengio03ResNetBiLm, self).__init__()
        self.use_position = use_position
        self.n_layers = n_layers

        self.dropout = torch.nn.Dropout(p=dropout)
        self.activation = torch.nn.ReLU()

        self.width = width
        self.input_size = input_size
        self.context_input_size = input_size * (width + 1)
        self.hidden_size = hidden_size

        forward_paddings, backward_paddings = [], []
        forward_projects, backward_projects = [], []
        for i in range(n_layers):
            forward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))
            backward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))

            forward_projects.append(
                torch.nn.Linear(self.context_input_size, hidden_size))
            backward_projects.append(
                torch.nn.Linear(self.context_input_size, hidden_size))

        self.forward_projects = torch.nn.ModuleList(forward_projects)
        self.backward_projects = torch.nn.ModuleList(backward_projects)

        self.forward_paddings = torch.nn.ParameterList(forward_paddings)
        self.backward_paddings = torch.nn.ParameterList(backward_paddings)

        # per-layer position-wise feed-forward and residual sublayers;
        # use the dropout argument (no self.config exists in this constructor)
        self.left_linears = torch.nn.ModuleList([
            PositionwiseFeedForward(hidden_size, hidden_size, dropout)
            for _ in range(n_layers)
        ])
        self.right_linears = torch.nn.ModuleList([
            PositionwiseFeedForward(hidden_size, hidden_size, dropout)
            for _ in range(n_layers)
        ])

        self.left_blocks = torch.nn.ModuleList([
            SublayerConnection(hidden_size, dropout) for _ in range(n_layers)
        ])
        self.right_blocks = torch.nn.ModuleList([
            SublayerConnection(hidden_size, dropout) for _ in range(n_layers)
        ])
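A minimal construction sketch for the snippet above; the argument values are illustrative, and Bengio03ResNetBiLm together with its PositionwiseFeedForward and SublayerConnection helpers is assumed to be importable from the surrounding project:

# illustrative values, not taken from the original source
model = Bengio03ResNetBiLm(width=3, input_size=64, hidden_size=64,
                           n_layers=2, dropout=0.1)
# each per-layer projection consumes the (width + 1) * input_size context window
print(model.forward_projects[0])  # Linear(in_features=256, out_features=64, bias=True)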
Example #2
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_layers: int,
                 use_position: bool = False,
                 dropout: float = 0.0):
        super(LBLResNetBiLm, self).__init__()
        self.use_position = use_position

        self.dropout = torch.nn.Dropout(dropout)
        self.activation = torch.nn.ReLU()

        self.width = width
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        forward_paddings, backward_paddings = [], []
        forward_weights, backward_weights = [], []
        for _ in range(self.n_layers):
            forward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))
            backward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))
            forward_weights.append(torch.nn.Parameter(torch.randn(width + 1)))
            backward_weights.append(torch.nn.Parameter(torch.randn(width + 1)))

        self.forward_paddings = torch.nn.ParameterList(forward_paddings)
        self.backward_paddings = torch.nn.ParameterList(backward_paddings)
        self.forward_weights = torch.nn.ParameterList(forward_weights)
        self.backward_weights = torch.nn.ParameterList(backward_weights)

        if self.use_position:
            self.position = PositionalEncoding(hidden_size)

        self.forward_linears = torch.nn.ModuleList([
            PositionwiseFeedForward(hidden_size, hidden_size, dropout)
            for _ in range(n_layers)
        ])
        self.backward_linears = torch.nn.ModuleList([
            PositionwiseFeedForward(hidden_size, hidden_size, dropout)
            for _ in range(n_layers)
        ])

        self.forward_blocks = torch.nn.ModuleList([
            SublayerConnection(hidden_size, dropout) for _ in range(n_layers)
        ])
        self.backward_blocks = torch.nn.ModuleList([
            SublayerConnection(hidden_size, dropout) for _ in range(n_layers)
        ])
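With the per-layer weights registered through a ParameterList, each layer owns a learnable vector of width + 1 mixing coefficients alongside its (width, hidden_size) padding block. A quick shape check with illustrative arguments (LBLResNetBiLm and its helpers are assumed importable):

model = LBLResNetBiLm(width=3, input_size=128, hidden_size=128, n_layers=2)
# one mixing vector and one padding block per layer
assert all(w.shape == (model.width + 1,) for w in model.forward_weights)
assert all(p.shape == (model.width, model.hidden_size) for p in model.forward_paddings)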
Example #3
    def __init__(
        self,
        decoding_dim: int,
        target_embedding_dim: int,
        feedforward_hidden_dim: int,
        num_layers: int,
        num_attention_heads: int,
        use_positional_encoding: bool = True,
        positional_encoding_max_steps: int = 5000,
        dropout_prob: float = 0.1,
        residual_dropout_prob: float = 0.2,
        attention_dropout_prob: float = 0.1,
    ) -> None:

        super().__init__(decoding_dim=decoding_dim,
                         target_embedding_dim=target_embedding_dim,
                         decodes_parallel=True)

        attn = MultiHeadedAttention(num_attention_heads, decoding_dim,
                                    attention_dropout_prob)
        feed_forward = PositionwiseFeedForward(decoding_dim,
                                               feedforward_hidden_dim,
                                               dropout_prob)
        self._embed_scale = math.sqrt(decoding_dim)
        self._positional_embedder = (
            PositionalEncoding(decoding_dim, positional_encoding_max_steps)
            if use_positional_encoding else None)
        self._dropout = nn.Dropout(dropout_prob)
        self._self_attention = Decoder(
            DecoderLayer(decoding_dim, deepcopy(attn), deepcopy(attn),
                         feed_forward, residual_dropout_prob), num_layers)
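The two deepcopy(attn) calls give the self-attention and source-attention sublayers of each DecoderLayer independent parameters. A standalone sketch of that pattern, using a plain nn.Linear as a stand-in for MultiHeadedAttention:

from copy import deepcopy

import torch
import torch.nn as nn

proto = nn.Linear(8, 8)
self_attn, src_attn = deepcopy(proto), deepcopy(proto)
assert self_attn.weight is not src_attn.weight          # independent tensors to train
assert torch.equal(self_attn.weight, src_attn.weight)   # identical initial values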
Example #4
    def __init__(
        self,
        num_layers: int,
        decoding_dim: int,
        target_embedding_dim: int,
        feedforward_hidden_dim: int,
        num_attention_heads: int,
        combiner: TransformerCombiner,
        num_sources: int,
        use_positional_encoding: bool = True,
        positional_encoding_max_steps: int = 5000,
        dropout_prob: float = 0.1,
        residual_dropout_prob: float = 0.2,
        attention_dropout_prob: float = 0.2,
    ) -> None:
        super().__init__(decoding_dim,
                         target_embedding_dim,
                         decodes_parallel=True)

        self._decoding_dim = decoding_dim
        self._embed_scale = math.sqrt(decoding_dim)

        self._positional_embedder = (PositionalEncoding(
            input_dim=decoding_dim, max_len=positional_encoding_max_steps)
                                     if use_positional_encoding else None)
        self._dropout = nn.Dropout(dropout_prob)

        generic_attn = MultiHeadedAttention(num_attention_heads, decoding_dim,
                                            attention_dropout_prob)
        combined_attn = AttentionCombiner(num_sources, generic_attn, combiner)
        feed_forward = PositionwiseFeedForward(decoding_dim,
                                               feedforward_hidden_dim,
                                               dropout_prob)

        layer = DecoderLayer(size=decoding_dim,
                             self_attn=deepcopy(generic_attn),
                             src_attn=deepcopy(combined_attn),
                             feed_forward=feed_forward,
                             dropout=residual_dropout_prob)

        self._self_attention_layers = _clones(layer, num_layers)
        self.norm = nn.LayerNorm(layer.size)
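The _clones helper is not shown in this snippet; in Annotated-Transformer-style code it is conventionally a deep copy of the prototype layer into a ModuleList, which is assumed (not confirmed by the source) to be the case here:

import copy

import torch

def _clones(module: torch.nn.Module, n: int) -> torch.nn.ModuleList:
    # n independent copies of the prototype layer, each with its own parameters
    return torch.nn.ModuleList([copy.deepcopy(module) for _ in range(n)])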