Example no. 1
    def forward(self,  # pylint: disable=arguments-differ
                inputs: PackedSequence,
                initial_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None
                ) -> Tuple[PackedSequence, Tuple[torch.Tensor, torch.Tensor]]:

        if not isinstance(inputs, PackedSequence):
            raise ConfigurationError('inputs must be PackedSequence but got %s' % (type(inputs)))

        sequence_tensor, batch_lengths = pad_packed_sequence(inputs, batch_first=True)
        batch_size = sequence_tensor.size()[0]
        length = sequence_tensor.size()[1]

        # Initialize the hidden states for all layers.
        if initial_state is None:
            hidden = sequence_tensor.new_zeros(self.num_layers, batch_size, self.hidden_size)
            cell = sequence_tensor.new_zeros(self.num_layers, batch_size, self.n_chunk, self.chunk_size)
        else:
            hidden = initial_state[0].squeeze(0)
            cell = initial_state[1].squeeze(0)

        if self.training:
            for c in self.cells:
                c.sample_masks()

        final_hidden = []
        final_cell = []

        for l in range(len(self.cells)):
            curr_layer = [None] * length
            t_input = self.cells[l].ih(sequence_tensor)
            hx = hidden[l]
            cx = cell[l]

            for t in range(length):
                # Do not reuse `hidden`/`cell` here: they still hold the stacked
                # initial states needed by the layers above.
                h_t, c_t = self.cells[l](None, hx, cx, transformed_input=t_input[:, t])
                hx, cx = h_t, c_t
                curr_layer[t] = h_t  # (batch, dim)

            final_hidden.append(hx)
            final_cell.append(cx)
            # batch, length, dim
            sequence_tensor = torch.stack(curr_layer, dim=1)
            if l < len(self.cells) - 1:
                sequence_tensor = self.lockdrop(sequence_tensor, self.dropout)  # apply lockdrop after every LSTM layer except the last


        output = pack_padded_sequence(sequence_tensor, batch_lengths, batch_first=True)
        final_state = (torch.stack(final_hidden), torch.stack(final_cell))

        return output, final_state
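Below is a minimal, self-contained sketch of the same unpack → per-timestep loop → repack pattern, with torch.nn.LSTMCell standing in for the custom cells above; all names are illustrative, not taken from the example.

import torch
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Hypothetical two-layer stack standing in for the custom cells above.
cells = [nn.LSTMCell(8, 16), nn.LSTMCell(16, 16)]

def run_stack(packed):
    x, lengths = pad_packed_sequence(packed, batch_first=True)  # (batch, time, dim)
    batch, time = x.size(0), x.size(1)
    for cell in cells:
        hx = x.new_zeros(batch, cell.hidden_size)
        cx = x.new_zeros(batch, cell.hidden_size)
        outputs = []
        for t in range(time):
            hx, cx = cell(x[:, t], (hx, cx))
            outputs.append(hx)
        x = torch.stack(outputs, dim=1)  # becomes the next layer's input
    return pack_padded_sequence(x, lengths, batch_first=True), (hx, cx)

packed = pack_padded_sequence(torch.randn(3, 5, 8), torch.tensor([5, 4, 2]),
                              batch_first=True)
output, (h, c) = run_stack(packed)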
Example no. 2
    def forward(self, x, h=None):
        out, h = self.lstm_forward(x, h)

        if isinstance(out, PackedSequence):
            data = out.data
            out = PackedSequence(self.linear(data), out.batch_sizes)
        else:
            # Reshape the output to (batch_size * sequence_length, hidden_size) so the
            # linear layer is applied to every timestep at once. The reshape works for
            # either batch_first setting; only the two size names swap meaning.
            batch, seq_len = out.size(0), out.size(1)
            data = out.contiguous().view(batch * seq_len, self.hidden_size)
            # Decode hidden states of all time steps
            out = self.linear(data).view(batch, seq_len, self.vocab_size)

        return out
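As a stand-alone illustration of the PackedSequence branch above: apply a layer to the flat .data tensor and rebuild a PackedSequence with the same batch_sizes (names here are illustrative, not from the module above).

import torch
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence, PackedSequence

linear = nn.Linear(16, 10)
packed = pack_padded_sequence(torch.randn(3, 5, 16), torch.tensor([5, 4, 2]),
                              batch_first=True)

# .data is the flat (total_timesteps, features) tensor holding every valid
# timestep of every sequence, so a single matmul covers the whole batch.
projected = PackedSequence(linear(packed.data), packed.batch_sizes)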
    def forward(self, x):

        if self.lm:
            h = self.embed(x)
        else:
            if type(x) is PackedSequence:
                h = self.embed(x.data)
                h = PackedSequence(h, x.batch_sizes)
            else:
                h = self.embed(x)

        h, _ = self.rnn(h)

        if type(h) is PackedSequence:
            h = h.data
            h = self.dropout(h)
            z = self.proj(h)
            z = PackedSequence(z, x.batch_sizes)
        else:
            h = h.view(-1, h.size(2))
            h = self.dropout(h)
            z = self.proj(h)
            z = z.view(x.size(0), x.size(1), -1)

        return z
    def forward(
        self,
        inputs: PackedSequence,  # pylint: disable=arguments-differ
        # pylint: disable=unused-argument
        initial_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None
    ) -> Tuple[PackedSequence, torch.Tensor]:
        """
        Parameters
        ----------
        inputs : ``PackedSequence``, required.
            A batch first ``PackedSequence`` to run the stacked LSTM over.
        initial_state : Tuple[torch.Tensor, torch.Tensor], optional, (default = None)
            Currently, this is ignored.

        Returns
        -------
        output_sequence : ``PackedSequence``
            The encoded sequence of shape (batch_size, sequence_length, hidden_size)
        final_states: ``torch.Tensor``
            The per-layer final (state, memory) states of the LSTM, each with shape
            (num_layers, batch_size, hidden_size).
        """
        inputs, lengths = pad_packed_sequence(inputs, batch_first=True)

        # Kernel takes sequence length first tensors.
        inputs = inputs.transpose(0, 1)

        sequence_length, batch_size, _ = inputs.size()
        accumulator_shape = [
            self.num_layers, sequence_length + 1, batch_size, self.hidden_size
        ]
        state_accumulator = inputs.new_zeros(*accumulator_shape)
        memory_accumulator = inputs.new_zeros(*accumulator_shape)

        dropout_weights = inputs.new_ones(self.num_layers, batch_size,
                                          self.hidden_size)
        if self.training:
            # Normalize by 1 - dropout_prob to preserve the output statistics of the layer.
            dropout_weights.bernoulli_(1 - self.recurrent_dropout_probability)\
                .div_((1 - self.recurrent_dropout_probability))

        gates = inputs.new_zeros(self.num_layers, sequence_length,
                                 batch_size, 6 * self.hidden_size)

        lengths_variable = torch.LongTensor(lengths)
        implementation = _AlternatingHighwayLSTMFunction(
            self.input_size,
            self.hidden_size,
            num_layers=self.num_layers,
            train=self.training)
        output, _ = implementation(inputs, self.weight, self.bias,
                                   state_accumulator, memory_accumulator,
                                   dropout_weights, lengths_variable, gates)

        # TODO(Mark): Also return the state here by using index_select with the lengths so we can use
        # it as a Seq2VecEncoder.
        output = output.transpose(0, 1)
        output = pack_padded_sequence(output, lengths, batch_first=True)
        return output, None
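For context, a hedged usage sketch of the calling convention the docstring describes (batch-first PackedSequence in, PackedSequence out), with torch.nn.LSTM standing in for the highway-LSTM kernel; all names are illustrative.

import torch
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

encoder = nn.LSTM(input_size=8, hidden_size=16, num_layers=2, batch_first=True)

x = torch.randn(3, 5, 8)
lengths = torch.tensor([5, 4, 2])                 # sorted, longest first
packed = pack_padded_sequence(x, lengths, batch_first=True)
output, (h_n, c_n) = encoder(packed)              # output is a PackedSequence
padded, _ = pad_packed_sequence(output, batch_first=True)  # (3, 5, 16)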
Example no. 5
    def forward(self, packed_x: PackedSequence):
        """
        forward expects a PackedSequence as input.
        """
        # Chunk along the flat .data tensor, which stacks every valid timestep of
        # every sequence (a PackedSequence itself has no .size()).
        data = packed_x.data
        num_chunks = max(1, data.size(0) // self.config.chunk_size)
        chunks = torch.chunk(data, num_chunks, 0)
        out = [self.process(chunk) for chunk in chunks]
        encoded = torch.cat(out, 0)
        return encoded
    def forward(self, inputs: PackedSequence,  # pylint: disable=arguments-differ
                # pylint: disable=unused-argument
                initial_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None
                ) -> Tuple[PackedSequence, torch.Tensor]:
        """
        Parameters
        ----------
        inputs : ``PackedSequence``, required.
            A batch first ``PackedSequence`` to run the stacked LSTM over.
        initial_state : Tuple[torch.Tensor, torch.Tensor], optional, (default = None)
            Currently, this is ignored.

        Returns
        -------
        output_sequence : ``PackedSequence``
            The encoded sequence of shape (batch_size, sequence_length, hidden_size)
        final_states: ``torch.Tensor``
            The per-layer final (state, memory) states of the LSTM, each with shape
            (num_layers, batch_size, hidden_size).
        """
        inputs, lengths = pad_packed_sequence(inputs, batch_first=True)

        # Kernel takes sequence length first tensors.
        inputs = inputs.transpose(0, 1)

        sequence_length, batch_size, _ = inputs.size()
        accumulator_shape = [self.num_layers, sequence_length + 1, batch_size, self.hidden_size]
        state_accumulator = Variable(inputs.data.new(*accumulator_shape).zero_(), requires_grad=False)
        memory_accumulator = Variable(inputs.data.new(*accumulator_shape).zero_(), requires_grad=False)

        dropout_weights = inputs.data.new().resize_(self.num_layers, batch_size, self.hidden_size).fill_(1.0)
        if self.training:
            # Normalize by 1 - dropout_prob to preserve the output statistics of the layer.
            dropout_weights.bernoulli_(1 - self.recurrent_dropout_probability)\
                .div_((1 - self.recurrent_dropout_probability))

        dropout_weights = Variable(dropout_weights, requires_grad=False)
        gates = Variable(inputs.data.new().resize_(self.num_layers,
                                                   sequence_length,
                                                   batch_size, 6 * self.hidden_size))

        lengths_variable = Variable(torch.IntTensor(lengths))
        implementation = _AlternatingHighwayLSTMFunction(self.input_size,
                                                         self.hidden_size,
                                                         num_layers=self.num_layers,
                                                         train=self.training)
        output, _ = implementation(inputs, self.weight, self.bias, state_accumulator,
                                   memory_accumulator, dropout_weights, lengths_variable, gates)

        # TODO(Mark): Also return the state here by using index_select with the lengths so we can use
        # it as a Seq2VecEncoder.
        output = output.transpose(0, 1)
        output = pack_padded_sequence(output, lengths, batch_first=True)
        return output, None
Example no. 7
    def forward(self, state, length):
        # length should be sorted
        assert len(state.size()) == 3 # batch x n_features x input_dim
                                      # input_dim == n_features + 1
        batch_size = state.size()[0]
        self.weight = np.zeros((int(batch_size), self.n_features))
        nonzero = int(torch.sum(length > 0).item())  # encode only the non-empty sequences
        if nonzero == 0:
            return state.new(int(batch_size), self.lstm_size + self.embedded_dim).fill_(0.)

        length_ = list(length[:nonzero].cpu().numpy())
        packed = pack(state[:nonzero], length_, batch_first=True)

        embedded = self.embedder(packed.data)


        if self.normalize:
            embedded = F.normalize(embedded, dim=1)
        embedded = PackedSequence(embedded, packed.batch_sizes)
        embedded, _ = pad(embedded, batch_first=True) # nonzero x max(length) x embedded_dim

        # define initial state
        qt = embedded.new(embedded.size()[0], self.lstm_size).fill_(0.)
        ct = embedded.new(embedded.size()[0], self.lstm_size).fill_(0.)

        ###########################
        # shuffling (set encoding)
        ###########################

        for i in range(self.n_shuffle):
            attended, weight = self.attending(qt, embedded, length[:nonzero])
            # attended : nonzero x embedded_dim
            qt, ct = self.lstm(attended, (qt, ct))

        # TODO edit here!
        weight = weight.detach().cpu().numpy()
        tmp = state[:, :, 1:]
        val, acq = torch.max(tmp, 2) # batch x n_features
        tmp = (val.long() * acq).cpu().numpy()
        tmp = tmp[:weight.shape[0], :weight.shape[1]]
        self.weight[np.arange(nonzero).reshape(-1, 1), tmp] = weight

        encoded = torch.cat((attended, qt), dim=1)
        if batch_size > nonzero:
            encoded = torch.cat(
                (encoded,
                 encoded.new(int(batch_size - nonzero),
                     encoded.size()[1]).fill_(0.)),
                dim=0
            )
        return encoded
    def forward(self, x):
        # x's are already flanked by the start/stop token as:
        # [stop, x, stop]
        z_fwd, z_rvs = self.embed_and_split(x, pad=False)

        h_fwd, h_rvs = self.transform(z_fwd, z_rvs, last_only=True)

        packed = type(z_fwd) is PackedSequence
        if packed:
            h_flat = h_fwd.data
            logp_fwd = self.linear(h_flat)
            logp_fwd = PackedSequence(logp_fwd, h_fwd.batch_sizes)

            h_flat = h_rvs.data
            logp_rvs = self.linear(h_flat)
            logp_rvs = PackedSequence(logp_rvs, h_rvs.batch_sizes)

            logp_fwd, lengths = pad_packed_sequence(logp_fwd, batch_first=True)
            logp_rvs, lengths = pad_packed_sequence(logp_rvs, batch_first=True)

        else:
            b = h_fwd.size(0)
            n = h_fwd.size(1)
            h_flat = h_fwd.contiguous().view(-1, h_fwd.size(2))
            logp_fwd = self.linear(h_flat)
            logp_fwd = logp_fwd.view(b, n, -1)

            h_flat = h_rvs.contiguous().view(-1, h_rvs.size(2))
            logp_rvs = self.linear(h_flat)
            logp_rvs = logp_rvs.view(b, n, -1)

        # Prepend the forward log-probabilities with a zero column and append one to
        # the reverse log-probabilities so the two directions line up per position.
        # (h_fwd may still be a PackedSequence in the packed branch, so size off logp_fwd.)
        b = logp_fwd.size(0)
        zero = logp_fwd.new_zeros(b, 1, logp_fwd.size(2))
        logp_fwd = torch.cat([zero, logp_fwd], 1)
        logp_rvs = torch.cat([logp_rvs, zero], 1)

        logp = F.log_softmax(logp_fwd + logp_rvs, dim=2)

        if packed:
            # Every sequence gained one position from the zero column, so bump the lengths.
            lengths = [int(l) + 1 for l in lengths]
            logp = pack_padded_sequence(logp, lengths, batch_first=True)

        return logp
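A small illustrative sketch of the shift-and-sum idiom used above to align the forward and reverse log-probabilities; the shapes and tensors are made up for the example.

import torch
import torch.nn.functional as F

b, n, v = 2, 4, 10
logp_fwd = torch.randn(b, n, v)
logp_rvs = torch.randn(b, n, v)
zero = logp_fwd.new_zeros(b, 1, v)
# Position i sees the forward context up to i - 1 and the reverse context from i + 1.
logp = F.log_softmax(torch.cat([zero, logp_fwd], 1) + torch.cat([logp_rvs, zero], 1),
                     dim=2)                     # (b, n + 1, v)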
Example no. 9
    def forward(self, input: PackedSequence):

        input, batch_sizes = input.data, input.batch_sizes

        seq_len = batch_sizes.size()[0]
        max_batch_size = int(batch_sizes[0])

        output = input.new_zeros(input.size(0), self.hidden_size)

        hidden_state = input.new_zeros(max_batch_size, self.hidden_size)
        cell_state = input.new_zeros(max_batch_size, self.hidden_size)

        recurrent_mask = get_dropout_mask(
            self.recurrent_dropout_prob,
            hidden_state) if self.training else None

        cumsum_sizes = torch.cumsum(batch_sizes, dim=0)
        for timestep in range(seq_len):
            timestep = timestep if self.go_forward else seq_len - timestep - 1
            len_t = batch_sizes[timestep]
            begin, end = (cumsum_sizes[timestep] - len_t,
                          cumsum_sizes[timestep])

            input_t = input[begin:end]
            hidden_t, cell_t = self.cell(
                input_t, (hidden_state[0:len_t], cell_state[0:len_t]))

            if self.training:
                hidden_t = hidden_t * recurrent_mask[:len_t]

            output[begin:end] = hidden_t
            hidden_state = hidden_state.clone()
            cell_state = cell_state.clone()
            hidden_state[0:batch_sizes[timestep]] = hidden_t
            cell_state[0:batch_sizes[timestep]] = cell_t

        return PackedSequence(output, batch_sizes), (hidden_state, cell_state)
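An illustrative stand-alone walk over a packed batch, timestep by timestep, using batch_sizes exactly as the loop above does; the input is made up for the example.

import torch
from torch.nn.utils.rnn import pack_padded_sequence

packed = pack_padded_sequence(torch.randn(3, 5, 8), torch.tensor([5, 4, 2]),
                              batch_first=True)
data, batch_sizes = packed.data, packed.batch_sizes
offsets = torch.cumsum(batch_sizes, dim=0)

for t in range(batch_sizes.size(0)):
    begin, end = offsets[t] - batch_sizes[t], offsets[t]
    step = data[begin:end]  # (batch_sizes[t], 8): sequences still active at timestep t
    # an LSTM cell would consume `step` here, as in the loop above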