def forward(self,  # pylint: disable=arguments-differ
            inputs: PackedSequence,
            # pylint: disable=unused-argument
            initial_state: torch.Tensor = None) -> Tuple[PackedSequence, torch.Tensor]:
    """
    Parameters
    ----------
    inputs : ``PackedSequence``, required.
        A batch first ``PackedSequence`` to run the stacked LSTM over.
    initial_state : Tuple[torch.Tensor, torch.Tensor], optional, (default = None)
        Currently, this is ignored.

    Returns
    -------
    output_sequence : ``PackedSequence``
        The encoded sequence of shape (batch_size, sequence_length, hidden_size)
    final_states : ``torch.Tensor``
        The per-layer final (state, memory) states of the LSTM, each with shape
        (num_layers, batch_size, hidden_size).
    """
    inputs, lengths = pad_packed_sequence(inputs, batch_first=True)

    # Kernel takes sequence length first tensors.
    inputs = inputs.transpose(0, 1)

    sequence_length, batch_size, _ = inputs.size()
    accumulator_shape = [self.num_layers, sequence_length + 1, batch_size, self.hidden_size]
    state_accumulator = inputs.new_zeros(*accumulator_shape)
    memory_accumulator = inputs.new_zeros(*accumulator_shape)

    dropout_weights = inputs.new_ones(self.num_layers, batch_size, self.hidden_size)
    if self.training:
        # Normalize by 1 - dropout_prob to preserve the output statistics of the layer.
        dropout_weights.bernoulli_(1 - self.recurrent_dropout_probability)\
                       .div_((1 - self.recurrent_dropout_probability))

    # Workspace for the gate activations, with shape
    # (num_layers, sequence_length, batch_size, 6 * hidden_size).
    gates = inputs.new_zeros(self.num_layers, sequence_length, batch_size, 6 * self.hidden_size)

    lengths_variable = torch.LongTensor(lengths)
    implementation = _AlternatingHighwayLSTMFunction(self.input_size,
                                                     self.hidden_size,
                                                     num_layers=self.num_layers,
                                                     train=self.training)
    output, _ = implementation(inputs, self.weight, self.bias, state_accumulator,
                               memory_accumulator, dropout_weights, lengths_variable, gates)

    # TODO(Mark): Also return the state here by using index_select with the lengths so we can use
    # it as a Seq2VecEncoder.
    output = output.transpose(0, 1)
    output = pack_padded_sequence(output, lengths, batch_first=True)
    return output, None
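# Usage sketch (not from the original repo): demonstrates the batch-first pack/unpack
# contract that the forward above expects, using torch.nn.LSTM as a stand-in encoder.
# The shapes and the stand-in module are illustrative assumptions; only the
# pack_padded_sequence / pad_packed_sequence calls are standard PyTorch.
import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

batch_size, max_length, input_size, hidden_size = 3, 7, 50, 100
padded = torch.randn(batch_size, max_length, input_size)
lengths = torch.tensor([7, 5, 2])  # must be sorted in decreasing order for packing
packed = pack_padded_sequence(padded, lengths, batch_first=True)

encoder = torch.nn.LSTM(input_size, hidden_size, batch_first=True)  # stand-in for the module above
encoded, _ = encoder(packed)
unpacked, unpacked_lengths = pad_packed_sequence(encoded, batch_first=True)
# unpacked: (batch_size, max_length, hidden_size); padding positions are zero.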
def forward(self,  # pylint: disable=arguments-differ
            inputs: PackedSequence,
            initial_state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None):
    if not isinstance(inputs, PackedSequence):
        raise ConfigurationError('inputs must be PackedSequence but got %s' % (type(inputs)))

    sequence_tensor, batch_lengths = pad_packed_sequence(inputs, batch_first=True)
    batch_size = sequence_tensor.size()[0]
    length = sequence_tensor.size()[1]

    # Initialize the hidden and cell states for every layer.
    if initial_state is None:
        hidden = sequence_tensor.new_zeros(self.num_layers, batch_size, self.hidden_size)
        cell = sequence_tensor.new_zeros(self.num_layers, batch_size, self.n_chunk, self.chunk_size)
    else:
        hidden = initial_state[0].squeeze(0)
        cell = initial_state[1].squeeze(0)

    if self.training:
        for c in self.cells:
            c.sample_masks()

    final_hidden = []
    final_cell = []
    for l in range(len(self.cells)):
        curr_layer = [None] * length
        t_input = self.cells[l].ih(sequence_tensor)

        hx = hidden[l].squeeze(0)
        cx = cell[l].squeeze(0)
        for t in range(length):
            # hx, cx are overwritten every timestep; hx has shape (batch, dim).
            hx, cx = self.cells[l](None, hx, cx, transformed_input=t_input[:, t])
            curr_layer[t] = hx
        final_hidden.append(hx)
        final_cell.append(cx)

        # batch, length, dim
        sequence_tensor = torch.stack(curr_layer, dim=1)
        if l < len(self.cells) - 1:
            # Apply locked dropout after every LSTM layer except the last.
            sequence_tensor = self.lockdrop(sequence_tensor, self.dropout)

    output = pack_padded_sequence(sequence_tensor, batch_lengths, batch_first=True)
    final_state = (torch.stack(final_hidden), torch.stack(final_cell))
    return output, final_state
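# The forward above calls ``self.lockdrop(x, dropout)`` between layers. Below is a minimal
# sketch of such a locked (variational) dropout module, assuming the awd-lstm-style interface
# implied by the call site; the original project's implementation may differ in detail.
import torch

class LockedDropout(torch.nn.Module):
    """Applies the same dropout mask at every timestep of a (batch, length, dim) tensor."""
    def forward(self, x: torch.Tensor, dropout: float = 0.5) -> torch.Tensor:
        if not self.training or dropout == 0.0:
            return x
        # One mask per batch element and feature, broadcast over the length dimension,
        # scaled by 1 / (1 - dropout) to preserve the expected activation magnitude.
        mask = x.new_empty(x.size(0), 1, x.size(2)).bernoulli_(1 - dropout) / (1 - dropout)
        return mask * x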
def forward(self, input: PackedSequence):
    # Unpack the PackedSequence via its attributes: ``data`` holds the flattened timesteps
    # and ``batch_sizes`` the number of active sequences at each timestep.
    input, batch_sizes = input.data, input.batch_sizes
    seq_len = batch_sizes.size()[0]
    max_batch_size = batch_sizes[0]

    output = input.new_zeros(input.size(0), self.hidden_size)
    hidden_state = input.new_zeros(max_batch_size, self.hidden_size)
    cell_state = input.new_zeros(max_batch_size, self.hidden_size)

    recurrent_mask = get_dropout_mask(
        self.recurrent_dropout_prob, hidden_state) if self.training else None
    cumsum_sizes = torch.cumsum(batch_sizes, dim=0)

    for timestep in range(seq_len):
        timestep = timestep if self.go_forward else seq_len - timestep - 1
        len_t = batch_sizes[timestep]
        begin, end = (cumsum_sizes[timestep] - len_t, cumsum_sizes[timestep])
        input_t = input[begin:end]

        hidden_t, cell_t = self.cell(input_t, (hidden_state[0:len_t], cell_state[0:len_t]))
        if self.training:
            hidden_t = hidden_t * recurrent_mask[:len_t]
        output[begin:end] = hidden_t

        hidden_state = hidden_state.clone()
        cell_state = cell_state.clone()
        hidden_state[0:batch_sizes[timestep]] = hidden_t
        cell_state[0:batch_sizes[timestep]] = cell_t

    return PackedSequence(output, batch_sizes), (hidden_state, cell_state)
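# The forward above relies on a ``get_dropout_mask`` helper for recurrent dropout. A minimal
# sketch is given below; the signature matches the call site and the behaviour likely mirrors
# allennlp.nn.util.get_dropout_mask (inverted dropout), but the body here is an assumption.
import torch

def get_dropout_mask(dropout_probability: float, tensor_for_masking: torch.Tensor) -> torch.Tensor:
    """Return a binary mask shaped like ``tensor_for_masking``, scaled by 1 / (1 - p)
    so that masked activations keep their expected value."""
    binary_mask = (torch.rand_like(tensor_for_masking) > dropout_probability).to(tensor_for_masking.dtype)
    return binary_mask / (1.0 - dropout_probability)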