def _reverse(input_, seq_lengths, seq_dim, batch_dim):
    """Reverse `input_` along `seq_dim`, using per-example lengths if given.

    Args:
      input_: Value to reverse; forwarded unchanged to the `array_ops` call.
      seq_lengths: Lengths forwarded to `array_ops.reverse_sequence`, or
        None to reverse the whole `seq_dim` axis.
      seq_dim: Axis that is reversed.
      batch_dim: Batch axis; only used when `seq_lengths` is not None.

    Returns:
      The result of `array_ops.reverse_sequence` when `seq_lengths` is not
      None, otherwise the result of `array_ops.reverse` over `[seq_dim]`.
    """
    if seq_lengths is None:
        # No lengths: flip the complete axis.
        return array_ops.reverse(input_, axis=[seq_dim])
    return array_ops.reverse_sequence(
        input=input_,
        seq_lengths=seq_lengths,
        seq_dim=seq_dim,
        batch_dim=batch_dim)
def _reverse_seq(input_seq, lengths): """Reverse a list of Tensors up to specified lengths. Args: input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) lengths: A tensor of dimension batch_size, containing lengths for each sequence in the batch. If "None" is specified, simply reverses the list. Returns: time-reversed sequence """ if lengths is None: return list(reversed(input_seq)) input_shape = tensor_shape.unknown_shape(ndims=input_seq[0].get_shape().ndims) for input_ in input_seq: input_shape.merge_with(input_.get_shape()) input_.set_shape(input_shape) # Join into (time, batch_size, depth) s_joined = array_ops.pack(input_seq) # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32 if lengths is not None: lengths = math_ops.to_int64(lengths) # Reverse along dimension 0 s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops.unpack(s_reversed) for r in result: r.set_shape(input_shape) return result
def testFloatReverseSequenceGrad(self):
    """Check the numerical gradient of reverse_sequence on a 5-D float input.

    The input is built as a (3, 2, 4) array, reshaped to (3, 2, 4, 1, 1) and
    transposed so that axes 0 and 2 swap; the gradient error reported by
    `gradient_checker.compute_gradient_error` must stay below 1e-8.
    """
    # np.float64 is the explicit spelling of the removed `np.float` alias.
    x = np.asarray(
        [[[1, 2, 3, 4], [5, 6, 7, 8]],
         [[9, 10, 11, 12], [13, 14, 15, 16]],
         [[17, 18, 19, 20], [21, 22, 23, 24]]],
        dtype=np.float64)
    x = x.reshape(3, 2, 4, 1, 1)
    x = x.transpose([2, 1, 0, 3, 4])  # transpose axes 0 <=> 2

    # reverse dim 0 up to (0:3, none, 0:4) along dim=2
    seq_axis = 0
    batch_axis = 2
    seq_lengths = np.asarray([3, 0, 4], dtype=np.int64)

    with self.cached_session():
        input_t = constant_op.constant(x, shape=x.shape)
        seq_lengths_t = constant_op.constant(
            seq_lengths, shape=seq_lengths.shape)
        reverse_sequence_out = array_ops.reverse_sequence(
            input_t,
            batch_axis=batch_axis,
            seq_axis=seq_axis,
            seq_lengths=seq_lengths_t)
        err = gradient_checker.compute_gradient_error(
            input_t, x.shape, reverse_sequence_out, x.shape, x_init_value=x)
    print("ReverseSequence gradient error = %g" % err)
    self.assertLess(err, 1e-8)
def _reverse_seq(input_seq, lengths): """Reverse a list of Tensors up to specified lengths. Args: input_seq: Sequence of seq_len tensors of dimension (batch_size, depth) lengths: A tensor of dimension batch_size, containing lengths for each sequence in the batch. If "None" is specified, simply reverses the list. Returns: time-reversed sequence """ if lengths is None: return list(reversed(input_seq)) for input_ in input_seq: input_.set_shape(input_.get_shape().with_rank(2)) # Join into (time, batch_size, depth) s_joined = array_ops_.pack(input_seq) # Reverse along dimension 0 s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops_.unpack(s_reversed) return result
def _ReverseSequenceGrad(op, grad):
    """Compute the gradient of a reverse_sequence op.

    The incoming gradient is reversed with the same `batch_dim` / `seq_dim`
    attributes and the same sequence lengths (second input) as `op`.

    Args:
      op: The op being differentiated; its second input holds the lengths.
      grad: Gradient with respect to the op's output.

    Returns:
      A two-element list: the reversed gradient for the first input and
      None for the sequence-lengths input.
    """
    lengths = op.inputs[1]
    reversed_grad = array_ops.reverse_sequence(
        grad,
        batch_dim=op.get_attr("batch_dim"),
        seq_dim=op.get_attr("seq_dim"),
        seq_lengths=lengths)
    return [reversed_grad, None]
def _reverse(input_, seq_lengths, seq_axis, batch_axis):
    """Reverse `input_` along `seq_axis`, using per-example lengths if given.

    Args:
      input_: Value to reverse; forwarded unchanged to the `array_ops` call.
      seq_lengths: Lengths forwarded to `array_ops.reverse_sequence`, or
        None to reverse the whole `seq_axis`.
      seq_axis: Axis that is reversed.
      batch_axis: Batch axis; only used when `seq_lengths` is not None.

    Returns:
      The result of `array_ops.reverse_sequence` when `seq_lengths` is not
      None, otherwise the result of `array_ops.reverse` over `[seq_axis]`.
    """
    if seq_lengths is None:
        return array_ops.reverse(input_, axis=[seq_axis])
    return array_ops.reverse_sequence(
        input=input_,
        seq_lengths=seq_lengths,
        seq_axis=seq_axis,
        batch_axis=batch_axis)
def _reverse(input_, seq_lengths, seq_dim, batch_dim):
    """Reverse `input_` along `seq_dim`, using per-example lengths if given.

    Reads `use_tpu` from the enclosing scope.

    Args:
      input_: Value to reverse; forwarded unchanged to the `array_ops` call.
      seq_lengths: Lengths forwarded to `array_ops.reverse_sequence`, or
        None to reverse the whole `seq_dim` axis.
      seq_dim: Axis that is reversed.
      batch_dim: Batch axis; only used when `seq_lengths` is not None.

    Returns:
      The result of `array_ops.reverse_sequence` when `seq_lengths` is not
      None, otherwise the result of `array_ops.reverse` over `[seq_dim]`.

    Raises:
      AssertionError: If `seq_lengths` is None while `use_tpu` is truthy.
    """
    if seq_lengths is None:
        # See b/69305369.
        # NOTE(review): the assertion guards the branch WITHOUT sequence
        # lengths, while its message talks about variable sequence lengths.
        # Confirm which branch was meant.
        assert not use_tpu, (
            'Bidirectional with variable sequence lengths unsupported on TPU')
        return array_ops.reverse(input_, axis=[seq_dim])
    return array_ops.reverse_sequence(
        input=input_,
        seq_lengths=seq_lengths,
        seq_dim=seq_dim,
        batch_dim=batch_dim)
def testShapeFunctionEdgeCases(self):
    """Exercise shape inference and argument validation of reverse_sequence.

    Covers: unknown input rank, batch-size mismatch, out-of-range
    `seq_axis` / `batch_axis`, and the runtime error raised when the batch
    and sequence axes coincide.
    """
    t = array_ops.reverse_sequence(
        array_ops.placeholder(dtypes.float32, shape=None),
        seq_lengths=array_ops.placeholder(dtypes.int64, shape=(32,)),
        batch_axis=0,
        seq_axis=1)
    self.assertIs(t.get_shape().ndims, None)

    # Batch size mismatched between input and seq_lengths.
    # seq_axis is kept in range (1 < rank 3) so that the ValueError can only
    # come from the 32-vs-33 batch mismatch, not from an out-of-bounds axis.
    with self.assertRaises(ValueError):
        array_ops.reverse_sequence(
            array_ops.placeholder(dtypes.float32, shape=(32, 2, 3)),
            seq_lengths=array_ops.placeholder(dtypes.int64, shape=(33,)),
            seq_axis=1)

    # seq_axis out of bounds.
    with self.assertRaisesRegexp(ValueError, "seq_dim must be < input rank"):
        array_ops.reverse_sequence(
            array_ops.placeholder(dtypes.float32, shape=(32, 2, 3)),
            seq_lengths=array_ops.placeholder(dtypes.int64, shape=(32,)),
            seq_axis=3)

    # batch_axis out of bounds.
    with self.assertRaisesRegexp(ValueError, "batch_dim must be < input rank"):
        array_ops.reverse_sequence(
            array_ops.placeholder(dtypes.float32, shape=(32, 2, 3)),
            seq_lengths=array_ops.placeholder(dtypes.int64, shape=(32,)),
            seq_axis=0,
            batch_axis=3)

    with self.cached_session():
        inputs = array_ops.placeholder(dtypes.float32, shape=(32, 2, 3))
        seq_lengths = array_ops.placeholder(dtypes.int64, shape=(32,))
        output = array_ops.reverse_sequence(
            inputs, seq_lengths=seq_lengths,
            seq_axis=0)  # batch_axis default is 0
        with self.assertRaisesOpError("batch_dim == seq_dim"):
            output.eval(feed_dict={
                inputs: np.random.rand(32, 2, 3),
                seq_lengths: xrange(32)
            })
def _reverse_seq(input_seq, lengths): """Reverse a list of Tensors up to specified lengths. Args: input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) or nested tuples of tensors. lengths: A tensor of dimension batch_size, containing lengths for each sequence in the batch. If "None" is specified, simply reverses the list. Returns: time-reversed sequence """ if lengths is None: return list(reversed(input_seq)) input_is_tuple = nest.is_sequence(input_seq[0]) flat_input_seq = (nest.flatten(input_) if input_is_tuple else [input_] for input_ in input_seq) flat_results = [[] for _ in range(len(input_seq))] for sequence in zip(*flat_input_seq): input_shape = tensor_shape.unknown_shape( ndims=sequence[0].get_shape().ndims) for input_ in sequence: input_shape.merge_with(input_.get_shape()) input_.set_shape(input_shape) # Join into (time, batch_size, depth) s_joined = array_ops.pack(sequence) # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32 if lengths is not None: lengths = math_ops.to_int64(lengths) # Reverse along dimension 0 s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops.unpack(s_reversed) for r, flat_result in zip(result, flat_results): r.set_shape(input_shape) flat_result.append(r) results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) if input_is_tuple else flat_result[0] for input_, flat_result in zip(input_seq, flat_results)] return results
def _reverse_seq(input_seq, lengths): """Reverse a list of Tensors up to specified lengths. Args: input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) or nested tuples of tensors. lengths: A `Tensor` of dimension batch_size, containing lengths for each sequence in the batch. If "None" is specified, simply reverses the list. Returns: time-reversed sequence """ if lengths is None: return list(reversed(input_seq)) flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) flat_results = [[] for _ in range(len(input_seq))] for sequence in zip(*flat_input_seq): input_shape = tensor_shape.unknown_shape( ndims=sequence[0].get_shape().ndims) for input_ in sequence: input_shape.merge_with(input_.get_shape()) input_.set_shape(input_shape) # Join into (time, batch_size, depth) s_joined = array_ops.stack(sequence) # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32 if lengths is not None: lengths = math_ops.to_int64(lengths) # Reverse along dimension 0 s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops.unstack(s_reversed) for r, flat_result in zip(result, flat_results): r.set_shape(input_shape) flat_result.append(r) results = [ nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) for input_, flat_result in zip(input_seq, flat_results) ] return results
def _testReverseSequence(self,
                         x,
                         batch_axis,
                         seq_axis,
                         seq_lengths,
                         truth,
                         use_gpu=False,
                         expected_err_re=None):
    """Run reverse_sequence on `x` and compare with `truth` or an error.

    Args:
      x: Input passed to `array_ops.reverse_sequence`.
      batch_axis: Batch axis argument.
      seq_axis: Sequence axis argument.
      seq_lengths: Per-example lengths argument.
      truth: Expected value and shape when no error is expected.
      use_gpu: Forwarded to `self.test_session`.
      expected_err_re: If not None, evaluating the op must raise an op error
        matching this regular expression.
    """
    with self.test_session(use_gpu=use_gpu):
        ans = array_ops.reverse_sequence(
            x,
            batch_axis=batch_axis,
            seq_axis=seq_axis,
            seq_lengths=seq_lengths)
        if expected_err_re is not None:
            # Failure path: evaluation itself has to raise.
            with self.assertRaisesOpError(expected_err_re):
                ans.eval()
            return
        actual = ans.eval()
        self.assertAllClose(actual, truth, atol=1e-10)
        self.assertShapeEqual(truth, ans)
def _testReverseSequence(self,
                         x,
                         batch_axis,
                         seq_axis,
                         seq_lengths,
                         truth,
                         use_gpu=False,
                         expected_err_re=None):
    """Run reverse_sequence on `x` and compare with `truth` or an error.

    Args:
      x: Input passed to `array_ops.reverse_sequence`.
      batch_axis: Batch axis argument.
      seq_axis: Sequence axis argument.
      seq_lengths: Per-example lengths argument.
      truth: Expected value and shape when no error is expected.
      use_gpu: Forwarded to `self.cached_session`.
      expected_err_re: If not None, evaluating the op must raise an op error
        matching this regular expression.
    """
    with self.cached_session(use_gpu=use_gpu):
        ans = array_ops.reverse_sequence(
            x,
            batch_axis=batch_axis,
            seq_axis=seq_axis,
            seq_lengths=seq_lengths)
        if expected_err_re is not None:
            # Failure path: evaluation itself has to raise.
            with self.assertRaisesOpError(expected_err_re):
                self.evaluate(ans)
            return
        actual = self.evaluate(ans)
        self.assertAllClose(actual, truth, atol=1e-10)
        self.assertShapeEqual(truth, ans)
def biLSTM(inputs, params, num_hidden, param_id):
    '''
    Run a forward and a backward LSTM pass over `inputs` and sum the results.

    :param inputs: the input data, e.g. (seqLen, 1, 40); seqLen is the number
        of time steps, i.e. the number of frames
    :param params: the list of parameters
    :param num_hidden: the number of hidden cells per layer, e.g. 128
    :param param_id: the index in `params` at which to start reading; the
        backward pass is called with `param_id + 2`
    :return: the element-wise sum of the forward and backward outputs
    '''
    # Initial cell and output states, both zero tensors of shape
    # (1, num_hidden), e.g. (1, 128).
    c = array_ops.zeros((1, num_hidden))
    h = array_ops.zeros((1, num_hidden))
    state = c, h

    forward_inputs = inputs
    forward_state = state
    forward_tmp = []
    # NOTE(review): .eval() is called without an explicit session, so this
    # presumably relies on a default session being active. Confirm at the
    # call site.
    for frame in forward_inputs.eval():
        forward_h, forward_state = dynamic_rnn(frame, forward_state, params,
                                               param_id)
        forward_tmp.append(forward_h)
    forward_h = tf.stack(forward_tmp)

    # Reverse the data along the time axis for the backward LSTM. A single
    # length entry (the full sequence length) is supplied for batch axis 1.
    # `batch_axis` replaces the deprecated `batch_dim` spelling so that both
    # axis arguments use the same naming generation.
    backward_inputs = array_ops.reverse_sequence(
        input=inputs,
        seq_lengths=(inputs.get_shape().as_list()[0], ),
        seq_axis=0,
        batch_axis=1)
    backward_state = state
    backward_tmp = []
    for frame in backward_inputs.eval():
        backward_h, backward_state = dynamic_rnn(frame, backward_state,
                                                 params, param_id + 2)
        backward_tmp.append(backward_h)
    # Put the backward outputs back into forward time order.
    backward_tmp.reverse()
    backward_h = tf.stack(backward_tmp)

    # Sum the forward and backward results as the output.
    hidden = tf.reduce_sum((forward_h, backward_h), axis=0)
    return hidden
def _testReverseSequence(self,
                         x,
                         batch_axis,
                         seq_axis,
                         seq_lengths,
                         truth,
                         expected_err_re=None):
    """Feed `x` through reverse_sequence and check the value or the error.

    Both the data and the lengths are fed through placeholders whose dtypes
    are taken from the given arrays; the op is built inside
    `self.test_scope()`.

    Args:
      x: Array fed as the op input.
      batch_axis: Batch axis argument.
      seq_axis: Sequence axis argument.
      seq_lengths: Array fed as the per-example lengths.
      truth: Expected value when no error is expected.
      expected_err_re: If not None, evaluating the op must raise an op error
        matching this regular expression.
    """
    with self.cached_session():
        p = array_ops.placeholder(dtypes.as_dtype(x.dtype))
        lengths = array_ops.placeholder(dtypes.as_dtype(seq_lengths.dtype))
        with self.test_scope():
            ans = array_ops.reverse_sequence(
                p,
                batch_axis=batch_axis,
                seq_axis=seq_axis,
                seq_lengths=lengths)
        feed = {p: x, lengths: seq_lengths}
        if expected_err_re is not None:
            with self.assertRaisesOpError(expected_err_re):
                ans.eval(feed_dict=feed)
            return
        actual = ans.eval(feed_dict=feed)
        self.assertAllClose(actual, truth, atol=1e-10)
def _reverse(self, t, lengths): """Time reverse the provided tensor or list of tensors. Assumes the top dimension is the time dimension. Args: t: 3D tensor or list of 2D tensors to be reversed lengths: 1D tensor of lengths, or None Returns: A reversed tensor or list of tensors """ if isinstance(t, list): return list(reversed(t)) else: if lengths is None: return array_ops.reverse(t, [True, False, False]) else: return array_ops.reverse_sequence(t, lengths, 0, 1)
def _reverse(self, t, lengths): """Time reverse the provided tensor or list of tensors. Assumes the top dimension is the time dimension. Args: t: 3D tensor or list of 2D tensors to be reversed lengths: 1D tensor of lengths, or `None` Returns: A reversed tensor or list of tensors """ if isinstance(t, list): return list(reversed(t)) else: if lengths is None: return array_ops.reverse_v2(t, [0]) else: return array_ops.reverse_sequence(t, lengths, 0, 1)
def _testReverseSequence(self,
                         x,
                         batch_axis,
                         seq_axis,
                         seq_lengths,
                         truth,
                         expected_err_re=None):
    """Feed `x` through reverse_sequence and check the value or the error.

    Both the data and the lengths are fed through placeholders whose dtypes
    are taken from the given arrays; the op is built inside
    `self.test_scope()`.

    Args:
      x: Array fed as the op input.
      batch_axis: Batch axis argument.
      seq_axis: Sequence axis argument.
      seq_lengths: Array fed as the per-example lengths.
      truth: Expected value when no error is expected.
      expected_err_re: If not None, evaluating the op must raise an op error
        matching this regular expression.
    """
    with self.session():
        p = array_ops.placeholder(dtypes.as_dtype(x.dtype))
        lengths = array_ops.placeholder(dtypes.as_dtype(seq_lengths.dtype))
        with self.test_scope():
            ans = array_ops.reverse_sequence(
                p,
                batch_axis=batch_axis,
                seq_axis=seq_axis,
                seq_lengths=lengths)
        feed = {p: x, lengths: seq_lengths}
        if expected_err_re is not None:
            with self.assertRaisesOpError(expected_err_re):
                ans.eval(feed_dict=feed)
            return
        actual = ans.eval(feed_dict=feed)
        self.assertAllClose(actual, truth, atol=1e-10)
def _reverse_seq(input_seq, lengths): """Reverse a list of Tensors up to specified lengths. Args: input_seq: Sequence of seq_len tensors of dimension (batch_size, depth) lengths: A tensor of dimension batch_size, containing lengths for each sequence in the batch. If "None" is specified, simply reverses the list. Returns: time-reversed sequence """ for input_ in input_seq: input_.set_shape(input_.get_shape().with_rank(2)) # Join into (time, batch_size, depth) s_joined = array_ops_.pack(input_seq) # Reverse along dimension 0 s_reversed = array_ops_.reverse_sequence(s_joined, lengths, 0, 1) # Split again into list result = array_ops_.unpack(s_reversed) return result
def reverse_seq(input_seq, lengths):
    """Reverse a list of rank-2 tensors up to specified lengths.

    Args:
      input_seq: Sequence of tensors, one per time step; each is merged with
        a rank-2 (matrix) shape.
      lengths: Per-example lengths, cast to int64 before use. If None, the
        list order is simply reversed.

    Returns:
      The time-reversed sequence as a list.
    """
    if lengths is None:
        return list(reversed(input_seq))

    input_shape = tensor_shape.matrix(None, None)
    for input_ in input_seq:
        # merge_with returns the merged shape instead of mutating the
        # receiver, so the result has to be kept for the static shape
        # information to accumulate across time steps.
        input_shape = input_shape.merge_with(input_.get_shape())
        input_.set_shape(input_shape)

    # Join into (time, batch_size, depth)
    s_joined = array_ops.pack(input_seq)

    # TODO(schuster, ebrevdo): Remove cast when reverse_sequence takes int32
    # (lengths is known to be non-None here because of the early return.)
    lengths = math_ops.to_int64(lengths)

    # Reverse along dimension 0
    s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1)
    # Split again into list
    result = array_ops.unpack(s_reversed)
    for r in result:
        r.set_shape(input_shape)
    return result
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    """Build a forward and a backward dynamic RNN over the same inputs.

    The backward RNN runs on `inputs` reversed along the time axis (up to
    `sequence_length`), and its output is reversed back afterwards. When
    `Config.cell_type` equals "LSTMCell", only the `.c` part of each final
    state is returned.

    :param cell_fw: An instance of RNNCell, to be used for forward direction
    :param cell_bw: An instance of RNNCell, to be used for backward direction
    :param inputs: The RNN inputs
    :param sequence_length: An int32/int64 vector
    :param initial_state_fw: An initial state for the forward RNN
    :param initial_state_bw: An initial state for the backward RNN
    :param dtype: The data type for the initial states and expected output
    :param parallel_iterations: The number of iterations in parallel
    :param swap_memory: Forwarded to `dynamic_rnn`
    :param time_major: If true, axis 0 is time and axis 1 is batch;
        otherwise axis 0 is batch and axis 1 is time
    :param scope: Variable scope name; defaults to "bidirectional_rnn"
    :return: A tuple (outputs, output_states)
    :raises TypeError: If `cell_fw` or `cell_bw` is not an RNNCell
    """
    if not isinstance(cell_fw, rnn_cell.RNNCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, rnn_cell.RNNCell):
        raise TypeError("cell_bw must be an instance of RNNCell")

    # Axis layout depends on whether time or batch comes first.
    time_dim, batch_dim = (0, 1) if time_major else (1, 0)

    def _flip(tensor):
        # NOTE(review): `sequence_length` defaults to None and is forwarded
        # as-is to reverse_sequence. Confirm callers always supply it.
        return array_ops.reverse_sequence(
            input=tensor, seq_lengths=sequence_length,
            seq_dim=time_dim, batch_dim=batch_dim)

    # Keyword arguments that are identical for both directions.
    common = dict(sequence_length=sequence_length,
                  dtype=dtype,
                  parallel_iterations=parallel_iterations,
                  swap_memory=swap_memory,
                  time_major=time_major)

    with vs.variable_scope(scope or "bidirectional_rnn"):
        # Forward direction
        with vs.variable_scope("fw") as fw_scope:
            output_fw, output_state_fw = dynamic_rnn(
                cell=cell_fw, inputs=inputs,
                initial_state=initial_state_fw, scope=fw_scope, **common)

        # Backward direction
        with vs.variable_scope("bw") as bw_scope:
            bw_raw_output, bw_raw_state = dynamic_rnn(
                cell=cell_bw, inputs=_flip(inputs),
                initial_state=initial_state_bw, scope=bw_scope, **common)

        output_bw = _flip(bw_raw_output)

        is_lstm = Config.cell_type == "LSTMCell"
        # NOTE(review): the final backward state is passed through the same
        # time-axis reversal as the per-step outputs. Verify that this is
        # intended for a state tensor.
        output_state_bw = _flip(bw_raw_state.c if is_lstm else bw_raw_state)
        fw_state = output_state_fw.c if is_lstm else output_state_fw

    return ((output_fw, output_bw), (fw_state, output_state_bw))
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    """Creates a dynamic version of bidirectional recurrent neural network.

    Builds independent forward and backward RNNs over the same input. The
    backward RNN consumes the input reversed along the time axis (up to
    `sequence_length`) and its output is reversed back, so both outputs are
    in forward time order. The input_size of forward and backward cell must
    match. The initial state for both directions is zero by default (but can
    be set optionally).

    Args:
      cell_fw: An instance of RNNCell, to be used for forward direction.
      cell_bw: An instance of RNNCell, to be used for backward direction.
      inputs: The RNN inputs.
        If time_major == False (default), this must be a tensor of shape
        `[batch_size, max_time, input_size]`.
        If time_major == True, this must be a tensor of shape
        `[max_time, batch_size, input_size]`.
      sequence_length: An int32/int64 vector, size `[batch_size]`, containing
        the actual lengths for each of the sequences.
      initial_state_fw: (optional) An initial state for the forward RNN.
        This must be a tensor of appropriate type and shape
        `[batch_size, cell_fw.state_size]`. If `cell_fw.state_size` is a
        tuple, this should be a tuple of tensors having shapes
        `[batch_size, s] for s in cell_fw.state_size`.
      initial_state_bw: (optional) Same as for `initial_state_fw`, but using
        the corresponding properties of `cell_bw`.
      dtype: (optional) The data type for the initial states and expected
        output. Required if initial states are not provided or RNN states
        have a heterogeneous dtype.
      parallel_iterations: (Default: 32). The number of iterations to run in
        parallel. Trades off time for space: values >> 1 use more memory but
        take less time, smaller values use less memory but take longer.
      swap_memory: Transparently swap the tensors produced in forward
        inference but needed for back prop from GPU to CPU.
      time_major: The shape format of the `inputs` and `outputs` Tensors.
        If true, these `Tensors` must be shaped
        `[max_time, batch_size, depth]`; if false,
        `[batch_size, max_time, depth]`.
      scope: VariableScope for the created subgraph; defaults to "BiRNN"

    Returns:
      A tuple (outputs, output_states) where:
        outputs: A tuple (output_fw, output_bw) containing the forward and
          the backward rnn output `Tensor`, shaped according to `time_major`
          with the last dimension `cell_fw.output_size` /
          `cell_bw.output_size` respectively.
        output_states: A tuple (output_state_fw, output_state_bw) containing
          the forward and the backward final states of bidirectional rnn.

    Raises:
      TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
    """
    if not isinstance(cell_fw, tf.contrib.rnn.RNNCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, tf.contrib.rnn.RNNCell):
        raise TypeError("cell_bw must be an instance of RNNCell")

    # Axis layout depends on whether time or batch comes first.
    time_dim, batch_dim = (0, 1) if time_major else (1, 0)

    def _flip(tensor):
        # NOTE(review): `sequence_length` defaults to None and is forwarded
        # as-is to reverse_sequence. Confirm callers always supply it.
        return array_ops.reverse_sequence(
            input=tensor, seq_lengths=sequence_length,
            seq_dim=time_dim, batch_dim=batch_dim)

    # Keyword arguments that are identical for both directions.
    common = dict(sequence_length=sequence_length,
                  dtype=dtype,
                  parallel_iterations=parallel_iterations,
                  swap_memory=swap_memory,
                  time_major=time_major)

    with vs.variable_scope(scope or "BiRNN"):
        # Forward direction
        with vs.variable_scope("FW") as fw_scope:
            output_fw, output_state_fw = dynamic_rnn(
                cell=cell_fw, inputs=inputs,
                initial_state=initial_state_fw, scope=fw_scope, **common)

        # Backward direction
        with vs.variable_scope("BW") as bw_scope:
            bw_raw_output, output_state_bw = dynamic_rnn(
                cell=cell_bw, inputs=_flip(inputs),
                initial_state=initial_state_bw, scope=bw_scope, **common)

        # Bring the backward output back into forward time order.
        output_bw = _flip(bw_raw_output)

    return ((output_fw, output_bw), (output_state_fw, output_state_bw))
def dynamic_bidirectional_rnn(cell_fw, cell_bw, inputs, sequence_length,
                              initial_state_fw=None, initial_state_bw=None,
                              ff_keep_prob=1., recur_keep_prob=True,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    """Creates a bidirectional recurrent neural network.

    Builds independent forward and backward RNNs over `inputs`. The backward
    RNN runs on the input reversed along the time axis (up to
    `sequence_length`); its output is reversed back and concatenated with
    the forward output along axis 2.

    Args:
      cell_fw: An instance of BaseCell, to be used for forward direction.
      cell_bw: An instance of BaseCell, to be used for backward direction.
      inputs: A Tensor of inputs; axis order is selected by `time_major`.
      sequence_length: An int32/int64 vector, size [batch_size], containing
        the actual lengths for each of the sequences.
      initial_state_fw: (optional) An initial state for the forward RNN.
      initial_state_bw: (optional) Same as for initial_state_fw.
      ff_keep_prob: Forwarded positionally to `dynamic_rnn`.
      recur_keep_prob: Forwarded positionally to `dynamic_rnn`.
      dtype: (optional) The data type for the initial state. Required if
        either of the initial states are not provided.
      parallel_iterations: Forwarded to `dynamic_rnn`.
      swap_memory: Forwarded to `dynamic_rnn`.
      time_major: If true, axis 0 is time and axis 1 is batch; otherwise
        axis 0 is batch and axis 1 is time.
      scope: Prefix for the two variable scopes (suffixes "_FW" and "_BW");
        defaults to "BiRNN"

    Returns:
      A tuple (outputs, output_state_fw, output_state_bw) where:
        outputs is the depth-concatenated forward and backward outputs
        output_state_fw is the final state of the forward rnn
        output_state_bw is the final state of the backward rnn

    Raises:
      TypeError: If "cell_fw" or "cell_bw" is not an instance of BaseCell,
        or if "inputs" is not a Tensor.
    """
    if not isinstance(cell_fw, BaseCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, BaseCell):
        raise TypeError("cell_bw must be an instance of RNNCell")
    if not isinstance(inputs, ops.Tensor):
        raise TypeError("inputs must be a Tensor")

    name = scope or "BiRNN"
    # (sequence axis, batch axis) for reverse_sequence.
    seq_axis, batch_axis = (0, 1) if time_major else (1, 0)

    # Forward direction
    with vs.variable_scope(name + "_FW") as fw_scope:
        output_fw, output_state_fw = dynamic_rnn(
            cell_fw, inputs, sequence_length, initial_state_fw,
            ff_keep_prob, recur_keep_prob, dtype, parallel_iterations,
            swap_memory, time_major, scope=fw_scope)

    # Backward direction
    rev_inputs = array_ops.reverse_sequence(
        inputs, sequence_length, seq_axis, batch_axis)
    with vs.variable_scope(name + "_BW") as bw_scope:
        bw_raw_output, output_state_bw = dynamic_rnn(
            cell_bw, rev_inputs, sequence_length, initial_state_bw,
            ff_keep_prob, recur_keep_prob, dtype, parallel_iterations,
            swap_memory, time_major, scope=bw_scope)
    output_bw = array_ops.reverse_sequence(
        bw_raw_output, sequence_length, seq_axis, batch_axis)

    # Concat each of the forward/backward outputs
    outputs = array_ops.concat(2, [output_fw, output_bw])
    return (outputs, output_state_fw, output_state_bw)
def _reverse(_input, seq_lengths):
    """Reverse `_input` along axis 1, per example, up to `seq_lengths`.

    Axis 0 is used as the batch axis.

    Args:
      _input: Value forwarded to `array_ops.reverse_sequence`.
      seq_lengths: Per-example lengths forwarded to the same call.

    Returns:
      The result of `array_ops.reverse_sequence`.
    """
    flipped = array_ops.reverse_sequence(
        input=_input,
        seq_lengths=seq_lengths,
        seq_dim=1,
        batch_dim=0)
    return flipped
def testInvalidArguments(self):
    """Check that reverse_sequence rejects malformed arguments.

    Each case accepts either a `ValueError` or an
    `errors.InvalidArgumentError`, and the patterns list two alternative
    messages where the wording can differ.
    """
    data = [[1, 2], [3, 4]]
    invalid_arg = (ValueError, errors.InvalidArgumentError)
    batch_mismatch_re = (r"Dimensions must be equal|"
                         r"Length of seq_lengths != input.dims\(0\)")

    # Batch size mismatched between input and seq_lengths.
    # seq_length too long
    with self.assertRaisesRegex(invalid_arg, batch_mismatch_re):
        array_ops.reverse_sequence(data, [2, 2, 2], seq_axis=1)

    # seq_length too short
    with self.assertRaisesRegex(invalid_arg, batch_mismatch_re):
        array_ops.reverse_sequence(data, [2], seq_axis=1)

    # Invalid seq_length shape
    with self.assertRaisesRegex(invalid_arg,
                                ("Shape must be rank 1 but is rank 2|"
                                 "seq_lengths must be 1-dim")):
        array_ops.reverse_sequence(data, [[2, 2]], seq_axis=1)

    # seq_axis out of bounds.
    with self.assertRaisesRegex(invalid_arg, "seq_dim must be < input rank"):
        array_ops.reverse_sequence(data, [2, 2], seq_axis=2)

    # batch_axis out of bounds.
    with self.assertRaisesRegex(invalid_arg, "batch_dim must be < input rank"):
        array_ops.reverse_sequence(data, [2, 2], seq_axis=1, batch_axis=3)

    # Batch and sequence axes coincide; the op result is evaluated inside
    # the context so that a late failure is also captured.
    with self.assertRaisesRegex(
            (errors.OpError, errors.InvalidArgumentError),
            "batch_dim == seq_dim == 0"):
        output = array_ops.reverse_sequence(data, [2, 2], seq_axis=0)
        self.evaluate(output)
def __init__(self, sequence_length, num_classes, vocab_size, embedding_size,
             hidden_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
    """Build the graph: embedding -> two-direction LSTM -> conv/max-pool -> softmax.

    Args:
      sequence_length: Second dimension of the `input_x` placeholder; also
        used to size the max-pool window.
      num_classes: Width of `input_y` and of the output scores.
      vocab_size: Number of rows of the embedding matrix.
      embedding_size: Number of columns of the embedding matrix.
      hidden_size: Unit count of both BasicLSTMCells; also the filter width
        of each convolution.
      filter_sizes: Iterable of convolution filter heights; one
        conv + max-pool branch is built per entry.
      num_filters: Number of filters per filter size.
      l2_reg_lambda: Weight of the L2 term added to the loss.

    Attributes created include the placeholders (`input_x`, `seqlen`,
    `input_y`, `dropout_keep_prob`), `scores`, `predictions`, `loss` and
    `accuracy`.
    """
    # Placeholders for input, sequence length, output and dropout
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
    self.seqlen = tf.placeholder(tf.int64, [None], name="seqlen")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # Embedding layer (pinned to the CPU device)
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        self.W = tf.Variable(tf.random_uniform(
            [vocab_size, embedding_size], -1.0, 1.0),
            trainable=True, name="W")
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
        # TODO: Embeddings process ignores commas etc. so seqlens might not
        # be accurate for sentences with commas...

    # Bidirectional LSTM layer
    with tf.name_scope("bidirectional-lstm"):
        lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)
        lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)

        # An earlier variant (previously kept here as commented-out code)
        # called tf.nn.bidirectional_dynamic_rnn and summed the two split
        # halves of its output. The two directions are built explicitly
        # below instead.

        # Forward direction: plain dynamic_rnn over the embedded input.
        with tf.variable_scope("lstm-output-fw"):
            self.lstm_outputs_fw, _ = tf.nn.dynamic_rnn(
                lstm_fw_cell,
                self.embedded_chars,
                sequence_length=self.seqlen,
                dtype=tf.float32)

        # Backward direction: reverse each example along axis 1 up to its
        # length, run the RNN, then reverse the output back.
        with tf.variable_scope("lstm-output-bw"):
            self.embedded_chars_rev = array_ops.reverse_sequence(
                self.embedded_chars,
                seq_lengths=self.seqlen,
                seq_dim=1)
            tmp, _ = tf.nn.dynamic_rnn(lstm_bw_cell,
                                       self.embedded_chars_rev,
                                       sequence_length=self.seqlen,
                                       dtype=tf.float32)
            self.lstm_outputs_bw = array_ops.reverse_sequence(
                tmp,
                seq_lengths=self.seqlen,
                seq_dim=1)

        # Combine the two directions by element-wise addition (not a
        # concatenation), then add a trailing axis for conv2d.
        self.lstm_outputs = tf.add(self.lstm_outputs_fw, self.lstm_outputs_bw, name="lstm_outputs")
        self.lstm_outputs_expanded = tf.expand_dims(self.lstm_outputs, -1)

    # Convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, hidden_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(self.lstm_outputs_expanded,
                                W,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs; the window height equals the
            # number of valid convolution positions for this filter size.
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(3, pooled_outputs)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    # Dropout layer
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        # Standard output weights initialization
        W = tf.get_variable(
            "W",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        # An alternative that initialised W and b to 0.0 was previously kept
        # here as commented-out code, with the remark that it might improve
        # accuracy.
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(
            self.scores, self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    """Creates a dynamic version of bidirectional recurrent neural network.

    Similar to the unidirectional case above (rnn) but takes input and builds
    independent forward and backward RNNs. The input_size of forward and
    backward cell must match. The initial state for both directions is zero by
    default (but can be set optionally) and no intermediate states are ever
    returned -- the network is fully unrolled for the given (passed in)
    length(s) of the sequence(s) or completely unrolled if length(s) is not
    given.

    Args:
      cell_fw: An instance of RNNCell, to be used for forward direction.
      cell_bw: An instance of RNNCell, to be used for backward direction.
      inputs: The RNN inputs.
        If time_major == False (default), this must be a tensor of shape:
          `[batch_size, max_time, input_size]`.
        If time_major == True, this must be a tensor of shape:
          `[max_time, batch_size, input_size]`.
      sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
        containing the actual lengths for each of the sequences. When it is
        `None`, the backward pass reverses the whole time axis instead of
        reversing each sequence up to its own length.
      initial_state_fw: (optional) An initial state for the forward RNN.
        This must be a tensor of appropriate type and shape
        `[batch_size, cell_fw.state_size]`.
        If `cell_fw.state_size` is a tuple, this should be a tuple of
        tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
      initial_state_bw: (optional) Same as for `initial_state_fw`, but using
        the corresponding properties of `cell_bw`.
      dtype: (optional) The data type for the initial states and expected
        output. Required if either of the initial states are not provided or
        RNN states have a heterogeneous dtype.
      parallel_iterations: (Default: 32). The number of iterations to run in
        parallel. Those operations which do not have any temporal dependency
        and can be run in parallel, will be. This parameter trades off
        time for space. Values >> 1 use more memory but take less time,
        while smaller values use less memory but computations take longer.
      swap_memory: Transparently swap the tensors produced in forward inference
        but needed for back prop from GPU to CPU. This allows training RNNs
        which would typically not fit on a single GPU, with very minimal (or
        no) performance penalty.
      time_major: The shape format of the `inputs` and `outputs` Tensors.
        If true, these `Tensors` must be shaped
        `[max_time, batch_size, depth]`.
        If false, these `Tensors` must be shaped
        `[batch_size, max_time, depth]`.
        Using `time_major = True` is a bit more efficient because it avoids
        transposes at the beginning and end of the RNN calculation. However,
        most TensorFlow data is batch-major, so by default this function
        accepts input and emits output in batch-major form.
      scope: VariableScope for the created subgraph; defaults to
        "bidirectional_rnn"

    Returns:
      A tuple (outputs, output_states) where:
        outputs: A tuple (output_fw, output_bw) containing the forward and
          the backward rnn output `Tensor`.
          If time_major == False (default),
            output_fw will be a `Tensor` shaped:
            `[batch_size, max_time, cell_fw.output_size]`
            and output_bw will be a `Tensor` shaped:
            `[batch_size, max_time, cell_bw.output_size]`.
          If time_major == True,
            output_fw will be a `Tensor` shaped:
            `[max_time, batch_size, cell_fw.output_size]`
            and output_bw will be a `Tensor` shaped:
            `[max_time, batch_size, cell_bw.output_size]`.
          It returns a tuple instead of a single concatenated `Tensor`, unlike
          in the `bidirectional_rnn`. If the concatenated one is preferred,
          the forward and backward outputs can be concatenated as
          `tf.concat_v2(outputs, 2)`.
        output_states: A tuple (output_state_fw, output_state_bw) containing
          the forward and the backward final states of bidirectional rnn.

    Raises:
      TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
    """
    if not isinstance(cell_fw, rnn_cell.RNNCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, rnn_cell.RNNCell):
        raise TypeError("cell_bw must be an instance of RNNCell")

    # Axis layout follows the caller's choice of batch-major vs. time-major.
    if time_major:
        time_dim, batch_dim = 0, 1
    else:
        time_dim, batch_dim = 1, 0

    def _reverse_time(tensor):
        # reverse_sequence needs real lengths; with the default
        # sequence_length=None fall back to flipping the entire time axis.
        if sequence_length is None:
            return array_ops.reverse(tensor, axis=[time_dim])
        return array_ops.reverse_sequence(
            input=tensor, seq_lengths=sequence_length,
            seq_dim=time_dim, batch_dim=batch_dim)

    with vs.variable_scope(scope or "bidirectional_rnn"):
        # Forward direction
        with vs.variable_scope("fw") as fw_scope:
            output_fw, output_state_fw = dynamic_rnn(
                cell=cell_fw, inputs=inputs,
                sequence_length=sequence_length,
                initial_state=initial_state_fw, dtype=dtype,
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory, time_major=time_major,
                scope=fw_scope)

        # Backward direction: run the same kind of RNN over time-reversed
        # inputs.
        with vs.variable_scope("bw") as bw_scope:
            inputs_reverse = _reverse_time(inputs)
            tmp, output_state_bw = dynamic_rnn(
                cell=cell_bw, inputs=inputs_reverse,
                sequence_length=sequence_length,
                initial_state=initial_state_bw, dtype=dtype,
                parallel_iterations=parallel_iterations,
                swap_memory=swap_memory, time_major=time_major,
                scope=bw_scope)

    # Undo the reversal so output_bw is aligned step-for-step with output_fw.
    output_bw = _reverse_time(tmp)

    outputs = (output_fw, output_bw)
    output_states = (output_state_fw, output_state_bw)

    return (outputs, output_states)
import tensorflow as tf
from tensorflow.python.ops.array_ops import reverse_sequence
from tensorflow.python.util import nest
from tensorflow.python.framework import ops
from tensorflow.python.ops.array_ops import rank
from tensorflow.python.ops import array_ops

# Counter loop: keep adding 1 to `i` while it is below 10.
i = tf.constant(0)
c = lambda i: tf.less(i, 10)
b = lambda i: tf.add(i, 1)
r = tf.while_loop(c, b, [i])

# The integers 0..19 laid out with shape [2, 2, 5].
# Deliberately not named `range`: rebinding the builtin at module level would
# break every later `range(...)` call in this module.
source_data = tf.reshape(tf.range(20), shape=[2, 2, 5])

# Both entries along axis 0 use length 5, i.e. the full extent of axis 2.
seq_lengths = tf.constant(value=[5, 5])
reverse_range = reverse_sequence(source_data, seq_lengths=seq_lengths,
                                 seq_dim=2, batch_dim=0)

# Cut axis 1 into two equal pieces.
splits = array_ops.split(value=source_data, num_or_size_splits=2, axis=1)

# NOTE: `r` and `reverse_range` are added to the graph but never evaluated
# below; only the source tensor and its splits are printed.
with tf.Session() as sess:
    print('source data1: ', sess.run(source_data))
    print('split: ', sess.run(splits))
def _reverse(_input):
    """Reverse `_input` along axis 1, treating axis 0 as the batch axis.

    The per-entry lengths are taken from `sequence_lengths`, a name this
    function does not define itself and resolves from its surrounding scope.
    """
    reversed_input = array_ops.reverse_sequence(
        input=_input,
        seq_lengths=sequence_lengths,
        seq_axis=1,
        batch_axis=0,
    )
    return reversed_input
def __init__(self, sequence_length, num_classes, vocab_size, embedding_size,
             hidden_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
    """Build the graph: embedding, two-direction LSTM, conv/max-pool, softmax.

    Args:
        sequence_length: Width of the `input_x` placeholder; also used to size
            the max-pool window.
        num_classes: Width of the `input_y` placeholder and of the scores.
        vocab_size: Number of rows in the embedding matrix.
        embedding_size: Number of columns in the embedding matrix.
        hidden_size: Unit count of each LSTM cell; also the second dimension
            of every convolution filter.
        filter_sizes: Iterable of filter heights, one conv/pool branch each.
        num_filters: Number of filters per branch.
        l2_reg_lambda: Weight of the L2 penalty added to the loss.
    """
    # NOTE(review): scope nesting below was reconstructed from
    # whitespace-collapsed source; confirm op placement inside name scopes.

    # Inputs fed at run time.
    self.input_x = tf.placeholder(
        tf.int32, [None, sequence_length], name="input_x")
    self.seqlen = tf.placeholder(tf.int64, [None], name="seqlen")
    self.input_y = tf.placeholder(
        tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(
        tf.float32, name="dropout_keep_prob")

    # Running total of the L2 penalty; only the output layer adds to it.
    l2_loss = tf.constant(0.0)

    # Embedding lookup.
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        embedding_init = tf.random_uniform(
            [vocab_size, embedding_size], -1.0, 1.0)
        self.W = tf.Variable(embedding_init, trainable=True, name="W")
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)

    # One LSTM reads the sequence as given, the other reads it reversed.
    with tf.name_scope("bidirectional-lstm"):
        lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0)
        lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(
            hidden_size, forget_bias=1.0)

        with tf.variable_scope("lstm-output-fw"):
            self.lstm_outputs_fw, _ = tf.nn.dynamic_rnn(
                lstm_fw_cell,
                self.embedded_chars,
                sequence_length=self.seqlen,
                dtype=tf.float32)

        with tf.variable_scope("lstm-output-bw"):
            self.embedded_chars_rev = array_ops.reverse_sequence(
                self.embedded_chars, seq_lengths=self.seqlen, seq_dim=1)
            reversed_outputs, _ = tf.nn.dynamic_rnn(
                lstm_bw_cell,
                self.embedded_chars_rev,
                sequence_length=self.seqlen,
                dtype=tf.float32)
            # Flip back so both directions line up step for step.
            self.lstm_outputs_bw = array_ops.reverse_sequence(
                reversed_outputs, seq_lengths=self.seqlen, seq_dim=1)

        # The two directions are summed element-wise, not concatenated.
        self.lstm_outputs = tf.add(
            self.lstm_outputs_fw, self.lstm_outputs_bw, name="lstm_outputs")
        # Append a trailing axis so conv2d gets a 4-D input.
        self.lstm_outputs_expanded = tf.expand_dims(self.lstm_outputs, -1)

    # One convolution + max-pool branch per filter size.
    pooled_outputs = []
    for filter_size in filter_sizes:
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            filter_shape = [filter_size, hidden_size, 1, num_filters]
            conv_weights = tf.Variable(
                tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            conv_bias = tf.Variable(
                tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                self.lstm_outputs_expanded,
                conv_weights,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            activated = tf.nn.relu(
                tf.nn.bias_add(conv, conv_bias), name="relu")
            pool_window = [1, sequence_length - filter_size + 1, 1, 1]
            pooled = tf.nn.max_pool(
                activated,
                ksize=pool_window,
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Join the branches on axis 3 and flatten to one row per example.
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(3, pooled_outputs)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Linear layer producing unnormalized scores and the arg-max class.
    with tf.name_scope("output"):
        out_weights = tf.get_variable(
            "W",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        out_bias = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(out_weights)
        l2_loss += tf.nn.l2_loss(out_bias)
        self.scores = tf.nn.xw_plus_b(
            self.h_drop, out_weights, out_bias, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Mean cross-entropy plus the weighted L2 term.
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(
            self.scores, self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Fraction of examples whose arg-max prediction matches the label.
    with tf.name_scope("accuracy"):
        true_classes = tf.argmax(self.input_y, 1)
        correct_predictions = tf.equal(self.predictions, true_classes)
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
def _reverse(input_, seq_lengths, seq_dim, batch_dim):
    """Reverse `input_` along `seq_dim`, honouring per-example lengths.

    Args:
        input_: Tensor to reverse.
        seq_lengths: Per-example lengths, or `None` to reverse the whole axis.
        seq_dim: Axis that is reversed.
        batch_dim: Axis along which `seq_lengths` is indexed.

    Returns:
        The reversed tensor.
    """
    if seq_lengths is None:
        # reverse_sequence needs real lengths; without them every example is
        # full length, so flipping the entire axis is the equivalent result.
        return array_ops.reverse(input_, axis=[seq_dim])
    # reverses sequences with right-padding correctly
    return array_ops.reverse_sequence(
        input=input_, seq_lengths=seq_lengths,
        seq_dim=seq_dim, batch_dim=batch_dim)
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    """
    Run a forward and a backward dynamic RNN over the same inputs.

    The backward RNN is fed `inputs` reversed with `reverse_sequence`, and its
    outputs are reversed again before being returned.

    :param cell_fw: RNNCell instance used for the forward direction
    :param cell_bw: RNNCell instance used for the backward direction
    :param inputs: The RNN inputs
    :param sequence_length: An int32/int64 vector, passed to both RNNs and to
        every `reverse_sequence` call
    :param initial_state_fw: Initial state for the forward RNN
    :param initial_state_bw: Initial state for the backward RNN
    :param dtype: Data type forwarded to both `dynamic_rnn` calls
    :param parallel_iterations: Forwarded to both `dynamic_rnn` calls
    :param swap_memory: Forwarded to both `dynamic_rnn` calls
    :param time_major: If true the time axis is 0 and the batch axis is 1;
        otherwise the time axis is 1 and the batch axis is 0
    :param scope: Variable scope for the subgraph; "bidirectional_rnn" when
        not given
    :return: A tuple (outputs, output_states). `outputs` is
        (output_fw, output_bw). `output_states` is (forward state, reversed
        backward state); when `Config.cell_type` is "LSTMCell" the `.c`
        attribute of each state is used instead of the state itself.
    :raises TypeError: if `cell_fw` or `cell_bw` is not an RNNCell
    """
    if not isinstance(cell_fw, rnn_cell.RNNCell):
        raise TypeError("cell_fw must be an instance of RNNCell")
    if not isinstance(cell_bw, rnn_cell.RNNCell):
        raise TypeError("cell_bw must be an instance of RNNCell")

    # Axis layout depends only on time_major.
    time_dim, batch_dim = (0, 1) if time_major else (1, 0)

    # Arguments common to both dynamic_rnn calls / all reverse_sequence calls.
    rnn_kwargs = dict(
        sequence_length=sequence_length,
        dtype=dtype,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory,
        time_major=time_major,
    )
    reverse_kwargs = dict(
        seq_lengths=sequence_length,
        seq_dim=time_dim,
        batch_dim=batch_dim,
    )

    with vs.variable_scope(scope or "bidirectional_rnn"):
        # Forward direction
        with vs.variable_scope("fw") as fw_scope:
            output_fw, output_state_fw = dynamic_rnn(
                cell=cell_fw,
                inputs=inputs,
                initial_state=initial_state_fw,
                scope=fw_scope,
                **rnn_kwargs)

        # Backward direction
        with vs.variable_scope("bw") as bw_scope:
            inputs_reverse = array_ops.reverse_sequence(
                input=inputs, **reverse_kwargs)
            raw_output_bw, raw_state_bw = dynamic_rnn(
                cell=cell_bw,
                inputs=inputs_reverse,
                initial_state=initial_state_bw,
                scope=bw_scope,
                **rnn_kwargs)

    # Undo the reversal so output_bw lines up with output_fw.
    output_bw = array_ops.reverse_sequence(input=raw_output_bw, **reverse_kwargs)

    # For LSTM cells only the `.c` attribute of the state is kept.
    is_lstm = Config.cell_type == "LSTMCell"
    state_bw = raw_state_bw.c if is_lstm else raw_state_bw

    # NOTE(review): the final backward state is reversed with the same
    # time/batch axes as the outputs; its layout is not visible here --
    # confirm this reversal is intended.
    output_state_bw = array_ops.reverse_sequence(input=state_bw, **reverse_kwargs)

    outputs = (output_fw, output_bw)
    state_fw = output_state_fw.c if is_lstm else output_state_fw
    output_states = (state_fw, output_state_bw)

    return (outputs, output_states)
def _reverse(input_, seq_lengths, seq_dim, batch_dim):
    """Reverse `input_` along `seq_dim`, per example when lengths are given.

    Args:
        input_: Tensor to reverse.
        seq_lengths: Per-example lengths; `None` means reverse the full axis.
        seq_dim: Axis that is reversed.
        batch_dim: Axis along which `seq_lengths` is indexed.

    Returns:
        The reversed tensor.
    """
    # reverse_sequence cannot be called without lengths, so the no-lengths
    # case is served by a plain reverse of the whole axis.
    if seq_lengths is None:
        return array_ops.reverse(input_, axis=[seq_dim])
    return array_ops.reverse_sequence(
        input=input_, seq_lengths=seq_lengths,
        seq_dim=seq_dim, batch_dim=batch_dim)