def seeded_random(seeds, offset, shape, dtype, seed=None, name=None):
    """ Generates uniformly distributed random values that are reproducible for a given seed.

        One block of values of shape `shape` is produced per entry of `seeds`.

        :param seeds: A vector of seeds (Size: [batch,]) - If 0, defaults to seed attr, then graph seed, then random.
        :param offset: Integer to add to the seed to get a deterministic mask.
        :param shape: The shape required for each seed (e.g. [3, 5] with a batch of 10 will return [10, 3, 5]).
        :param dtype: The type of the output. One of `float16`, `bfloat16`, `float32`, `float64`.
        :param seed: A Python integer. Used to create a default seed for the operation.
        :param name: A name for the operation (optional).
        :return: A tensor of the specified shape filled with deterministic random values.
        :raises ValueError: If `dtype` is not one of the supported floating point types.
    """
    supported_dtypes = (dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64)
    if dtype not in supported_dtypes:
        raise ValueError('Invalid dtype %r' % dtype)

    with ops.name_scope(name, 'seeded_random', [shape]):
        seeds_tensor = ops.convert_to_tensor(seeds, dtype=dtypes.int32, name='seeds')
        shape_tensor = ops.convert_to_tensor(shape, dtype=dtypes.int32, name='shape')
        offset_tensor = ops.convert_to_tensor(offset, dtype=dtypes.int32, name='offset')

        # The custom op receives the number of values needed per seed (product of the requested dims)
        nb_values = math_ops.reduce_prod(shape_tensor)
        graph_seed, op_seed = random_seed.get_seed(seed)
        flat_values = SEEDED_RANDOM_SO.seeded_random(seeds_tensor,
                                                     offset_tensor,
                                                     nb_values,
                                                     seed=graph_seed,
                                                     seed2=op_seed)

        # Restoring the requested per-seed shape, with a leading dimension inferred from the op output
        target_shape = array_ops.concat([(-1, ), shape_tensor], axis=0)
        shaped_values = gen_array_ops.reshape(flat_values, target_shape)
        return math_ops.cast(shaped_values, dtype)
def __init__(self, cell, order_embedding, candidate_embedding, candidates, sequence_length, initial_state,
             beam_width, input_layer=None, output_layer=None, time_major=False):
    """ Sets up the beam helper (candidate version) and pre-computes its zero inputs.

        :param cell: An `RNNCell` instance.
        :param order_embedding: The order embedding vector - Size: (batch, ord_emb_size)
        :param candidate_embedding: The candidate embedding vector - Size: (batch, cand_emb_size)
        :param candidates: The candidates at each time step -- Size: (batch, nb_cand, max_candidates)
        :param sequence_length: The length of each sequence (batch,)
        :param initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
        :param beam_width: Python integer, the number of beams.
        :param input_layer: Optional. A layer to apply on the inputs
        :param output_layer: Optional. An instance of `tf.layers.Layer`, i.e., `tf.layers.Dense`. Optional layer to
                             apply to the RNN output prior to storing the result or sampling.
        :param time_major: If true indicates that the first dimension is time, otherwise it is batch size.
        :raises ValueError: If `sequence_length` is not a vector.
    """
    # pylint: disable=super-init-not-called,too-many-arguments
    rnn_cell_impl.assert_like_rnncell('cell', cell)                     # pylint: disable=protected-access
    assert isinstance(beam_width, int), 'beam_width should be a Python integer'

    # Sequence lengths must be rank 1
    self._sequence_length = ops.convert_to_tensor(sequence_length, name='sequence_length')
    seq_length_shape = self._sequence_length.get_shape()
    if seq_length_shape.ndims != 1:
        raise ValueError("Expected vector for sequence_length. Shape: %s" % seq_length_shape)

    # Candidates are unstacked along their first axis, so batch-major candidates are transposed first
    candidates = ops.convert_to_tensor(candidates, name='candidates')
    if not time_major:
        candidates = nest.map_structure(_transpose_batch_time, candidates)

    self._cell = cell
    self._order_embedding_fn = _get_embedding_fn(order_embedding)
    self._candidate_embedding_fn = _get_embedding_fn(candidate_embedding)
    self._candidate_tas = nest.map_structure(_unstack_ta, candidates)

    # Without an input layer, inputs go through unchanged and keep the order embedding size
    if input_layer is None:
        self._input_layer = lambda x: x
    else:
        self._input_layer = input_layer
    self._output_layer = output_layer
    self._input_size = order_embedding.shape[-1]
    if input_layer is not None:
        self._input_size = self._input_layer.compute_output_shape([None, self._input_size])[-1]

    self._batch_size = array_ops.size(sequence_length)
    self._start_tokens = gen_array_ops.fill([self._batch_size * beam_width], GO_ID)
    self._end_token = -1
    self._beam_width = beam_width
    self._initial_cell_state = nest.map_structure(self._maybe_split_batch_beams,
                                                  initial_state,
                                                  self._cell.state_size)

    # Index 0 of every batch item is marked False (not finished), all other beams are marked True
    first_beam_ix = array_ops.zeros([self._batch_size], dtype=dtypes.int32)
    self._finished = array_ops.one_hot(first_beam_ix,
                                       depth=self._beam_width,
                                       on_value=False,
                                       off_value=True,
                                       dtype=dtypes.bool)

    # Compute input shape
    start_inputs = self._input_layer(self._order_embedding_fn(self._start_tokens))
    start_inputs = self._split_batch_beams(start_inputs, self._input_size)
    zero_inputs = array_ops.zeros_like(start_inputs)
    zero_candidates = array_ops.zeros_like(candidates[0, :])
    zero_candidates_emb = array_ops.zeros_like(self._candidate_embedding_fn(candidates[0, :]))
    self._zero_inputs = CandidateInputs(inputs=zero_inputs,
                                        candidates=zero_candidates,
                                        candidates_emb=zero_candidates_emb)
def __init__(self, decoder_type, inputs, order_embedding, candidate_embedding, sequence_length, candidates,
             input_layer=None, time_major=False, softmax_temperature=None, seed=None, name=None):
    """ Sets up the helper (candidate version) and pre-computes its zero inputs.

        :param decoder_type: An uint8 representing TRAINING_DECODER, GREEDY_DECODER, or SAMPLE_DECODER
        :param inputs: The decoder input (b, dec_len)
        :param order_embedding: The order embedding vector
        :param candidate_embedding: The candidate embedding vector
        :param sequence_length: The length of each input (b,)
        :param candidates: The candidates at each time step -- Size: (b, nb_cand, max_candidates)
        :param input_layer: Optional. A layer to apply on the inputs
        :param time_major: If true indicates that the first dimension is time, otherwise it is batch size
        :param softmax_temperature: Optional. Softmax temperature. None, scalar, or size: (batch_size,)
        :param seed: Optional. The sampling seed
        :param name: Optional scope name.
        :raises ValueError: If `sequence_length` is not a vector.
    """
    # pylint: disable=too-many-arguments
    with ops.name_scope(name, "CustomHelper", [inputs, sequence_length, order_embedding, candidate_embedding]):
        inputs = ops.convert_to_tensor(inputs, name="inputs")
        candidates = ops.convert_to_tensor(candidates, name="candidates")

        # The stored inputs keep the layout they were given in
        self._inputs = inputs
        self._order_embedding_fn = _get_embedding_fn(order_embedding)
        self._candidate_embedding_fn = _get_embedding_fn(candidate_embedding)

        # Both tensors are unstacked along their first axis, so batch-major data is transposed first
        if time_major:
            time_major_inputs, time_major_candidates = inputs, candidates
        else:
            time_major_inputs = nest.map_structure(_transpose_batch_time, inputs)
            time_major_candidates = nest.map_structure(_transpose_batch_time, candidates)
        self._input_tas = nest.map_structure(_unstack_ta, time_major_inputs)
        self._candidate_tas = nest.map_structure(_unstack_ta, time_major_candidates)
        self._decoder_type = decoder_type

        # Sequence lengths must be rank 1
        self._sequence_length = ops.convert_to_tensor(sequence_length, name="sequence_length")
        seq_length_shape = self._sequence_length.get_shape()
        if seq_length_shape.ndims != 1:
            raise ValueError("Expected vector for sequence_length. Shape: %s" % seq_length_shape)

        # Without an input layer, inputs go through unchanged
        if input_layer is None:
            self._input_layer = lambda x: x
        else:
            self._input_layer = input_layer
        self._batch_size = array_ops.size(sequence_length)
        self._start_inputs = gen_array_ops.fill([self._batch_size], GO_ID)
        self._softmax_temperature = softmax_temperature
        self._seed = seed

        # Compute input shape
        zero_inputs = array_ops.zeros_like(self._input_layer(self._order_embedding_fn(self._start_inputs)))
        zero_candidates = array_ops.zeros_like(time_major_candidates[0, :])
        zero_candidates_emb = array_ops.zeros_like(self._candidate_embedding_fn(time_major_candidates[0, :]))
        self._zero_inputs = CandidateInputs(inputs=zero_inputs,
                                            candidates=zero_candidates,
                                            candidates_emb=zero_candidates_emb)

        # Nothing else to do when no temperature was provided
        if softmax_temperature is None:
            return

        # Preventing div by zero
        # Adding an extra dim to the matrix, so we can broadcast with the outputs shape
        self._softmax_temperature = gen_math_ops.maximum(1e-10, self._softmax_temperature)
        if self._softmax_temperature.get_shape().ndims == 1:
            self._softmax_temperature = self._softmax_temperature[:, None]
def _shape(batch_size, from_shape):
    """ Builds the static shape obtained by prepending the batch size to `from_shape`.

        :param batch_size: The batch size (anything convertible to a tensor). Its static value is used if available.
        :param from_shape: The shape to which the batch dimension is prepended.
        :return: A fully unknown TensorShape if `from_shape` is not a TensorShape or has rank 0,
                 otherwise the TensorShape [batch_size] + from_shape.
    """
    is_tensor_shape = isinstance(from_shape, tensor_shape.TensorShape)
    if is_tensor_shape and from_shape.ndims != 0:
        # constant_value returns None when the batch size can't be determined statically
        static_batch_size = tensor_util.constant_value(ops.convert_to_tensor(batch_size, name='batch_size'))
        batch_shape = tensor_shape.TensorShape([static_batch_size])
        return batch_shape.concatenate(from_shape)
    return tensor_shape.TensorShape(None)
def __init__(self, cell, embedding, mask, sequence_length, initial_state, beam_width, input_layer=None,
             output_layer=None, time_major=False):
    """ Sets up the beam helper (sparse mask version) and pre-computes its zero mask and zero inputs.

        :param cell: An `RNNCell` instance.
        :param embedding: The embedding vector
        :param mask: [SparseTensor] Mask to apply at each time step -- Size: (b, dec_len, vocab_size, vocab_size)
        :param sequence_length: The length of each input (b,)
        :param initial_state: A (possibly nested tuple of...) tensors and TensorArrays.
        :param beam_width: Python integer, the number of beams.
        :param input_layer: Optional. A layer to apply on the inputs
        :param output_layer: Optional. An instance of `tf.layers.Layer`, i.e., `tf.layers.Dense`. Optional layer to
                             apply to the RNN output prior to storing the result or sampling.
        :param time_major: If true indicates that the first dimension is time, otherwise it is batch size.
        :raises ValueError: If `sequence_length` is not a vector.
    """
    # pylint: disable=super-init-not-called,too-many-arguments
    rnn_cell_impl.assert_like_rnncell('cell', cell)                     # pylint: disable=protected-access
    assert isinstance(mask, SparseTensor), 'The mask must be a SparseTensor'
    assert isinstance(beam_width, int), 'beam_width should be a Python integer'

    # Sequence lengths must be rank 1
    self._sequence_length = ops.convert_to_tensor(sequence_length, name='sequence_length')
    seq_length_shape = self._sequence_length.get_shape()
    if seq_length_shape.ndims != 1:
        raise ValueError("Expected vector for sequence_length. Shape: %s" % seq_length_shape)

    self._cell = cell
    self._embedding_fn = _get_embedding_fn(embedding)
    self._mask = mask
    self._time_major = time_major
    self.vocab_size = VOCABULARY_SIZE

    # Without an input layer, inputs go through unchanged and keep the embedding size
    if input_layer is None:
        self._input_layer = lambda x: x
    else:
        self._input_layer = input_layer
    self._output_layer = output_layer
    self._input_size = embedding.shape[-1]
    if input_layer is not None:
        self._input_size = self._input_layer.compute_output_shape([None, self._input_size])[-1]

    self._batch_size = array_ops.size(sequence_length)
    self._start_tokens = gen_array_ops.fill([self._batch_size * beam_width], GO_ID)
    self._end_token = -1
    self._beam_width = beam_width
    self._initial_cell_state = nest.map_structure(self._maybe_split_batch_beams,
                                                  initial_state,
                                                  self._cell.state_size)

    # Index 0 of every batch item is marked False (not finished), all other beams are marked True
    first_beam_ix = array_ops.zeros([self._batch_size], dtype=dtypes.int32)
    self._finished = array_ops.one_hot(first_beam_ix,
                                       depth=self._beam_width,
                                       on_value=False,
                                       off_value=True,
                                       dtype=dtypes.bool)

    # zero_mask is (batch, beam, vocab_size)
    self._zero_mask = _slice_mask(self._mask,
                                  slicing=[-1, 0, GO_ID, -1],
                                  squeeze=True,
                                  time_major=self._time_major)
    mask_with_beam_dim = array_ops.expand_dims(self._zero_mask, axis=1)
    self._zero_mask = gen_array_ops.tile(mask_with_beam_dim, [1, self._beam_width, 1])

    # Zero inputs have the shape of the embedded start tokens, once split into (batch, beam)
    start_inputs = self._input_layer(self._embedding_fn(self._start_tokens))
    start_inputs = self._split_batch_beams(start_inputs, self._input_size)
    self._zero_inputs = MaskedInputs(inputs=array_ops.zeros_like(start_inputs), mask=self._zero_mask)
def __init__(self, decoder_type, inputs, embedding, sequence_length, mask, input_layer=None, time_major=False,
             softmax_temperature=None, seed=None, name=None):
    """ Sets up the helper (sparse mask version) and pre-computes its zero inputs.

        :param decoder_type: An uint8 representing TRAINING_DECODER, GREEDY_DECODER, or SAMPLE_DECODER
        :param inputs: The decoder input (b, dec_len)
        :param embedding: The embedding vector (or a callable mapping ids to their embeddings)
        :param sequence_length: The length of each input (b,)
        :param mask: [SparseTensor] Mask to apply at each time step -- Size: (b, dec_len, vocab_size, vocab_size)
        :param input_layer: Optional. A layer to apply on the inputs (ignored if not callable)
        :param time_major: If true indicates that the first dimension is time, otherwise it is batch size
        :param softmax_temperature: Optional. Softmax temperature. None or size: (batch_size,)
        :param seed: Optional. The sampling seed
        :param name: Optional scope name.
        :raises ValueError: If `sequence_length` is not a vector.
    """
    # pylint: disable=too-many-arguments
    with ops.name_scope(name, "CustomHelper", [inputs, sequence_length, embedding]):
        assert isinstance(mask, SparseTensor), 'The mask must be a SparseTensor'
        inputs = ops.convert_to_tensor(inputs, name="inputs")

        # The stored inputs keep the layout they were given in
        self._inputs = inputs
        self._mask = mask
        self._time_major = time_major

        # A callable embedding is used directly, anything else is looked up by id
        if callable(embedding):
            self._embedding_fn = embedding
        else:
            self._embedding_fn = lambda ids: embedding_lookup(embedding, ids)

        # Inputs are unstacked along their first axis, so batch-major inputs are transposed first
        if time_major:
            time_major_inputs = inputs
        else:
            time_major_inputs = nest.map_structure(_transpose_batch_time, inputs)
        self._input_tas = nest.map_structure(_unstack_ta, time_major_inputs)
        self._decoder_type = decoder_type

        # Sequence lengths must be rank 1
        self._sequence_length = ops.convert_to_tensor(sequence_length, name="sequence_length")
        seq_length_shape = self._sequence_length.get_shape()
        if seq_length_shape.ndims != 1:
            raise ValueError("Expected vector for sequence_length. Shape: %s" % seq_length_shape)

        # A missing (or non-callable) input layer leaves the inputs unchanged
        if callable(input_layer):
            self._input_layer = input_layer
        else:
            self._input_layer = lambda x: x
        self._batch_size = array_ops.size(sequence_length)
        self._start_inputs = gen_array_ops.fill([self._batch_size], GO_ID)
        self._softmax_temperature = softmax_temperature
        self._seed = seed
        self.vocab_size = VOCABULARY_SIZE

        # Zero inputs have the shape of the embedded start inputs, paired with the mask slice for GO_ID
        zero_inputs = array_ops.zeros_like(self._input_layer(self._embedding_fn(self._start_inputs)))
        zero_mask = _slice_mask(self._mask,
                                slicing=[-1, 0, GO_ID, -1],
                                squeeze=True,
                                time_major=self._time_major)
        self._zero_inputs = MaskedInputs(inputs=zero_inputs, mask=zero_mask)

        # Nothing else to do when no temperature was provided
        if softmax_temperature is None:
            return

        # Preventing div by zero
        # Adding an extra dim to the matrix, so we can broadcast with the outputs shape
        self._softmax_temperature = gen_math_ops.maximum(1e-10, self._softmax_temperature)
        if self._softmax_temperature.get_shape().ndims == 1:
            self._softmax_temperature = self._softmax_temperature[:, None]
def seeded_dropout(inputs, seeds, keep_probs, offset=None, noise_shape=None, seed=None, name=None):
    """ Computes dropout (with a deterministic mask).
        Every item in the batch has a deterministic seed to compute the deterministic mask

        With probability `keep_probs`, outputs the input element scaled up by `1 / keep_prob`, otherwise outputs `0`.
        The scaling is so that the expected sum is unchanged.

        By default, each element is kept or dropped independently. If `noise_shape` is specified, it must be
        broadcastable to the shape of `inputs`, and only dimensions with `noise_shape[i] == shape(inputs)[i]` will
        make independent decisions.

        For example, if `shape(inputs) = [k, l, m, n]` and `noise_shape = [k, 1, 1, n]`, each batch and channel
        component will be kept independently and each row and column will be kept or not kept together.

        Scalar inputs and integer inputs are returned untouched.

        :param inputs: A floating point tensor.
        :param seeds: A tensor representing the seed for each item in the batch. (Size: (batch,))
        :param keep_probs: A scalar or vector of size (batch,). The probability that each element is kept.
        :param offset: Integer. Alternative offset to apply to compute the deterministic mask (e.g. in a loop).
                       If None, the function-level `seeded_dropout.offset` counter is advanced and used instead.
        :param noise_shape: A 1-D `Tensor` of type `int32`, represents the shape for randomly generated keep/drop flags.
        :param seed: A Python integer. Used to create a default seed for the operation.
        :param name: A name for this operation (optional).
        :return: A Tensor of the same shape as `inputs`.
        :raises ValueError: If `inputs` is not a floating point tensor, if `keep_probs` is a Python float outside
                            of (0, 1], or if called in eager mode.
    """
    # The counter is advanced on every call without an explicit offset, even if we return early below
    # NOTE(review): relies on `seeded_dropout.offset` being initialized outside of this function - confirm
    if offset is None:
        seeded_dropout.offset += 40555607

    # If inputs is a scalar, this is likely the 'time' attribute in a state, we don't want to mask it
    # Same thing for integers - We can safely ignore them
    # So we don't want to mask it
    # NOTE(review): `.shape` and `.dtype.is_integer` are read before `convert_to_tensor` is called below, so
    # `inputs` presumably needs to already be a tensor-like object at this point - confirm against callers
    if not inputs.shape or inputs.dtype.is_integer:
        return inputs

    with ops.name_scope(name, 'seeded_dropout', [inputs]):
        inputs = ops.convert_to_tensor(inputs, name='x')
        if not inputs.dtype.is_floating:
            raise ValueError('Expected a floating point tensor. Got a %s tensor instead.' % inputs.dtype)
        # Only Python floats are range-checked here - tensors are clamped further down instead
        if isinstance(keep_probs, float) and not 0 < keep_probs <= 1:
            raise ValueError('keep_probs must be a scalar tensor or a float in the range (0, 1], got %g' % keep_probs)

        # Early return if nothing needs to be dropped.
        if isinstance(keep_probs, float) and keep_probs == 1:
            return inputs

        # Not supported in eager mode
        if context.executing_eagerly():
            raise ValueError('This function is not supported in eager mode.')

        # Converting to tensor, clamping to [0., 1.], and reshaping to [-1, 1, ..., 1] (same rank as inputs)
        # so that a per-item probability broadcasts against the inputs
        # NOTE(review): the clamp admits a keep probability of 0., which is then used as a divisor in
        # get_dropout_mask - confirm that callers never feed a tensor containing 0.
        keep_probs = ops.convert_to_tensor(keep_probs, dtype=inputs.dtype, name='keep_probs')
        keep_probs = gen_math_ops.maximum(0., gen_math_ops.minimum(1., keep_probs))
        keep_probs = gen_array_ops.reshape(keep_probs, [-1] + [1] * (len(inputs.shape) - 1))
        all_keep_probs_are_one = math_ops.reduce_all(gen_math_ops.equal(keep_probs, 1.))

        # Computing noise shape
        noise_shape = nn_ops._get_noise_shape(inputs, noise_shape)                  # pylint: disable=protected-access

        def get_dropout_mask():
            """ Computes the inputs with the dropout mask applied (and rescaled by 1 / keep_probs) """
            # random_tensor = uniform [keep_probs, 1.0 + keep_probs)
            # The first dimension of noise_shape is dropped, because seeded_random is given the shape per seed
            random_tensor = keep_probs
            random_tensor += seeded_random(seeds,
                                           offset=offset if offset is not None else seeded_dropout.offset,
                                           shape=noise_shape[1:],
                                           dtype=inputs.dtype,
                                           seed=seed)

            # 0. if [keep_probs, 1.0) and 1. if [1.0, 1.0 + keep_prob)
            binary_tensor = gen_math_ops.floor(random_tensor)
            ret = math_ops.divide(inputs, keep_probs) * binary_tensor
            ret.set_shape(inputs.get_shape())

            # Setting control flow ops to avoid computing this function if not required
            with ops.control_dependencies([ret]):
                return array_ops.identity(ret)

        # Returning the dropout mask
        # The inputs are returned untouched when every keep probability is equal to 1.
        return control_flow_ops.cond(all_keep_probs_are_one,
                                     true_fn=lambda: inputs,
                                     false_fn=get_dropout_mask)
def _convert_to_probs_tensor(keep_probs):
    """ Turns keep probabilities into a tensor clamped to [0., 1.] and reshaped to [-1, 1].

        :param keep_probs: The keep probabilities (anything convertible to a tensor).
        :return: The clamped probabilities, as a tensor of shape [-1, 1].
    """
    raw_probs = ops.convert_to_tensor(keep_probs)

    # Clamping from above first, then from below
    capped_probs = gen_math_ops.minimum(1., raw_probs)
    clamped_probs = gen_math_ops.maximum(0., capped_probs)
    return gen_array_ops.reshape(clamped_probs, [-1, 1])
def dynamic_decode(decoder, output_time_major=False, impute_finished=False, maximum_iterations=None,
                   parallel_iterations=32, invariants_map=None, swap_memory=False, scope=None):
    """ Performs dynamic decoding with `decoder`.

        The decoder is stepped inside a `while_loop` until every entry of its `finished` vector is True
        (or until `maximum_iterations` is reached, if provided).

        :param decoder: A `Decoder` instance.
        :param output_time_major: If True, outputs [time, batch, ...], otherwise outputs [batch, time, ...]
        :param impute_finished: If true, outputs of finished items are replaced by zeros and their states are
                                copied through, rather than using the values returned by the decoder.
        :param maximum_iterations: Int or None. The maximum number of steps (otherwise decode until it's done)
        :param parallel_iterations: Argument passed to tf.while_loop
        :param invariants_map: Optional. Dictionary of tensor path (in initial_state) to its shape invariant.
        :param swap_memory: Argument passed to `tf.while_loop`.
        :param scope: Optional variable scope to use.
        :return: A tuple of 1) final_outputs, 2) final_state, 3) final_sequence_length
        :raises TypeError: If `decoder` is not an instance of `seq2seq.Decoder`.
        :raises ValueError: If `maximum_iterations` is provided but is not a scalar, or if it is missing while
                            building inside an XLA context.
    """
    if not isinstance(decoder, seq2seq.Decoder):
        raise TypeError('Expected decoder to be type Decoder, but saw: %s' % type(decoder))

    with variable_scope.variable_scope(scope, 'decoder') as varscope:

        # Determine context types.
        ctxt = ops.get_default_graph()._get_control_flow_context()  # pylint: disable=protected-access
        is_xla = control_flow_util.GetContainingXLAContext(ctxt) is not None
        in_while_loop = control_flow_util.GetContainingWhileContext(ctxt) is not None

        # Properly cache variable values inside the while_loop.
        # Don't set a caching device when running in a loop, since it is possible that train steps could be wrapped
        # in a tf.while_loop. In that scenario caching prevents forward computations in loop iterations from re-reading
        # the updated weights.
        if not context.executing_eagerly() and not in_while_loop:
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        # Setting maximum iterations
        if maximum_iterations is not None:
            maximum_iterations = ops.convert_to_tensor(maximum_iterations,
                                                       dtype=dtypes.int32,
                                                       name="maximum_iterations")
            if maximum_iterations.get_shape().ndims != 0:
                raise ValueError('maximum_iterations must be a scalar')

        def _inv_shape(maybe_ta):
            """ Returns the invariants shape (for a TensorArray, the shape of its flow tensor) """
            if isinstance(maybe_ta, tensor_array_ops.TensorArray):
                return maybe_ta.flow.shape
            return maybe_ta.shape

        def _invariants(structure):
            """ Returns the invariants of a structure """
            return nest.map_structure(_inv_shape, structure)

        def _map_invariants(structure):
            """ Returns the invariants of a structure, but replaces the invariant using the value in invariants_map """
            # Paths missing from invariants_map (or a missing map) fall back on the shape of the tensor itself
            return nest.map_structure_with_paths(
                lambda path, tensor: (invariants_map or {}).get(path, _inv_shape(tensor)),
                structure)

        # Initializing decoder
        initial_finished, initial_inputs, initial_state = decoder.initialize()
        zero_outputs = _create_zero_outputs(decoder.output_size, decoder.output_dtype, decoder.batch_size)

        if is_xla and maximum_iterations is None:
            raise ValueError('maximum_iterations is required for XLA compilation.')
        # With a non-positive maximum_iterations, everything is already finished before the first step
        if maximum_iterations is not None:
            initial_finished = gen_math_ops.logical_or(initial_finished, maximum_iterations <= 0)
        initial_sequence_lengths = array_ops.zeros_like(initial_finished, dtype=dtypes.int32)
        initial_time = constant_op.constant(0, dtype=dtypes.int32)

        # Creating initial output TA
        def _shape(batch_size, from_shape):
            """ Returns the batch_size concatenated with the from_shape """
            # The shape is left fully unknown if from_shape is not a TensorShape or has rank 0
            if (not isinstance(from_shape, tensor_shape.TensorShape) or from_shape.ndims == 0):
                return tensor_shape.TensorShape(None)
            batch_size = tensor_util.constant_value(ops.convert_to_tensor(batch_size, name='batch_size'))
            return tensor_shape.TensorShape([batch_size ]).concatenate(from_shape)

        # The output TensorArrays only get a fixed size (maximum_iterations) when building under XLA
        dynamic_size = maximum_iterations is None or not is_xla

        def _create_ta(shape, dtype):
            """ Creates a tensor array"""
            return tensor_array_ops.TensorArray(dtype=dtype,
                                                size=0 if dynamic_size else maximum_iterations,
                                                dynamic_size=dynamic_size,
                                                element_shape=_shape(decoder.batch_size, shape))

        initial_outputs_ta = nest.map_structure(_create_ta, decoder.output_size, decoder.output_dtype)

        def condition(unused_time, unused_outputs_ta, unused_state, unused_inputs, finished, unused_sequence_lengths):
            """ While loop condition"""
            # Looping as long as at least one item is not finished
            return gen_math_ops.logical_not(math_ops.reduce_all(finished))

        def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
            """ Internal while_loop body.
                :param time: The current time step (scalar).
                :param outputs_ta: The structure of TensorArrays in which the outputs of each step are written.
                :param state: The decoder state from the previous step.
                :param inputs: The inputs for this step.
                :param finished: Boolean vector indicating which items were finished before this step.
                :param sequence_lengths: The sequence lengths computed so far.
                :return: (time + 1, outputs_ta, next_state, next_inputs, next_finished, next_sequence_lengths)
            """
            (next_outputs, decoder_state, next_inputs, decoder_finished) = decoder.step(time, inputs, state)
            # Unless the decoder tracks its own finished vector, an item that was finished stays finished
            if decoder.tracks_own_finished:
                next_finished = decoder_finished
            else:
                next_finished = gen_math_ops.logical_or(decoder_finished, finished)
            # Items that were not finished before this step have their length set to time + 1
            next_sequence_lengths = array_ops.where(gen_math_ops.logical_not(finished),
                                                    gen_array_ops.fill(array_ops.shape(sequence_lengths), time + 1),
                                                    sequence_lengths)

            nest.assert_same_structure(state, decoder_state)
            nest.assert_same_structure(outputs_ta, next_outputs)
            nest.assert_same_structure(inputs, next_inputs)

            # Zero out output values past finish
            if impute_finished:
                emit = nest.map_structure(lambda out, zero: array_ops.where(finished, zero, out),
                                          next_outputs,
                                          zero_outputs)
            else:
                emit = next_outputs

            # Copy through states past finish
            def _maybe_copy_state(new, cur):
                # Keeps the current state for finished items, and the new state for the other items
                # TensorArrays, multiple dynamic dims, and scalar states get passed through.
                if isinstance(cur, tensor_array_ops.TensorArray):
                    pass_through = True
                elif None in new.shape.as_list()[1:]:
                    pass_through = True
                else:
                    new.set_shape(cur.shape)
                    pass_through = (new.shape.ndims == 0)
                return new if pass_through else array_ops.where(finished, cur, new)

            if impute_finished:
                next_state = nest.map_structure(_maybe_copy_state, decoder_state, state)
            else:
                next_state = decoder_state

            outputs_ta = nest.map_structure(lambda ta, out: ta.write(time, out), outputs_ta, emit)
            return (time + 1, outputs_ta, next_state, next_inputs, next_finished, next_sequence_lengths)

        # Only the state invariants can be overridden through invariants_map
        res = control_flow_ops.while_loop(condition,
                                          body,
                                          loop_vars=(initial_time,
                                                     initial_outputs_ta,
                                                     initial_state,
                                                     initial_inputs,
                                                     initial_finished,
                                                     initial_sequence_lengths),
                                          shape_invariants=(_invariants(initial_time),
                                                            _invariants(initial_outputs_ta),
                                                            _map_invariants(initial_state),
                                                            _invariants(initial_inputs),
                                                            _invariants(initial_finished),
                                                            _invariants(initial_sequence_lengths)),
                                          parallel_iterations=parallel_iterations,
                                          maximum_iterations=maximum_iterations,
                                          swap_memory=swap_memory)

        # Positions match loop_vars: 1 = outputs TAs, 2 = state, 5 = sequence lengths
        final_outputs_ta = res[1]
        final_state = res[2]
        final_sequence_lengths = res[5]

        final_outputs = nest.map_structure(lambda ta: ta.stack(), final_outputs_ta)

        # A NotImplementedError raised by finalize is ignored, and the outputs and state are kept as is
        try:
            final_outputs, final_state = decoder.finalize(final_outputs, final_state, final_sequence_lengths)
        except NotImplementedError:
            pass

        # The stacked outputs are transposed unless time-major outputs were requested
        if not output_time_major:
            final_outputs = nest.map_structure(_transpose_batch_time, final_outputs)

    return final_outputs, final_state, final_sequence_lengths