Example #1
  def call(self, inputs, mask=None):
    """Call the model on new inputs.

    In this case `call` just reapplies
    all ops in the graph to the new inputs
    (e.g. build a new computational graph from the provided inputs).

    Arguments:
        inputs: A tensor or list of tensors.
        mask: A mask or list of masks. A mask can be
            either a tensor or None (no mask).

    Returns:
        A tensor if there is a single output, or
        a list of tensors if there is more than one output.
    """
    inputs = nest.flatten(inputs)
    if mask is None:
      masks = [None for _ in range(len(inputs))]
    else:
      masks = nest.flatten(mask)

    if context.in_graph_mode():
      # Try to retrieve cached outputs if the layer has already been called
      # on these exact inputs.
      cache_key = (layers_util.object_list_uid(inputs)
                   + '_' + layers_util.object_list_uid(masks))
      if cache_key in self._output_tensor_cache:
        # Cache hit.
        return self._output_tensor_cache[cache_key]
    # Actually apply the network graph to the new inputs.
    outputs, _ = self._run_internal_graph(inputs, masks)
    return outputs
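The behaviour described in the docstring above can be reproduced with the public Keras functional API: calling a built model on new tensors re-applies the whole graph of ops to those inputs. A minimal sketch, assuming TensorFlow 2.x:

import tensorflow as tf

inp = tf.keras.Input(shape=(4,))
out = tf.keras.layers.Dense(2)(inp)
model = tf.keras.Model(inp, out)

# Calling the model on a new tensor rebuilds the computation on that input.
new_out = model(tf.zeros([3, 4]))  # shape (3, 2)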
Example #2
def convert_to_generator_like(data,
                              batch_size=None,
                              steps_per_epoch=None,
                              epochs=1,
                              shuffle=False):
  """Make a generator out of NumPy or EagerTensor inputs.

  Arguments:
    data: Either a generator or `keras.utils.data_utils.Sequence` object or
      `Dataset` or `EagerIterator` or a {1,2,3}-tuple of NumPy arrays or
      EagerTensors. If a tuple, the elements represent `(x, y, sample_weights)`
      and may be `None` or `[None]`.
    batch_size: Used when creating a generator out of tuples of NumPy arrays or
      EagerTensors.
    steps_per_epoch: Steps of the generator to run each epoch.
    epochs: Total number of epochs to run.
    shuffle: Whether the data should be shuffled.

  Returns:
    - Generator or `keras.utils.data_utils.Sequence` or EagerIterator.

  Raises:
    - ValueError: If `batch_size` is not provided for NumPy or EagerTensor
      inputs.
  """
  if isinstance(data, tuple):
    # Scrub `Nones` that might have been passed for `targets`, `sample_weights`.
    data = tuple(
        ele for ele in data if not all(e is None for e in nest.flatten(ele)))
    if len(data) == 1:
      data = data[0]

  if data_utils.is_generator_or_sequence(data) or isinstance(
      data, iterator_ops.EagerIterator):
    if isinstance(data, data_utils.Sequence):
      steps_per_epoch = len(data)
    return data, steps_per_epoch
  if isinstance(data, dataset_ops.DatasetV2):
    return dataset_ops.make_one_shot_iterator(data), steps_per_epoch

  # Create generator from NumPy or EagerTensor Input.
  num_samples = int(nest.flatten(data)[0].shape[0])
  if batch_size is None:
    raise ValueError('You must specify `batch_size`')
  steps_per_epoch = int(math.ceil(num_samples / batch_size))

  def _gen(data):
    """Makes a generator out of a structure of NumPy/EagerTensors."""
    index_array = np.arange(num_samples)
    for _ in range(epochs):
      if shuffle:
        np.random.shuffle(index_array)
      batches = generic_utils.make_batches(num_samples, batch_size)
      for (batch_start, batch_end) in batches:
        batch_ids = index_array[batch_start:batch_end]
        flat_batch_data = training_utils.slice_arrays(
            nest.flatten(data), batch_ids, contiguous=(not shuffle))
        yield nest.pack_sequence_as(data, flat_batch_data)

  return _gen(data), steps_per_epoch
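A hypothetical call illustrating the contract above (this is a private Keras training utility, so the exact import path and behaviour may vary between TensorFlow versions): a tuple of NumPy arrays plus a `batch_size` yields a generator and `ceil(num_samples / batch_size)` steps per epoch, with each yield preserving the `(x, y)` structure.

import math
import numpy as np

x = np.random.rand(10, 3)
y = np.random.rand(10, 1)

gen, steps = convert_to_generator_like((x, y), batch_size=4, epochs=1)
assert steps == math.ceil(10 / 4)   # 3
x_batch, y_batch = next(gen)        # first batch, structure matches (x, y)
assert x_batch.shape == (4, 3)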
Example #3
    def _hierarchical_pad(input_, output, control):
      """Pad and flatten hierarchical inputs, outputs, and controls."""
      # Pad empty segments with end tokens and flatten hierarchy.
      input_ = nest.flatten(pad_with_element(
          input_, self._max_lengths[:-1],
          data.np_onehot([self.end_token], self.input_depth)))
      output = nest.flatten(pad_with_element(
          output, self._max_lengths[:-1],
          data.np_onehot([self.end_token], self.output_depth)))
      length = np.squeeze(np.array([len(x) for x in input_], np.int32))

      # Pad and concatenate the flattened hierarchy.
      input_ = np.concatenate(
          [pad_with_value(x, self._max_lengths[-1], 0) for x in input_])
      output = np.concatenate(
          [pad_with_value(x, self._max_lengths[-1], 0) for x in output])

      if np.size(control):
        control = nest.flatten(pad_with_element(
            control, self._max_lengths[:-1],
            data.np_onehot(
                [self._control_pad_token], self.control_depth)))
        control = np.concatenate(
            [pad_with_value(x, self._max_lengths[-1], 0) for x in control])

      return input_, output, control, length
Example #4
def _eager_metrics_fn(model, outputs, targets, sample_weights=None, masks=None):
  """Calculates the metrics for each output of the given model.

  Arguments:
      model: The model on which metrics are being calculated.
      outputs: The outputs of the given model.
      targets: The predictions or targets of the given model.
      sample_weights: Optional list of sample weights for each output.
      masks: Optional list of masks for each output.

  Returns:
      Returns the metric results for each output of the model.
  """
  outputs = nest.flatten(outputs)
  targets = nest.flatten(targets)
  # TODO(psv): Consider supporting skip target indices in eager mode?
  # Invoke all(weighted and unweighted) metrics.
  metric_results = []
  if targets:
    metric_results = model._handle_metrics(
        outputs,
        targets=targets,
        sample_weights=sample_weights,
        masks=masks,
        return_weighted_and_unweighted_metrics=True)

  # Add metric results from the `add_metric` metrics.
  metric_results.extend([
      m.result()
      for m in model.metrics
      if m not in model._compile_metric_functions
  ])
  return metric_results
Example #5
  def _get_cached_states(self, times):
    """Retrieve cached states for a batch of times."""
    read_chunk_numbers = self._get_chunk_number(times)
    looked_up_state = list(self._cached_states.lookup(
        math_ops.cast(read_chunk_numbers, dtypes.int64)))
    looked_up_state = tuple(looked_up_state)
    # We need to special-case the first chunk in a series to explicitly rely on
    # the model's starting state so that gradients flow back to it. Otherwise it
    # would affect only initialization, and would not be read from or updated
    # during training. Not doing this also isolates that part of the graph,
    # leading to errors on model reload if there are trainable variables
    # affecting a model's start state.
    if self._input_statistics is not None:
      start_time = self._input_statistics.start_time
    else:
      start_time = 0
    set_to_start_state = math_ops.equal(read_chunk_numbers,
                                        self._get_chunk_number(start_time))
    new_states = []
    for start_state_value, cache_variable in zip(
        nest.flatten(
            math_utils.replicate_state(self._start_state,
                                       array_ops.shape(times)[0])),
        nest.flatten(looked_up_state)):

      new_states.append(
          array_ops.where(set_to_start_state, start_state_value,
                          cache_variable))
    looked_up_state = nest.pack_sequence_as(looked_up_state, new_states)
    return looked_up_state
Example #6
  def testFlattenAndPack(self):
    structure = ((3, 4), 5, (6, 7, (9, 10), 8))
    flat = ["a", "b", "c", "d", "e", "f", "g", "h"]
    self.assertEqual(nest.flatten(structure), [3, 4, 5, 6, 7, 9, 10, 8])
    self.assertEqual(
        nest.pack_sequence_as(structure, flat), (("a", "b"), "c",
                                                 ("d", "e", ("f", "g"), "h")))
    point = collections.namedtuple("Point", ["x", "y"])
    structure = (point(x=4, y=2), ((point(x=1, y=0),),))
    flat = [4, 2, 1, 0]
    self.assertEqual(nest.flatten(structure), flat)
    restructured_from_flat = nest.pack_sequence_as(structure, flat)
    self.assertEqual(restructured_from_flat, structure)
    self.assertEqual(restructured_from_flat[0].x, 4)
    self.assertEqual(restructured_from_flat[0].y, 2)
    self.assertEqual(restructured_from_flat[1][0][0].x, 1)
    self.assertEqual(restructured_from_flat[1][0][0].y, 0)

    self.assertEqual([5], nest.flatten(5))
    self.assertEqual([np.array([5])], nest.flatten(np.array([5])))

    self.assertEqual("a", nest.pack_sequence_as(5, ["a"]))
    self.assertEqual(
        np.array([5]), nest.pack_sequence_as("scalar", [np.array([5])]))

    with self.assertRaisesRegexp(ValueError, "Structure is a scalar"):
      nest.pack_sequence_as("scalar", [4, 5])

    with self.assertRaisesRegexp(TypeError, "flat_sequence"):
      nest.pack_sequence_as([4, 5], "bad_sequence")

    with self.assertRaises(ValueError):
      nest.pack_sequence_as([5, 6, [7, 8]], ["a", "b", "c"])
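The round trip exercised by this test is also available through the public `tf.nest` API in TensorFlow 2.x, which fronts the internal `nest` module used throughout these examples:

import tensorflow as tf

structure = ((3, 4), 5, (6, 7, (9, 10), 8))
flat = tf.nest.flatten(structure)   # [3, 4, 5, 6, 7, 9, 10, 8]
rebuilt = tf.nest.pack_sequence_as(structure, list("abcdefgh"))
# (('a', 'b'), 'c', ('d', 'e', ('f', 'g'), 'h'))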
Example #7
 def _apply_exogenous_update(
     self, current_times, step_number, state, raw_features,
     embedded_exogenous_regressors):
   """Performs a conditional state update based on exogenous features."""
   if embedded_exogenous_regressors is None:
     return state
   else:
     current_exogenous_regressors = embedded_exogenous_regressors[
         :, step_number, :]
     exogenous_updated_state = self._exogenous_input_step(
         current_times=current_times,
         current_exogenous_regressors=current_exogenous_regressors,
         state=state)
     if self._exogenous_update_condition is not None:
       current_raw_exogenous_features = {
           key: value[:, step_number] for key, value in raw_features.items()
           if key not in [PredictionFeatures.STATE_TUPLE,
                          TrainEvalFeatures.TIMES,
                          TrainEvalFeatures.VALUES]}
       conditionally_updated_state_flat = []
       for updated_state_element, original_state_element in zip(
           nest.flatten(exogenous_updated_state),
           nest.flatten(state)):
         conditionally_updated_state_flat.append(
             array_ops.where(
                 self._exogenous_update_condition(
                     times=current_times,
                     features=current_raw_exogenous_features),
                 updated_state_element,
                 original_state_element))
       return nest.pack_sequence_as(state, conditionally_updated_state_flat)
     else:
       return exogenous_updated_state
Example #8
def _eager_metrics_fn(model,
                      outputs,
                      targets,
                      sample_weights=None,
                      masks=None,
                      return_stateful_result=True):
  """Calculates the metrics for each output of the given model.

  Arguments:
      model: The model on which metrics are being calculated.
      outputs: The outputs of the given model.
      targets: The predictions or targets of the given model.
      sample_weights: Optional list of sample weights for each output.
      masks: Optional list of masks for each output.
      return_stateful_result: Boolean, indicates whether the stateful
        (aggregated)/stateless metric result should be returned.

  Returns:
      Returns the metric results for each output of the model.
  """
  outputs = nest.flatten(outputs)
  targets = nest.flatten(targets)
  # TODO(psv): Consider supporting skip target indices in eager mode?
  metric_results = model._handle_metrics(
      outputs,
      targets=targets,
      sample_weights=sample_weights,
      masks=masks,
      return_stateful_result=return_stateful_result)
  return [backend.mean(t) for t in metric_results]
Example #9
def _Update(struct_acc, struct_x, t):
  """Updates t-th row in accumulators.

  Args:
    struct_acc: The accumulators. A structure of tensors.
    struct_x: The new values. A structure of tensors congruent to `struct_acc`.
    t: A scalar integer. Performance is better if `t` is on the device
      memory.

  Returns:
    A structure of tensors. Say, ret is a returned dictionary. Then, for
    each key, we have:
      ret[key] = struct_acc[key];
      ret[key][t, :] = struct_x[key]
  """
  to_skip_update = set()
  acc_lst = nest.flatten(struct_acc)
  x_lst = nest.flatten(struct_x)
  t = math_ops.to_int32([t])  # tf.to_int32 casts on-device tensors.
  lst = []
  for acc, x in zip(acc_lst, x_lst):
    if acc in to_skip_update:
      # Until b/62105730 is fixed, we need to avoid inplace update for tensors
      # of rank 1.  could reshape to handle it, but we don't really need the
      # values applied to these, so just skip their modification.
      lst += [acc]
    else:
      lst += [alias_inplace_update(acc, t, array_ops.expand_dims(x, 0))]
  return nest.pack_sequence_as(struct_acc, lst)
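In NumPy terms, the per-row update described in the docstring amounts to the following sketch for a dict-structured accumulator (ignoring the rank-1 in-place-update workaround):

import numpy as np

def np_update(struct_acc, struct_x, t):
    out = {}
    for key, acc in struct_acc.items():
        ret = acc.copy()           # ret[key] = struct_acc[key]
        ret[t, :] = struct_x[key]  # ret[key][t, :] = struct_x[key]
        out[key] = ret
    return out

acc = {"a": np.zeros((3, 2))}
new = np_update(acc, {"a": np.ones(2)}, t=1)  # row 1 of new["a"] is [1., 1.]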
Example #10
def _create_multi_lstm_cell_ops(batch_size, num_units, input_depth,
                                num_layers, max_time, compiled):
  with variable_scope.variable_scope(
      "root",
      initializer=init_ops.random_uniform_initializer(-0.1, 0.1, seed=2)):
    inputs = variable_scope.get_variable(
        "inputs", initializer=random_ops.random_uniform(
            (max_time, batch_size, input_depth), seed=1))
    maybe_xla = lambda c: rnn_cell.CompiledWrapper(c) if compiled else c
    cell = core_rnn_cell_impl.MultiRNNCell(
        [maybe_xla(core_rnn_cell_impl.LSTMCell(num_units))
         for _ in range(num_layers)])
    initial_state = cell.zero_state(
        batch_size=batch_size, dtype=dtypes.float32)
    outputs, final_state = rnn.dynamic_rnn(
        cell=cell, inputs=inputs, initial_state=initial_state,
        time_major=True)
    flat_final_state = nest.flatten(final_state)
    trainable_variables = variables.trainable_variables()
    outputs_grad = gradients_impl.gradients(
        [outputs],
        trainable_variables + [inputs] + nest.flatten(initial_state))
    final_state_grad = gradients_impl.gradients(
        flat_final_state,
        trainable_variables + [inputs] + nest.flatten(initial_state))

    return {"outputs": outputs,
            "final_state": flat_final_state,
            "outputs_grad": outputs_grad,
            "final_state_grad": final_state_grad}
Example #11
        def body(time, elements_finished, current_input, emit_ta, state, loop_state):
            """Internal while loop body for raw_rnn.

            Args:
              time: time scalar.
              elements_finished: batch-size vector.
              current_input: possibly nested tuple of input tensors.
              emit_ta: possibly nested tuple of output TensorArrays.
              state: possibly nested tuple of state tensors.
              loop_state: possibly nested tuple of loop state tensors.

            Returns:
              Tuple having the same size as Args but with updated values.
            """
            (next_output, cell_state) = cell(current_input, state)

            nest.assert_same_structure(state, cell_state)
            nest.assert_same_structure(cell.output_size, next_output)

            next_time = time + 1
            (next_finished, next_input, next_state, emit_output, next_loop_state) = loop_fn(
                next_time, next_output, cell_state, loop_state
            )

            nest.assert_same_structure(state, next_state)
            nest.assert_same_structure(current_input, next_input)
            nest.assert_same_structure(emit_ta, emit_output)

            # If loop_fn returns None for next_loop_state, just reuse the
            # previous one.
            loop_state = loop_state if next_loop_state is None else next_loop_state

            def _copy_some_through(current, candidate):
                """Copy some tensors through via array_ops.where."""
                current_flat = nest.flatten(current)
                candidate_flat = nest.flatten(candidate)
                # pylint: disable=g-long-lambda,cell-var-from-loop
                result_flat = [
                    _on_device(
                        lambda: array_ops.where(elements_finished, current_i, candidate_i), device=candidate_i.op.device
                    )
                    for (current_i, candidate_i) in zip(current_flat, candidate_flat)
                ]
                # pylint: enable=g-long-lambda,cell-var-from-loop
                return nest.pack_sequence_as(structure=current, flat_sequence=result_flat)

            emit_output = _copy_some_through(zero_emit, emit_output)
            next_state = _copy_some_through(state, next_state)

            emit_output_flat = nest.flatten(emit_output)
            emit_ta_flat = nest.flatten(emit_ta)

            elements_finished = math_ops.logical_or(elements_finished, next_finished)

            emit_ta_flat = [ta.write(time, emit) for (ta, emit) in zip(emit_ta_flat, emit_output_flat)]

            emit_ta = nest.pack_sequence_as(structure=emit_structure, flat_sequence=emit_ta_flat)

            return (next_time, elements_finished, next_input, emit_ta, next_state, loop_state)
Example #12
  def testNoProjNoShardingNestedTupleStateSaver(self):
    num_units = 3
    input_size = 5
    batch_size = 2
    max_length = 8
    with self.test_session(graph=tf.Graph()) as sess:
      initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=self._seed)
      state_saver = TestStateSaver(batch_size, {"c0": num_units,
                                                "m0": num_units,
                                                "c1": num_units + 1,
                                                "m1": num_units + 1,
                                                "c2": num_units + 2,
                                                "m2": num_units + 2,
                                                "c3": num_units + 3,
                                                "m3": num_units + 3})
      def _cell(i):
        return tf.contrib.rnn.LSTMCell(
            num_units + i, use_peepholes=False, initializer=initializer,
            state_is_tuple=True)

      # This creates a state tuple which has 4 sub-tuples of length 2 each.
      cell = tf.contrib.rnn.MultiRNNCell(
          [_cell(i) for i in range(4)], state_is_tuple=True)

      self.assertEqual(len(cell.state_size), 4)
      for i in range(4):
        self.assertEqual(len(cell.state_size[i]), 2)

      inputs = max_length * [
          tf.placeholder(tf.float32, shape=(batch_size, input_size))]

      state_names = (("c0", "m0"), ("c1", "m1"),
                     ("c2", "m2"), ("c3", "m3"))
      with tf.variable_scope("share_scope"):
        outputs, state = tf.contrib.rnn.static_state_saving_rnn(
            cell, inputs, state_saver=state_saver, state_name=state_names)
      self.assertEqual(len(outputs), len(inputs))

      # Final output comes from _cell(3) which has state size num_units + 3
      for out in outputs:
        self.assertEqual(out.get_shape().as_list(), [batch_size, num_units + 3])

      tf.global_variables_initializer().run()
      input_value = np.random.randn(batch_size, input_size)
      last_states = sess.run(
          list(nest.flatten(state)), feed_dict={inputs[0]: input_value})
      saved_states = sess.run(
          list(state_saver.saved_state.values()),
          feed_dict={inputs[0]: input_value})
      self.assertEqual(8, len(last_states))
      self.assertEqual(8, len(saved_states))
      flat_state_names = nest.flatten(state_names)
      named_saved_states = dict(
          zip(state_saver.saved_state.keys(), saved_states))

      for i in range(8):
        self.assertAllEqual(
            last_states[i],
            named_saved_states[flat_state_names[i]])
Example #13
 def _assert_same_shape(input1, input2, double=False):
   flat_input1 = nest.flatten(input1)
   flat_input2 = nest.flatten(input2)
   for inp1, inp2 in zip(flat_input1, flat_input2):
     input_shape = inp1.get_shape().as_list()
     if double:
       input_shape[1] *= 2
     self.assertEqual(input_shape, inp2.get_shape().as_list())
Example #14
 def testFlattenDictOrder(self):
   """`flatten` orders dicts by key, including OrderedDicts."""
   ordered = collections.OrderedDict([("d", 3), ("b", 1), ("a", 0), ("c", 2)])
   plain = {"d": 3, "b": 1, "a": 0, "c": 2}
   ordered_flat = nest.flatten(ordered)
   plain_flat = nest.flatten(plain)
   self.assertEqual([0, 1, 2, 3], ordered_flat)
   self.assertEqual([0, 1, 2, 3], plain_flat)
Example #15
 def _run_targets(self, targets1, targets2=None, run_init=True):
   targets1 = nest.flatten(targets1)
   targets2 = ([] if targets2 is None else nest.flatten(targets2))
   assert len(targets1) == len(targets2) or not targets2
   if run_init:
     init = variables.global_variables_initializer()
     self.evaluate(init)
   return self.evaluate(targets1 + targets2)
Example #16
  def run_steps_on_dataset(self, fn, iterator, iterations):
    # Enqueue ops
    shapes = nest.flatten(iterator.output_shapes)
    if any([not s.is_fully_defined() for s in shapes]):
      raise ValueError(
          'TPU currently requires fully defined shapes. Either use '
          'set_shape() on the input tensors or use '
          'dataset.apply(map_and_batch(..., drop_remainder=True)).')
    types = nest.flatten(iterator.output_types)

    def enqueue_ops_fn():
      """Enqueue ops for one iteration."""
      control_deps = []
      sharded_inputs = []
      with ops.device(self._host):
        for _ in range(self._num_cores_per_host):
          # Use control dependencies to ensure a deterministic ordering.
          with ops.control_dependencies(control_deps):
            inputs = nest.flatten(iterator.get_next())
            control_deps.extend(inputs)
            sharded_inputs.append(inputs)

      enqueue_ops = []
      for core_id, shard_input in enumerate(sharded_inputs):
        enqueue_ops.append(
            tpu_ops.infeed_enqueue_tuple(
                inputs=shard_input, shapes=shapes, device_ordinal=core_id))
      return enqueue_ops

    def enqueue_ops_loop_body(i):
      with ops.control_dependencies(enqueue_ops_fn()):
        return i + 1

    with ops.device(self._host):
      enqueue_ops = control_flow_ops.while_loop(
          lambda i: i < iterations,
          enqueue_ops_loop_body,
          [constant_op.constant(0)],
          parallel_iterations=1)

    # Dequeue ops
    def dequeue_fn():
      dequeued = tpu.infeed_dequeue_tuple(dtypes=types, shapes=shapes)
      return nest.pack_sequence_as(iterator.output_shapes, dequeued)

    # Wrap `fn` for repeat.
    run_fn = lambda: fn(dequeue_fn())

    # Repeat
    def iterate_on_tpu():
      return tpu.repeat(iterations, run_fn, [])

    # Re-write and distribute computation.
    tpu_result = tpu.batch_parallel(
        iterate_on_tpu, [], num_shards=self._num_cores_per_host)

    return control_flow_ops.group(tpu_result, enqueue_ops)
Example #17
 def wrap_state(self, state):
     dummy = BeamDecoderCellWrapper(None, self.num_classes, self.max_len, self.stop_token, self.beam_size)
     if nest.is_sequence(state):
         batch_size = tf.shape(nest.flatten(state)[0])[0]
         dtype = nest.flatten(state)[0].dtype
     else:
         batch_size = tf.shape(state)[0]
         dtype = state.dtype
     return dummy._create_state(batch_size, dtype, cell_state=state)
Example #18
def _check_default_value(shape, default_value, dtype, key):
  """Returns default value as tuple if it's valid, otherwise raises errors.

  This function verifies that `default_value` is compatible with both `shape`
  and `dtype`. If it is not compatible, it raises an error. If it is compatible,
  it casts default_value to a tuple and returns it. `key` is used only
  for error message.

  Args:
    shape: An iterable of integers specifying the shape of the `Tensor`.
    default_value: If a single value is provided, the same value will be applied
      as the default value for every item. If an iterable of values is
      provided, the shape of the `default_value` should be equal to the given
      `shape`.
    dtype: defines the type of values. Default value is `tf.float32`. Must be a
      non-quantized, real integer or floating point type.
    key: A string providing key to look up corresponding `Tensor`.

  Returns:
    A tuple which will be used as default value.

  Raises:
    TypeError: if `default_value` is an iterable but not compatible with `shape`
    TypeError: if `default_value` is not compatible with `dtype`.
    ValueError: if `dtype` is not convertible to `tf.float32`.
  """
  if default_value is None:
    return None

  if isinstance(default_value, int):
    return _create_tuple(shape, default_value)

  if isinstance(default_value, float) and dtype.is_floating:
    return _create_tuple(shape, default_value)

  if callable(getattr(default_value, 'tolist', None)):  # Handles numpy arrays
    default_value = default_value.tolist()

  if nest.is_sequence(default_value):
    if not _is_shape_and_default_value_compatible(default_value, shape):
      raise ValueError(
          'The shape of default_value must be equal to given shape. '
          'default_value: {}, shape: {}, key: {}'.format(
              default_value, shape, key))
    # Check if the values in the list are all integers or are convertible to
    # floats.
    is_list_all_int = all(
        isinstance(v, int) for v in nest.flatten(default_value))
    is_list_has_float = any(
        isinstance(v, float) for v in nest.flatten(default_value))
    if is_list_all_int:
      return _as_tuple(default_value)
    if is_list_has_float and dtype.is_floating:
      return _as_tuple(default_value)
  raise TypeError('default_value must be compatible with dtype. '
                  'default_value: {}, dtype: {}, key: {}'.format(
                      default_value, dtype, key))
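The two `nest.flatten` calls near the end are what let the check treat an arbitrarily nested `default_value` uniformly; a small illustration of that leaf inspection (using the public `tf.nest` equivalent):

import tensorflow as tf

default_value = [[1, 2.0], [3, 4]]
leaves = tf.nest.flatten(default_value)                         # [1, 2.0, 3, 4]
is_list_all_int = all(isinstance(v, int) for v in leaves)       # False
is_list_has_float = any(isinstance(v, float) for v in leaves)   # True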
Example #19
File: utils.py  Project: ALISCIFP/models
def state_barrier_context(state):
  """Return a context manager that prevents interior ops from running
  unless the whole state has been computed.

  This is to prevent assign race conditions.
  """
  tensors = [x for x in nest.flatten(state) if type(x) == tf.Tensor]
  tarray = [x.flow for x in nest.flatten(state) if hasattr(x, "flow")]
  return tf.control_dependencies(tensors + tarray)
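A hypothetical use of the helper above (names are illustrative): wrapping assignment ops in the returned context guarantees every tensor in `state` has been computed before any assign runs.

# Sketch only (TF1-style graph code): `state` is a nested structure of tensors
# and `state_vars` is a matching flat list of tf.Variable objects to update.
with state_barrier_context(state):
    assign_ops = [v.assign(x) for v, x in zip(state_vars, nest.flatten(state))]
update_op = tf.group(*assign_ops)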
Example #20
 def _get_grads_lists_curvature_prop(self, tensors):
   loss_inputs = list(loss.inputs for loss in self._layers.losses)
   transformed_random_signs = self._get_transformed_random_signs()
   grads_flat = gradients_impl.gradients(
       nest.flatten(loss_inputs),
       nest.flatten(tensors),
       grad_ys=nest.flatten(transformed_random_signs))
   grads_all = nest.pack_sequence_as(tensors, grads_flat)
   return tuple((grad,) for grad in grads_all)
Example #21
def _tpu_run(strategy, fn, args, kwargs):
  """Common implementation of TPUStrategy.experimental_run_v2."""
  if context.executing_eagerly() and not ops.inside_function():
    raise NotImplementedError(
        "Eager mode not supported in TPUStrategy outside TF functions.")

  if kwargs is None:
    kwargs = {}

  # Used to re-structure flattened output tensors from `tpu.replicate()`
  # into a structured format.
  result = [[]]

  def replicated_fn(replica_id, replica_args, replica_kwargs):
    """Wraps user function to provide replica ID and `Tensor` inputs."""
    with _TPUReplicaContext(strategy, replica_id_in_sync_group=replica_id):
      result[0] = fn(*replica_args, **replica_kwargs)
    return result[0]

  replicate_inputs = []  # By replica.
  for i in range(strategy.num_replicas_in_sync):
    replicate_inputs.append(
        [constant_op.constant(i, dtype=dtypes.int32),
         values.select_replica(i, args),
         values.select_replica(i, kwargs)])

  # Construct and pass `maximum_shapes` so that we could support dynamic
  # shapes using dynamic padder.
  if replicate_inputs:
    maximum_shapes = []
    flattened_list = nest.flatten(replicate_inputs[0])
    for input_tensor in flattened_list:
      maximum_shapes.append(input_tensor.get_shape())
    maximum_shapes = nest.pack_sequence_as(replicate_inputs[0],
                                           maximum_shapes)
  else:
    maximum_shapes = None

  with strategy.scope():
    replicate_outputs = tpu.replicate(replicated_fn, replicate_inputs,
                                      maximum_shapes=maximum_shapes)

  # Remove all no ops that may have been added during 'tpu.replicate()'
  if isinstance(result[0], list):
    result[0] = [
        output for output in result[0] if tensor_util.is_tensor(output)
    ]

  # Workaround for `tpu.replicate` behaviour when single `Tensor` returned.
  replicate_outputs = [
      nest.pack_sequence_as(result[0], nest.flatten(replica_output))
      for replica_output in replicate_outputs
  ]

  device_map = strategy.extended._device_map  # pylint: disable=protected-access
  return values.regroup(device_map, replicate_outputs)
Example #22
  def __call__(self, inputs, *args, **kwargs):
    """Wraps `call`, applying pre- and post-processing steps.

    Arguments:
      inputs: input tensor(s).
      *args: additional positional arguments to be passed to `self.call`.
      **kwargs: additional keyword arguments to be passed to `self.call`.
        **Note**: kwarg `scope` is reserved for use by the layer.
    Returns:
      Output tensor(s).
    """
    self._set_scope(kwargs.pop('scope', None))

    # Ensure the Layer, if being reused, is working with inputs from
    # the same graph as where it was created.
    try:
      ops._get_graph_from_inputs(nest.flatten(inputs), graph=self.graph)  # pylint: disable=protected-access
    except ValueError as e:
      raise ValueError('Input graph and Layer graph are not the same: %s' % e)

    with vs.variable_scope(self._scope,
                           reuse=self.built or self._reuse) as scope:
      with ops.name_scope(scope.original_name_scope):
        if not self.built:
          # Check input assumptions set before layer building, e.g. input rank.
          self._assert_input_compatibility(inputs)
          input_list = [
              ops.convert_to_tensor(x, name='input')
              for x in nest.flatten(inputs)]
          input_shapes = [x.get_shape() for x in input_list]
          if len(input_shapes) == 1:
            self.build(input_shapes[0])
          else:
            self.build(input_shapes)
        if 'scope' in tf_inspect.getargspec(self.call).args:
          kwargs['scope'] = scope
        # Check input assumptions set after layer building, e.g. input shape.
        self._assert_input_compatibility(inputs)
        outputs = self.call(inputs, *args, **kwargs)

        # Apply activity regularization.
        # Note that it should be applied every time the layer creates a new
        # output, since it is output-specific.
        if hasattr(self, 'activity_regularizer') and self.activity_regularizer:
          output_list = _to_list(outputs)
          for output in output_list:
            with ops.name_scope('ActivityRegularizer'):
              activity_regularization = self.activity_regularizer(output)
            self.add_loss(activity_regularization)
            _add_elements_to_collection(
                activity_regularization, ops.GraphKeys.REGULARIZATION_LOSSES)

    # Update global default collections.
    _add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS)
    self.built = True
    return outputs
Example #23
def _prepare_memory(memory, memory_sequence_length, check_inner_dims_defined):
  """Convert to tensor and possibly mask `memory`.

  Args:
    memory: `Tensor`, shaped `[batch_size, max_time, ...]`.
    memory_sequence_length: `int32` `Tensor`, shaped `[batch_size]`.
    check_inner_dims_defined: Python boolean.  If `True`, the `memory`
      argument's shape is checked to ensure all but the two outermost
      dimensions are fully defined.

  Returns:
    A (possibly masked), checked, new `memory`.

  Raises:
    ValueError: If `check_inner_dims_defined` is `True` and not
      `memory.shape[2:].is_fully_defined()`.
  """
  memory = nest.map_structure(
      lambda m: ops.convert_to_tensor(m, name="memory"), memory)
  if memory_sequence_length is not None:
    memory_sequence_length = ops.convert_to_tensor(
        memory_sequence_length, name="memory_sequence_length")
  if check_inner_dims_defined:
    def _check_dims(m):
      if not m.get_shape()[2:].is_fully_defined():
        raise ValueError("Expected memory %s to have fully defined inner dims, "
                         "but saw shape: %s" % (m.name, m.get_shape()))
    nest.map_structure(_check_dims, memory)
  if memory_sequence_length is None:
    seq_len_mask = None
  else:
    seq_len_mask = array_ops.sequence_mask(
        memory_sequence_length,
        maxlen=array_ops.shape(nest.flatten(memory)[0])[1],
        dtype=nest.flatten(memory)[0].dtype)
    seq_len_batch_size = (
        memory_sequence_length.shape[0].value
        or array_ops.shape(memory_sequence_length)[0])
  def _maybe_mask(m, seq_len_mask):
    rank = m.get_shape().ndims
    rank = rank if rank is not None else array_ops.rank(m)
    extra_ones = array_ops.ones(rank - 2, dtype=dtypes.int32)
    m_batch_size = m.shape[0].value or array_ops.shape(m)[0]
    if memory_sequence_length is not None:
      message = ("memory_sequence_length and memory tensor batch sizes do not "
                 "match.")
      with ops.control_dependencies([
          check_ops.assert_equal(
              seq_len_batch_size, m_batch_size, message=message)]):
        seq_len_mask = array_ops.reshape(
            seq_len_mask,
            array_ops.concat((array_ops.shape(seq_len_mask), extra_ones), 0))
        return m * seq_len_mask
    else:
      return m
  return nest.map_structure(lambda m: _maybe_mask(m, seq_len_mask), memory)
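The masking step relies on `tf.sequence_mask` producing a `[batch_size, max_time]` mask, which is then reshaped with trailing 1s so it broadcasts over the memory's inner dimensions:

import tensorflow as tf

mask = tf.sequence_mask([2, 3], maxlen=4, dtype=tf.float32)
# [[1., 1., 0., 0.],
#  [1., 1., 1., 0.]]
# After reshaping to [batch_size, max_time, 1, ...], multiplying by the mask
# zeroes out every memory slot past each sequence's true length.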
Example #24
 def run_and_assert_equal(self, targets1, targets2, atol=1e-4, rtol=1e-4):
   targets1 = nest.flatten(targets1)
   targets2 = nest.flatten(targets2)
   assert len(targets1) == len(targets2)
   init = variables.global_variables_initializer()
   self.evaluate(init)
   outputs = self.evaluate(targets1 + targets2)
   n = len(outputs) // 2
   for i in range(n):
     self.assertAllClose(outputs[i], outputs[i + n], rtol=rtol, atol=atol)
Example #25
 def testAssertions(self):
   a = tracking.Checkpointable()
   a.l = {"k": [numpy.zeros([2, 2])]}
   self.assertAllEqual(nest.flatten({"k": [numpy.zeros([2, 2])]}),
                       nest.flatten(a.l))
   self.assertAllClose({"k": [numpy.zeros([2, 2])]}, a.l)
   nest.map_structure(self.assertAllClose, a.l, {"k": [numpy.zeros([2, 2])]})
   a.tensors = {"k": [array_ops.ones([2, 2]), array_ops.zeros([3, 3])]}
   self.assertAllClose({"k": [numpy.ones([2, 2]), numpy.zeros([3, 3])]},
                       self.evaluate(a.tensors))
Example #26
def _make_indexed_slices_indices_types_match(true_graph, false_graph):
  """Match dtype of IndexedSlices.indices in outputs of {true|false}_graphs."""
  indexed_slice_indices = []
  current_index = 0
  true_outputs_flat_with_composites = nest.flatten(
      true_graph.structured_outputs, expand_composites=False)
  false_outputs_flat_with_composites = nest.flatten(
      false_graph.structured_outputs, expand_composites=False)
  # Store indices of IndexedSlices.indices in `indexed_slice_indices`.
  for idx, (true_out, false_out) in enumerate(
      zip(true_outputs_flat_with_composites,
          false_outputs_flat_with_composites)):
    if isinstance(true_out, ops.IndexedSlices) != isinstance(
        false_out, ops.IndexedSlices):
      raise TypeError("Cannot reconcile tf.cond %i-th outputs:\n"
                      "  true_fn returned:  %s\n"
                      "  false_fn returned: %s" % (idx, true_out, false_out))
    if isinstance(true_out, ops.IndexedSlices):
      # indices is the second component of the composite tensor.
      indexed_slice_indices.append(current_index + 1)
    if nest.is_sequence_or_composite(true_out):
      current_index += len(nest.flatten(true_out, expand_composites=True))
    else:
      current_index += 1

  if not indexed_slice_indices:
    return

  if current_index != len(true_graph.outputs):
    raise ValueError("Insufficient elements in true_graph.outputs.\n"
                     "Expected: %i\n"
                     "Actual: %i" % (current_index, len(true_graph.outputs)))

  # Cast indices with mismatching types to int64.
  for index in indexed_slice_indices:
    if true_graph.outputs[index].dtype not in (dtypes.int32, dtypes.int64):
      raise TypeError("Type of IndexedSlices.indices must be int32 or int64. "
                      "Found: %s" % str(true_graph.outputs[index].dtype))
    if false_graph.outputs[index].dtype not in (dtypes.int32, dtypes.int64):
      raise TypeError("Type of IndexedSlices.indices must be int32 or int64. "
                      "Found: %s" % str(false_graph.outputs[index].dtype))
    if true_graph.outputs[index].dtype != false_graph.outputs[index].dtype:
      if false_graph.outputs[index].dtype == dtypes.int32:
        with false_graph.as_default():
          false_graph.outputs[index] = math_ops.cast(false_graph.outputs[index],
                                                     dtypes.int64)
      else:
        with true_graph.as_default():
          true_graph.outputs[index] = math_ops.cast(true_graph.outputs[index],
                                                    dtypes.int64)

  true_graph.structured_outputs = func_graph_module.pack_sequence_as(
      true_graph.structured_outputs, true_graph.outputs)
  false_graph.structured_outputs = func_graph_module.pack_sequence_as(
      false_graph.structured_outputs, false_graph.outputs)
Example #27
  def gradient(self, target, sources, output_gradients=None):
    """Computes the gradient using operations recorded in context of this tape.

    Args:
      target: Tensor (or list of tensors) to be differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once on a non-persistent tape.
    """
    if self._tape is None:
      raise RuntimeError("GradientTape.gradient can only be called once on "
                         "non-persistent tapes.")
    if self._recording:
      if not self._persistent:
        self._pop_tape()
      else:
        logging.log_first_n(logging.WARN,
                            "Calling GradientTape.gradient on a persistent "
                            "tape inside it's context is significantly less "
                            "efficient than calling it outside the context (it "
                            "causes the gradient ops to be recorded on the "
                            "tape, leading to increased CPU and memory usage). "
                            "Only call GradientTape.gradient inside the "
                            "context if you actually want to trace the "
                            "gradient in order to compute higher order "
                            "derrivatives.", 1)

    flat_sources = nest.flatten(sources)
    flat_sources = [_handle_or_self(x) for x in flat_sources]

    if output_gradients is not None:
      output_gradients = [None if x is None else ops.convert_to_tensor(x)
                          for x in nest.flatten(output_gradients)]

    flat_grad = imperative_grad.imperative_grad(
        self._tape,
        nest.flatten(target),
        flat_sources,
        output_gradients=output_gradients)

    if not self._persistent:
      self._tape = None

    grad = nest.pack_sequence_as(sources, flat_grad)
    return grad
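Because `sources` is flattened and the result packed back with `nest.pack_sequence_as`, the returned gradients mirror the structure of `sources`. With the public API, for example:

import tensorflow as tf

x = tf.Variable(2.0)
y = tf.Variable(3.0)
with tf.GradientTape() as tape:
    loss = x * x + y
grads = tape.gradient(loss, {"x": x, "y": y})
# grads has the same dict structure as `sources`: {"x": 4.0, "y": 1.0}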
Example #28
 def insert(self, keys, values):
   nest.assert_same_structure(self._hash_tables, values)
   # Avoid race conditions by requiring that all inputs are computed before any
   # inserts happen (an issue if one key's update relies on another's value).
   values_flat = [array_ops.identity(value) for value in nest.flatten(values)]
   with ops.control_dependencies(values_flat):
     insert_ops = [hash_table.insert(keys, value)
                   for hash_table, value
                   in zip(nest.flatten(self._hash_tables),
                          values_flat)]
   return control_flow_ops.group(*insert_ops)
Example #29
    def _create(self, encoder_output, decoder_state_size, **kwargs):
        """ Creates decoder's initial RNN states according to
        `decoder_state_size`.

        Do linear transformations to encoder output/state and map the
        structure to `decoder_state_size`.
        If params[`bridge_input`] == "output", first average the encoder
        output tensor over timesteps.
        Args:
            encoder_output: An instance of `collections.namedtuple`
              from `Encoder.encode()`.
            decoder_state_size: RNN decoder state size.
            **kwargs:

        Returns: The decoder states with the structure determined
          by `decoder_state_size`.

        Raises:
            ValueError: if `encoder_output` has no attribute named
              params[`bridge_input`].
        """
        if not hasattr(encoder_output, self.params["bridge_input"]):
            raise ValueError("encoder output has not attribute: {}, "
                             "only final_state and outputs available"
                             .format(self.params["bridge_input"]))
        if self.params["bridge_input"] == "outputs":
            # [batch_size, max_time, num_units]
            context = encoder_output.outputs
            mask = tf.sequence_mask(
                lengths=tf.to_int32(encoder_output.attention_length),
                maxlen=tf.shape(context)[1],
                dtype=tf.float32)
            # [batch_size, num_units]
            bridge_input = tf.truediv(
                tf.reduce_sum(context * tf.expand_dims(mask, 2), axis=1),
                tf.expand_dims(
                    tf.to_float(encoder_output.attention_length), 1))
        elif self.params["bridge_input"] == "final_states":
            bridge_input = nest.flatten(_final_states(encoder_output.final_states))
            bridge_input = tf.concat(bridge_input, 1)
        else:
            raise ValueError("Unrecognized value of bridge_input: {}, "
                             "should be outputs or final_state".format(self.params["bridge_input"]))
        state_size_splits = nest.flatten(decoder_state_size)
        total_decoder_state_size = sum(state_size_splits)
        # [batch_size, total_decoder_state_size]
        init_state = fflayer(inputs=bridge_input,
                             output_size=total_decoder_state_size,
                             activation=self._activation,
                             name="init_state_trans")
        init_state = nest.pack_sequence_as(
            decoder_state_size,
            tf.split(init_state, state_size_splits, axis=1))
        return init_state
Example #30
  def _assert_correct_outputs(self, initial_state_):
    nest.assert_same_structure(initial_state_, self.decoder_cell.state_size)
    nest.assert_same_structure(initial_state_, self.encoder_outputs.final_state)

    encoder_state_flat = nest.flatten(self.encoder_outputs.final_state)
    with self.test_session() as sess:
      encoder_state_flat_ = sess.run(encoder_state_flat)

    initial_state_flat_ = nest.flatten(initial_state_)
    for e_dec, e_enc in zip(initial_state_flat_, encoder_state_flat_):
      np.testing.assert_array_equal(e_dec, e_enc)
Example #31
 def vjp(dy=None):
   if dy is not None:
     dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
   return imperative_grad.imperative_grad(
       this_tape, nest.flatten(result), sources, output_gradients=dy)
Example #32
 def update(self, x, y=None):
     x = nest.flatten(x)[0].numpy()
     self.sum += x
     self.square_sum += np.square(x)
     self.count += 1
     self.shape = x.shape
Example #33
 def transform(self, x, fit=False):
     sentence = nest.flatten(x)[0].numpy().decode('utf-8')
     data = self._vectorizer.transform([sentence]).toarray()
     if self.selector:
         data = self.selector.transform(data).astype('float32')
     return data[0]
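The `nest.flatten(x)[0].numpy().decode('utf-8')` idiom above is a common way to pull a single Python string out of an eager string tensor (or out of a nested structure containing one); a minimal check with the public API:

import tensorflow as tf

x = tf.constant("hello world")
sentence = tf.nest.flatten(x)[0].numpy().decode("utf-8")  # 'hello world'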
Example #34
 def _flat_tensor_specs(self) -> List[TypeSpec]:
     """A list of TensorSpecs compatible with self._to_tensor_list(v)."""
     component_flat_tensor_specs = nest.map_structure(
         functools.partial(get_batchable_flat_tensor_specs,
                           context_spec=self), self._component_specs)
     return nest.flatten(component_flat_tensor_specs)
Example #35
def build_graph(filename, mode, configuration):

    if mode >= 3:
        raise ValueError("Mode parameter is not correct.")

    #create a new graph
    g = tf.Graph()

    #override the current default graph
    with g.as_default():

        inputs, labels, length = None, None, None

        #INIT PLACEHOLDER
        if mode == 0 or mode == 1:
            inputs, labels, length = sequence_example_utilities.build_input(filename, batches=configuration.batch_size, nr_classes=configuration.encoder_decoder.keys_number())
        elif mode == 2:
            inputs = _init_generate(configuration, configuration.encoder_decoder.get_input_size())


        #MAKE CELL
        cell = _build_rnn_cell(configuration.rnn_layer_size, configuration.attention_length, 0.8, 1.0)
        initial_state = cell.zero_state(configuration.batch_size, tf.float32)

        #MAKE RNN
        outputs, final_state = tf.nn.dynamic_rnn(cell=cell,
                                                 inputs=inputs,
                                                 sequence_length=length,
                                                 dtype = tf.int64,
                                                 initial_state=initial_state)

        #CREATE THE WEIGHTED MATRIX, RESULTING IN LOGITS
        #reduce a dimension added by the batched
        flatten_outputs = _flat_seq(outputs, length)
        flatten_logits = tf.contrib.layers.fully_connected(flatten_outputs,
                                                           configuration.encoder_decoder.keys_number(),
                                                           activation_fn = None)

        if mode == 0 or mode == 1:

            #COMPARE WITH LOGITS USING SOFTMAX
            flatten_labels = _flat_seq(labels, length)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=flatten_labels,
                                                                           logits=flatten_logits)

            #CORRECT LOGITS
            flatten_prediction = tf.argmax(flatten_logits, axis=1)
            correct_predictions = tf.to_float(tf.equal(flatten_labels, flatten_prediction))
            event_positions = tf.to_float(tf.not_equal(flatten_labels, constants.NO_CHORD_EVENT))
            no_event_positions = tf.to_float(tf.equal(flatten_labels, constants.NO_CHORD_EVENT))

            if mode == 0:

                #compute rnn parameters
                loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
                perplexity = tf.reduce_mean(tf.exp(cross_entropy))
                accuracy = tf.reduce_mean(correct_predictions)
                event_accuracy = (
                    tf.reduce_sum(correct_predictions * event_positions) /
                    tf.reduce_sum(event_positions))
                no_event_accuracy = (
                    tf.reduce_sum(correct_predictions * no_event_positions) /
                    tf.reduce_sum(no_event_positions))

                optimizer = tf.train.AdagradOptimizer(learning_rate=configuration.learning_rate)

                train = tf.contrib.slim.learning.create_train_op(loss, optimizer)
                tf.add_to_collection('train_node', train)

                train_param = {
                    'loss': loss,
                    'perplexity': perplexity,
                    'accuracy': accuracy,
                    'event_accuracy': event_accuracy,
                    'no_event_accuracy': no_event_accuracy,
                }

                for k in train_param:
                    tf.summary.scalar(k, train_param[k])
                    tf.add_to_collection(k, train_param[k])

            elif mode == 1:

                #USE TF.SLIM FOR EVALUATING AFTER TRAINING AND SHOW RESULTS IN TENSORBOARD
                eval_param, update_ops = tf.contrib.metrics.aggregate_metric_map(
                    {
                        'loss': tf.metrics.mean(cross_entropy),
                        'metrics/accuracy': tf.metrics.accuracy(flatten_labels, flatten_prediction),
                        'metrics/per_class_accuracy':tf.metrics.mean_per_class_accuracy(
                                flatten_labels, flatten_prediction, configuration.encoder_decoder.keys_number),
                        'metrics/event_accuracy': tf.metrics.recall(event_positions, correct_predictions),
                        'metrics/no_event_accuracy': tf.metrics.recall(no_event_positions, correct_predictions),
                        'metrics/perplexity': tf.metrics.mean(tf.exp(cross_entropy)),
                    })
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_node', updates_op)

                for k in eval_param:
                    #see them in tensorboard
                    tf.summary.scalar(k, eval_param[k])
                    tf.add_to_collection(k, eval_param[k])

        elif mode == 2:

            res = tf.placeholder(tf.int64, [])
            flatten_softmax = tf.nn.softmax(
                tf.div(flatten_logits, tf.fill([configuration.encoder_decoder.keys_number], res)))
            softmax = tf.reshape(flatten_softmax, [configuration.batch_size, -1, configuration.encoder_decoder.keys_number])

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', res)
            tf.add_to_collection('softmax', softmax)

            for state in flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in flatten(final_state):
                tf.add_to_collection('final_state', state)

        return g
Example #36
 def transform(self, x, fit=False):
     sentence = nest.flatten(x)[0].numpy().decode('utf-8')
     sequence = self._tokenizer.texts_to_sequences(sentence)[0]
     sequence = tf.keras.preprocessing.sequence.pad_sequences(
         sequence, self.max_len or self._max_len)
     return sequence
Example #37
def is_tensor_or_tensor_list(v):
    v = nest.flatten(v)
    if v and isinstance(v[0], ops.Tensor):
        return True
    else:
        return False
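A quick sanity check of the helper (sketch; `ops.Tensor` is the internal base class behind `tf.Tensor`, so eager tensors also satisfy the `isinstance` test):

import tensorflow as tf

assert is_tensor_or_tensor_list([tf.constant(1.0), tf.constant(2.0)])
assert not is_tensor_or_tensor_list([1.0, 2.0])   # plain Python floats
assert not is_tensor_or_tensor_list([])           # empty flat list -> False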
Example #38
    def tpu_function(args, kwargs):
      """TF Function used to replicate the user computation."""
      if kwargs is None:
        kwargs = {}

      # Remove None at the end of args as they are not replicatable
      # If there are None in the middle we can't do anything about it
      # so let those cases fail.
      # For example when Keras model predict is used they pass the targets as
      # None. We want to handle it here so all client libraries don't have to
      # do this as other strategies can handle None values better.
      while args and args[-1] is None:
        args = args[:-1]

      # Used to re-structure flattened output tensors from `tpu.replicate()`
      # into a structured format.
      result = [[]]

      def replicated_fn(replica_id, replica_args, replica_kwargs):
        """Wraps user function to provide replica ID and `Tensor` inputs."""
        with _TPUReplicaContext(strategy, replica_id_in_sync_group=replica_id):
          result[0] = fn(*replica_args, **replica_kwargs)
        return result[0]

      replicate_inputs = []  # By replica.
      for i in range(strategy.num_replicas_in_sync):
        replicate_inputs.append(
            [constant_op.constant(i, dtype=dtypes.int32),
             values.select_replica(i, args),
             values.select_replica(i, kwargs)])

      # Construct and pass `maximum_shapes` so that we could support dynamic
      # shapes using dynamic padder.
      if options.experimental_enable_dynamic_batch_size and replicate_inputs:
        maximum_shapes = []
        flattened_list = nest.flatten(replicate_inputs[0])
        for input_tensor in flattened_list:
          if tensor_util.is_tensor(input_tensor):
            rank = input_tensor.get_shape().rank
          else:
            rank = np.ndim(input_tensor)
          maximum_shape = tensor_shape.TensorShape([None] * rank)
          maximum_shapes.append(maximum_shape)
        maximum_shapes = nest.pack_sequence_as(replicate_inputs[0],
                                               maximum_shapes)
      else:
        maximum_shapes = None

      if options.experimental_bucketizing_dynamic_shape:
        padding_spec = tpu.PaddingSpec.POWER_OF_TWO
      else:
        padding_spec = None

      with strategy.scope():
        replicate_outputs = tpu.replicate(
            replicated_fn,
            replicate_inputs,
            device_assignment=self._device_assignment,
            maximum_shapes=maximum_shapes,
            padding_spec=padding_spec)

      # Remove all no ops that may have been added during 'tpu.replicate()'
      if isinstance(result[0], list):
        result[0] = [
            output for output in result[0] if not isinstance(
                output, ops.Operation)
        ]

      # Workaround for `tpu.replicate` behaviour when single `Tensor` returned.
      if result[0] is None or isinstance(result[0], ops.Operation):
        replicate_outputs = [None] * len(replicate_outputs)
      else:
        replicate_outputs = [
            nest.pack_sequence_as(result[0], nest.flatten(replica_output))
            for replica_output in replicate_outputs
        ]
      return values.regroup(replicate_outputs)
Example #39
 def update(self, x, y=None):
     # TODO: Implement a sequential version fit for both
     #  TfidfVectorizer and SelectKBest
     self._texts.append(nest.flatten(x)[0].numpy().decode('utf-8'))
Example #40
def scan(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
         swap_memory=False, infer_shape=True, reverse=False, name=None):
  """scan on the list of tensors unpacked from `elems` on dimension 0.

  The simplest version of `scan` repeatedly applies the callable `fn` to a
  sequence of elements from first to last. The elements are made of the tensors
  unpacked from `elems` on dimension 0. The callable fn takes two tensors as
  arguments. The first argument is the accumulated value computed from the
  preceding invocation of fn. If `initializer` is None, `elems` must contain
  at least one element, and its first element is used as the initializer.

  Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `[len(values)] + fn(initializer, values[0]).shape`.
  If reverse=True, it's fn(initializer, values[-1]).shape.

  This method also allows multi-arity `elems` and accumulator.  If `elems`
  is a (possibly nested) list or tuple of tensors, then each of these tensors
  must have a matching first (unpack) dimension.  The second argument of
  `fn` must match the structure of `elems`.

  If no `initializer` is provided, the output structure and dtypes of `fn`
  are assumed to be the same as its input; and in this case, the first
  argument of `fn` must match the structure of `elems`.

  If an `initializer` is provided, then the output of `fn` must have the same
  structure as `initializer`; and the first argument of `fn` must match
  this structure.

  For example, if `elems` is `(t1, [t2, t3])` and `initializer` is
  `[i1, i2]` then an appropriate signature for `fn` in `python2` is:
  `fn = lambda (acc_p1, acc_p2), (t1, [t2, t3]):` and `fn` must return a list,
  `[acc_n1, acc_n2]`.  An alternative correct signature for `fn`, and the
   one that works in `python3`, is:
  `fn = lambda a, t:`, where `a` and `t` correspond to the input tuples.

  Args:
    fn: The callable to be performed.  It accepts two arguments.  The first
      will have the same structure as `initializer` if one is provided,
      otherwise it will have the same structure as `elems`.  The second
      will have the same (possibly nested) structure as `elems`.  Its output
      must have the same structure as `initializer` if one is provided,
      otherwise it must have the same structure as `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which
      will be unpacked along their first dimension.  The nested sequence
      of the resulting slices will be the first argument to `fn`.
    initializer: (optional) A tensor or (possibly nested) sequence of tensors,
      initial value for the accumulator, and the expected output type of `fn`.
    parallel_iterations: (optional) The number of iterations allowed to run
      in parallel.
    back_prop: (optional) True enables support for back propagation.
    swap_memory: (optional) True enables GPU-CPU memory swapping.
    infer_shape: (optional) False disables tests for consistent output shapes.
    reverse: (optional) True scans the tensor last to first (instead of first
      to last).
    name: (optional) Name prefix for the returned tensors.

  Returns:
    A tensor or (possibly nested) sequence of tensors.  Each tensor packs the
    results of applying `fn` to tensors unpacked from `elems` along the first
    dimension, and the previous accumulator value(s), from first to last (or
    last to first, if `reverse=True`).

  Raises:
    TypeError: if `fn` is not callable or the structure of the output of
      `fn` and `initializer` do not match.
    ValueError: if the lengths of the output of `fn` and `initializer`
      do not match.

  Examples:
    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    sum = scan(lambda a, x: a + x, elems)
    # sum == [1, 3, 6, 10, 15, 21]
    sum = scan(lambda a, x: a + x, elems, reverse=True)
    # sum == [21, 20, 18, 15, 11, 6]
    ```

    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    initializer = np.array(0)
    sum_one = scan(
        lambda a, x: x[0] - x[1] + a, (elems + 1, elems), initializer)
    # sum_one == [1, 2, 3, 4, 5, 6]
    ```

    ```python
    elems = np.array([1, 0, 0, 0, 0, 0])
    initializer = (np.array(0), np.array(1))
    fibonaccis = scan(lambda a, _: (a[1], a[0] + a[1]), elems, initializer)
    # fibonaccis == ([1, 1, 2, 3, 5, 8], [1, 2, 3, 5, 8, 13])
    ```
  """
  if not callable(fn):
    raise TypeError("fn must be callable.")

  input_is_sequence = nest.is_sequence(elems)
  input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]
  def input_pack(x):
    return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]

  if initializer is None:
    output_is_sequence = input_is_sequence
    output_flatten = input_flatten
    output_pack = input_pack
  else:
    output_is_sequence = nest.is_sequence(initializer)
    output_flatten = lambda x: nest.flatten(x) if output_is_sequence else [x]
    def output_pack(x):
      return (nest.pack_sequence_as(initializer, x)
              if output_is_sequence else x[0])

  elems_flat = input_flatten(elems)

  in_graph_mode = not context.executing_eagerly()
  with ops.name_scope(name, "scan", elems_flat):
    # TODO(akshayka): Remove the in_graph_mode check once caching devices are
    # supported in Eager
    if in_graph_mode:
      # Any get_variable calls in fn will cache the first call locally
      # and not issue repeated network I/O requests for each iteration.
      varscope = vs.get_variable_scope()
      varscope_caching_device_was_none = False
      if varscope.caching_device is None:
        # TODO(ebrevdo): Change to using colocate_with here and in other
        # methods.
        varscope.set_caching_device(lambda op: op.device)
        varscope_caching_device_was_none = True

    # Convert flattened elems to tensors.
    elems_flat = [
        ops.convert_to_tensor(elem, name="elem") for elem in elems_flat]

    # n may be known statically.
    n = (tensor_shape.dimension_value(elems_flat[0].shape[0])
         or array_ops.shape(elems_flat[0])[0])

    # TensorArrays are always flat
    elems_ta = [
        tensor_array_ops.TensorArray(dtype=elem.dtype, size=n,
                                     dynamic_size=False,
                                     infer_shape=True)
        for elem in elems_flat]
    # Unpack elements
    elems_ta = [
        elem_ta.unstack(elem) for elem_ta, elem in zip(elems_ta, elems_flat)]

    if initializer is None:
      a_flat = [elem.read(n - 1 if reverse else 0) for elem in elems_ta]
      i = constant_op.constant(1)
    else:
      initializer_flat = output_flatten(initializer)
      a_flat = [ops.convert_to_tensor(init) for init in initializer_flat]
      i = constant_op.constant(0)

    # Create a tensor array to store the intermediate values.
    accs_ta = [
        tensor_array_ops.TensorArray(
            dtype=init.dtype, size=n,
            element_shape=init.shape if infer_shape else None,
            dynamic_size=False,
            infer_shape=infer_shape)
        for init in a_flat]

    if initializer is None:
      accs_ta = [acc_ta.write(n - 1 if reverse else 0, a)
                 for (acc_ta, a) in zip(accs_ta, a_flat)]

    def compute(i, a_flat, tas):
      """The loop body of scan.

      Args:
        i: the loop counter.
        a_flat: the accumulator value(s), flattened.
        tas: the output accumulator TensorArray(s), flattened.

      Returns:
        [i + 1, a_flat, tas]: the updated counter + new accumulator values +
          updated TensorArrays

      Raises:
        TypeError: if initializer and fn() output structure do not match
        ValueError: if initializer and fn() output lengths do not match
      """
      packed_elems = input_pack([elem_ta.read(i) for elem_ta in elems_ta])
      packed_a = output_pack(a_flat)
      a_out = fn(packed_a, packed_elems)
      nest.assert_same_structure(
          elems if initializer is None else initializer, a_out)
      flat_a_out = output_flatten(a_out)
      tas = [ta.write(i, value) for (ta, value) in zip(tas, flat_a_out)]
      if reverse:
        next_i = i - 1
      else:
        next_i = i + 1
      return (next_i, flat_a_out, tas)

    if reverse:
      initial_i = n - 1 - i
      condition = lambda i, _1, _2: i >= 0
    else:
      initial_i = i
      condition = lambda i, _1, _2: i < n
    _, _, r_a = control_flow_ops.while_loop(
        condition, compute, (initial_i, a_flat, accs_ta),
        parallel_iterations=parallel_iterations,
        back_prop=back_prop, swap_memory=swap_memory,
        maximum_iterations=n)

    results_flat = [r.stack() for r in r_a]

    n_static = tensor_shape.Dimension(tensor_shape.dimension_value(
        elems_flat[0].get_shape().with_rank_at_least(1)[0]))
    for elem in elems_flat[1:]:
      n_static.merge_with(tensor_shape.Dimension(tensor_shape.dimension_value(
          elem.get_shape().with_rank_at_least(1)[0])))
    for r in results_flat:
      r.set_shape(tensor_shape.TensorShape(n_static).concatenate(
          r.get_shape()[1:]))

    # TODO(akshayka): Remove the in_graph_mode check once caching devices are
    # supported in Eager
    if in_graph_mode and varscope_caching_device_was_none:
      varscope.set_caching_device(None)

    return output_pack(results_flat)
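
A small runnable sketch of the nested-`elems` case described in the docstring, using the public `tf.scan` (the values are made up; `fn` returns the accumulator structure, which matches `initializer`):

```python
import tensorflow as tf

t1 = tf.constant([1., 2., 3.])
t2 = tf.constant([10., 20., 30.])
t3 = tf.constant([100., 200., 300.])
initializer = [tf.constant(0.), tf.constant(0.)]

def fn(acc, elem):
  (a1, a2), (x1, (x2, x3)) = acc, elem     # elem mirrors the structure of `elems`
  return [a1 + x1, a2 + x2 + x3]           # output mirrors the structure of `initializer`

result = tf.scan(fn, (t1, [t2, t3]), initializer)
# result[0] == [1., 3., 6.]         (running sum of t1)
# result[1] == [110., 330., 660.]   (running sum of t2 + t3)
```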
示例#41
0
def raw_rnn(cell, loop_fn, parallel_iterations=None, swap_memory=False, scope=None):
    """
    raw_rnn adapted from the original tensorflow implementation
    (https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/rnn.py)
    to emit arbitrarily nested states for each time step (concatenated along the time axis)
    in addition to the outputs at each timestep and the final state

    returns (
        states for all timesteps,
        outputs for all timesteps,
        final cell state,
    )
    """
    assert_like_rnncell("Raw rnn cell", cell)

    if not callable(loop_fn):
        raise TypeError("loop_fn must be a callable")

    parallel_iterations = parallel_iterations or 32

    # Create a new scope in which the caching device is either
    # determined by the parent scope, or is set to place the cached
    # Variable using the same placement as for the rest of the RNN.
    with vs.variable_scope(scope or "rnn") as varscope:
        if is_in_graph_mode.IS_IN_GRAPH_MODE():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        time = constant_op.constant(0, dtype=dtypes.int32)
        (elements_finished, next_input, initial_state, emit_structure,
         init_loop_state) = loop_fn(time, None, None, None)
        flat_input = nest.flatten(next_input)

        # Need a surrogate loop state for the while_loop if none is available.
        loop_state = (init_loop_state if init_loop_state is not None
                      else constant_op.constant(0, dtype=dtypes.int32))

        input_shape = [input_.get_shape() for input_ in flat_input]
        static_batch_size = input_shape[0][0]

        for input_shape_i in input_shape:
            # Static verification that batch sizes all match
            static_batch_size.merge_with(input_shape_i[0])

        batch_size = static_batch_size.value
        const_batch_size = batch_size
        if batch_size is None:
            batch_size = array_ops.shape(flat_input[0])[0]

        nest.assert_same_structure(initial_state, cell.state_size)
        state = initial_state
        flat_state = nest.flatten(state)
        flat_state = [ops.convert_to_tensor(s) for s in flat_state]
        state = nest.pack_sequence_as(structure=state,
                                      flat_sequence=flat_state)

        if emit_structure is not None:
            flat_emit_structure = nest.flatten(emit_structure)
            flat_emit_size = [emit.shape if emit.shape.is_fully_defined() else
                              array_ops.shape(emit) for emit in flat_emit_structure]
            flat_emit_dtypes = [emit.dtype for emit in flat_emit_structure]
        else:
            emit_structure = cell.output_size
            flat_emit_size = nest.flatten(emit_structure)
            flat_emit_dtypes = [flat_state[0].dtype] * len(flat_emit_size)

        flat_state_size = [s.shape if s.shape.is_fully_defined() else
                           array_ops.shape(s) for s in flat_state]
        flat_state_dtypes = [s.dtype for s in flat_state]

        flat_emit_ta = [
            tensor_array_ops.TensorArray(
                dtype=dtype_i,
                dynamic_size=True,
                element_shape=(tensor_shape.TensorShape([const_batch_size])
                               .concatenate(_maybe_tensor_shape_from_tensor(size_i))),
                size=0,
                name="rnn_output_%d" % i
            )
            for i, (dtype_i, size_i) in enumerate(zip(flat_emit_dtypes, flat_emit_size))
        ]
        emit_ta = nest.pack_sequence_as(structure=emit_structure, flat_sequence=flat_emit_ta)
        flat_zero_emit = [
            array_ops.zeros(_concat(batch_size, size_i), dtype_i)
            for size_i, dtype_i in zip(flat_emit_size, flat_emit_dtypes)]

        zero_emit = nest.pack_sequence_as(structure=emit_structure, flat_sequence=flat_zero_emit)

        flat_state_ta = [
            tensor_array_ops.TensorArray(
                dtype=dtype_i,
                dynamic_size=True,
                element_shape=(tensor_shape.TensorShape([const_batch_size])
                               .concatenate(_maybe_tensor_shape_from_tensor(size_i))),
                size=0,
                name="rnn_state_%d" % i
            )
            for i, (dtype_i, size_i) in enumerate(zip(flat_state_dtypes, flat_state_size))
        ]
        state_ta = nest.pack_sequence_as(structure=state, flat_sequence=flat_state_ta)

        def condition(unused_time, elements_finished, *_):
            return math_ops.logical_not(math_ops.reduce_all(elements_finished))

        def body(time, elements_finished, current_input, state_ta, emit_ta, state, loop_state):
            (next_output, cell_state) = cell(current_input, state)

            nest.assert_same_structure(state, cell_state)
            nest.assert_same_structure(cell.output_size, next_output)

            next_time = time + 1
            (next_finished, next_input, next_state, emit_output,
             next_loop_state) = loop_fn(next_time, next_output, cell_state, loop_state)

            nest.assert_same_structure(state, next_state)
            nest.assert_same_structure(current_input, next_input)
            nest.assert_same_structure(emit_ta, emit_output)

            # If loop_fn returns None for next_loop_state, just reuse the previous one.
            loop_state = loop_state if next_loop_state is None else next_loop_state

            def _copy_some_through(current, candidate):
                """Copy some tensors through via array_ops.where."""
                def copy_fn(cur_i, cand_i):
                    # TensorArray and scalar get passed through.
                    if isinstance(cur_i, tensor_array_ops.TensorArray):
                        return cand_i
                    if cur_i.shape.ndims == 0:
                        return cand_i
                    # Otherwise propagate the old or the new value.
                    with ops.colocate_with(cand_i):
                        return array_ops.where(elements_finished, cur_i, cand_i)
                return nest.map_structure(copy_fn, current, candidate)

            emit_output = _copy_some_through(zero_emit, emit_output)
            next_state = _copy_some_through(state, next_state)

            emit_ta = nest.map_structure(lambda ta, emit: ta.write(time, emit), emit_ta, emit_output)
            state_ta = nest.map_structure(lambda ta, state: ta.write(time, state), state_ta, next_state)

            elements_finished = math_ops.logical_or(elements_finished, next_finished)

            return (next_time, elements_finished, next_input, state_ta,
                    emit_ta, next_state, loop_state)

        returned = control_flow_ops.while_loop(
            condition, body, loop_vars=[
                time, elements_finished, next_input, state_ta,
                emit_ta, state, loop_state],
            parallel_iterations=parallel_iterations,
            swap_memory=swap_memory
        )

        (state_ta, emit_ta, final_state, final_loop_state) = returned[-4:]

        flat_states = nest.flatten(state_ta)
        flat_states = [array_ops.transpose(ta.stack(), (1, 0, 2)) for ta in flat_states]
        states = nest.pack_sequence_as(structure=state_ta, flat_sequence=flat_states)

        flat_outputs = nest.flatten(emit_ta)
        flat_outputs = [array_ops.transpose(ta.stack(), (1, 0, 2)) for ta in flat_outputs]
        outputs = nest.pack_sequence_as(structure=emit_ta, flat_sequence=flat_outputs)

        return (states, outputs, final_state)
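
A minimal graph-mode usage sketch for the adapted `raw_rnn` above (assumptions: TF1-style APIs via `tf.compat.v1`, an `LSTMCell`, and equal-length sequences; batch size, time steps and feature sizes are made up). The `loop_fn` follows the shape of the upstream `tf.nn.raw_rnn` example:

```python
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

batch_size, max_time, input_depth, num_units = 2, 5, 3, 4
inputs = tf.random.normal([max_time, batch_size, input_depth])   # time-major inputs
inputs_ta = tf.TensorArray(tf.float32, size=max_time).unstack(inputs)
sequence_length = tf.fill([batch_size], max_time)
cell = tf.nn.rnn_cell.LSTMCell(num_units)

def loop_fn(time, cell_output, cell_state, loop_state):
  if cell_output is None:                      # time == 0: provide the initial values
    next_cell_state = cell.zero_state(batch_size, tf.float32)
    emit_output = None                         # fall back to cell.output_size for the emit structure
  else:
    next_cell_state = cell_state
    emit_output = cell_output
  elements_finished = time >= sequence_length  # [batch_size] bool vector
  next_input = tf.cond(
      tf.reduce_all(elements_finished),
      lambda: tf.zeros([batch_size, input_depth], tf.float32),
      lambda: inputs_ta.read(time))
  return (elements_finished, next_input, next_cell_state, emit_output, loop_state)

states, outputs, final_state = raw_rnn(cell, loop_fn)
# outputs: [batch_size, max_time, num_units]; states: the LSTM state structure
# with the same leading [batch_size, max_time] dims; final_state: last cell state.
```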
示例#42
0
def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
          swap_memory=False, name=None):
  """foldl on the list of tensors unpacked from `elems` on dimension 0.

  This foldl operator repeatedly applies the callable `fn` to a sequence
  of elements from first to last. The elements are made of the tensors
  unpacked from `elems` on dimension 0. The callable fn takes two tensors as
  arguments. The first argument is the accumulated value computed from the
  preceding invocation of fn. If `initializer` is None, `elems` must contain
  at least one element, and its first element is used as the initializer.

  Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `fn(initializer, values[0]).shape`.

  This method also allows multi-arity `elems` and output of `fn`.  If `elems`
  is a (possibly nested) list or tuple of tensors, then each of these tensors
  must have a matching first (unpack) dimension.  The signature of `fn` may
  match the structure of `elems`.  That is, if `elems` is
  `(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
  `fn = lambda (t1, [t2, t3, [t4, t5]]):`.

  Args:
    fn: The callable to be performed.
    elems: A tensor or (possibly nested) sequence of tensors, each of which
      will be unpacked along their first dimension.  The nested sequence
      of the resulting slices will be the first argument to `fn`.
    initializer: (optional) A tensor or (possibly nested) sequence of tensors,
      as the initial value for the accumulator.
    parallel_iterations: (optional) The number of iterations allowed to run
      in parallel.
    back_prop: (optional) True enables support for back propagation.
    swap_memory: (optional) True enables GPU-CPU memory swapping.
    name: (optional) Name prefix for the returned tensors.

  Returns:
    A tensor or (possibly nested) sequence of tensors, resulting from applying
    `fn` consecutively to the list of tensors unpacked from `elems`, from first
    to last.

  Raises:
    TypeError: if `fn` is not callable.

  Example:
    ```python
    elems = tf.constant([1, 2, 3, 4, 5, 6])
    sum = foldl(lambda a, x: a + x, elems)
    # sum == 21
    ```
  """
  if not callable(fn):
    raise TypeError("fn must be callable.")

  def create_ta(elem):
    return tensor_array_ops.TensorArray(
        dtype=elem.dtype, size=n, dynamic_size=False,
        infer_shape=True).unstack(elem)

  in_graph_mode = not context.executing_eagerly()
  with ops.name_scope(name, "foldl", [elems]):
    # TODO(akshayka): Remove the in_graph_mode check once caching devices are
    # supported in Eager
    if in_graph_mode:
      # Any get_variable calls in fn will cache the first call locally
      # and not issue repeated network I/O requests for each iteration.
      varscope = vs.get_variable_scope()
      varscope_caching_device_was_none = False
      if varscope.caching_device is None:
        # TODO(ebrevdo): Change to using colocate_with here and in other
        # methods.
        varscope.set_caching_device(lambda op: op.device)
        varscope_caching_device_was_none = True

    # Convert elems to tensor array. n may be known statically.
    elems_flat = [
        ops.convert_to_tensor(elem, name="elem") for elem in nest.flatten(elems)
    ]
    n = (tensor_shape.dimension_value(elems_flat[0].shape[0])
         or array_ops.shape(elems_flat[0])[0])

    elems_ta = nest.map_structure(create_ta, elems)

    if initializer is None:
      a = nest.map_structure(lambda elem: elem.read(0), elems_ta)
      i = constant_op.constant(1)
    else:
      a = initializer
      i = constant_op.constant(0)

    def compute(i, a):
      elem_i = nest.map_structure(lambda elem: elem.read(i), elems_ta)
      a = fn(a, elem_i)
      return [i + 1, a]

    _, r_a = control_flow_ops.while_loop(
        lambda i, a: i < n, compute, [i, a],
        parallel_iterations=parallel_iterations,
        back_prop=back_prop,
        swap_memory=swap_memory,
        maximum_iterations=n)

    # TODO(akshayka): Remove the in_graph_mode check once caching devices are
    # supported in Eager
    if in_graph_mode and varscope_caching_device_was_none:
      varscope.set_caching_device(None)

    return r_a
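
A short usage sketch of the multi-arity case with the public `tf.foldl` (values made up):

```python
import tensorflow as tf

xs = tf.constant([1, 2, 3, 4])
ys = tf.constant([10, 20, 30, 40])

# Accumulate the running sum of elementwise products (a dot product).
total = tf.foldl(lambda acc, pair: acc + pair[0] * pair[1],
                 (xs, ys), initializer=tf.constant(0))
# total == 1*10 + 2*20 + 3*30 + 4*40 == 300
```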
示例#43
0
    def compute_gradients(self,
                          loss,
                          var_list,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None,
                          stop_gradients=None):
        """Compute gradients of `loss` for the variables in `var_list`.

    This is the first part of `minimize()`.  It returns a list
    of (gradient, variable) pairs where "gradient" is the gradient
    for "variable".  Note that "gradient" can be a `Tensor`, an
    `IndexedSlices`, or `None` if there is no gradient for the
    given variable.

    Args:
      loss: A Tensor containing the value to minimize or a callable taking no
        arguments which returns the value to minimize. When eager execution is
        enabled it must be a callable.
      var_list: Optional list or tuple of `tf.Variable` to update to minimize
        `loss`.  Defaults to the list of variables collected in the graph under
        the key `GraphKeys.TRAINABLE_VARIABLES`.
      aggregation_method: Specifies the method used to combine gradient terms.
        Valid values are defined in the class `AggregationMethod`.
      colocate_gradients_with_ops: If True, try colocating gradients with the
        corresponding op.
      grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
        through.

    Returns:
      A list of (gradient, variable) pairs. Variable is always present, but
      gradient can be `None`.

    Raises:
      TypeError: If `var_list` contains anything other than `Variable` objects.
      ValueError: If some arguments are invalid, or var_list is None.
      RuntimeError: If called with eager execution enabled and `loss` is
        not callable.

    @compatibility(eager)
    When eager execution is enabled, `aggregation_method`, and
    `colocate_gradients_with_ops` are ignored.
    @end_compatibility
    """
        var_list = nest.flatten(var_list)
        # TODO(josh11b): Test that we handle weight decay in a reasonable way.
        if callable(loss):
            with backprop.GradientTape() as tape:
                tape.watch(var_list)
                loss_value = loss()
            grads = tape.gradient(loss_value, var_list, grad_loss)
        else:
            if context.executing_eagerly():
                raise RuntimeError(
                    "`loss` passed to Optimizer.compute_gradients "
                    "should be a function when eager execution is "
                    "enabled.")
            self._assert_valid_dtypes([loss])
            if grad_loss is not None:
                self._assert_valid_dtypes([grad_loss])
            grads = gradients.gradients(
                loss,
                var_list,
                grad_ys=grad_loss,
                aggregation_method=aggregation_method,
                colocate_gradients_with_ops=colocate_gradients_with_ops,
                stop_gradients=stop_gradients)

        grads_and_vars = list(zip(grads, var_list))
        self._assert_valid_dtypes([
            v for g, v in grads_and_vars
            if g is not None and v.dtype != dtypes.resource
        ])

        return grads_and_vars
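
A hedged usage sketch (the `MyOptimizer` name is hypothetical and stands for whatever class defines the `compute_gradients` above). Under eager execution, `loss` must be a zero-argument callable:

```python
import tensorflow as tf

w = tf.Variable([3.0, 4.0])
loss_fn = lambda: tf.reduce_sum(w * w)       # zero-arg callable, required when eager

opt = MyOptimizer(learning_rate=0.1)         # hypothetical constructor
grads_and_vars = opt.compute_gradients(loss_fn, var_list=[w])
# grads_and_vars == [(<gradient [6., 8.]>, w)]; d/dw sum(w**2) == 2*w
```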
示例#44
0
 def inner(values):
   _ = [_check_failed(v) for v in nest.flatten(values)
        if not isinstance(v, expected_types)]
示例#45
0
def _check_not_tensor(values):
  _ = [_check_failed(v) for v in nest.flatten(values)
       if isinstance(v, ops.Tensor)]
示例#46
0
def is_batched_nested_tensors(tensors,
                              specs,
                              num_outer_dims=1,
                              allow_extra_fields=False):
    """Compares tensors to specs to determine if all tensors are batched or not.

  For each tensor, it checks the dimensions and dtypes with respect to specs.

  Returns `True` if all tensors are batched and `False` if all tensors are
  unbatched.

  Raises a `ValueError` if the shapes are incompatible or a mix of batched and
  unbatched tensors is provided.

  Raises a `TypeError` if tensors' dtypes do not match specs.

  Args:
    tensors: Nested list/tuple/dict of Tensors.
    specs: Nested list/tuple/dict of Tensors or CompositeTensors describing the
      shape of unbatched tensors.
    num_outer_dims: The integer number of dimensions that are considered batch
      dimensions.  Default 1.
    allow_extra_fields: If `True`, then `tensors` may have extra
      subfields which are not in specs.  In this case, the extra subfields
      will not be checked.  For example:

      ```python
      tensors = {"a": tf.zeros((3, 4), dtype=tf.float32),
                 "b": tf.zeros((5, 6), dtype=tf.float32)}
      specs = {"a": tf.TensorSpec(shape=(4,), dtype=tf.float32)}
      assert is_batched_nested_tensors(tensors, specs, allow_extra_fields=True)
      ```

      The above example would raise a ValueError if `allow_extra_fields`
      was False.

  Returns:
    True if all Tensors are batched and False if all Tensors are unbatched.

  Raises:
    ValueError: If
      1. Any of the tensors or specs have shapes with ndims == None, or
      2. The shape of Tensors are not compatible with specs, or
      3. A mix of batched and unbatched tensors are provided.
      4. The tensors are batched but have an incorrect number of outer dims.
    TypeError: If `dtypes` between tensors and specs are not compatible.
  """
    if allow_extra_fields:
        tensors = prune_extra_keys(specs, tensors)

    assert_same_structure(
        tensors,
        specs,
        message='Tensors and specs do not have matching structures')
    flat_tensors = nest.flatten(tensors)
    flat_specs = tf.nest.flatten(specs)

    tensor_shapes = [t.shape for t in flat_tensors]
    tensor_dtypes = [t.dtype for t in flat_tensors]
    spec_shapes = [spec_shape(s) for s in flat_specs]
    spec_dtypes = [t.dtype for t in flat_specs]

    if any(s_shape.rank is None for s_shape in spec_shapes):
        raise ValueError(
            'All specs should have ndims defined.  Saw shapes: %s' %
            (tf.nest.pack_sequence_as(specs, spec_shapes), ))

    if any(t_shape.rank is None for t_shape in tensor_shapes):
        raise ValueError(
            'All tensors should have ndims defined.  Saw shapes: %s' %
            (tf.nest.pack_sequence_as(specs, tensor_shapes), ))

    if any(s_dtype != t_dtype
           for s_dtype, t_dtype in zip(spec_dtypes, tensor_dtypes)):
        raise TypeError(
            'Tensor dtypes do not match spec dtypes:\n{}\nvs.\n{}'.format(
                tf.nest.pack_sequence_as(specs, tensor_dtypes),
                tf.nest.pack_sequence_as(specs, spec_dtypes)))
    is_unbatched = [
        s_shape.is_compatible_with(t_shape)
        for s_shape, t_shape in zip(spec_shapes, tensor_shapes)
    ]

    if all(is_unbatched):
        return False

    tensor_ndims_discrepancy = [
        t_shape.rank - s_shape.rank
        for s_shape, t_shape in zip(spec_shapes, tensor_shapes)
    ]

    tensor_matches_spec = [
        s_shape.is_compatible_with(t_shape[discrepancy:])
        for discrepancy, s_shape, t_shape in zip(tensor_ndims_discrepancy,
                                                 spec_shapes, tensor_shapes)
    ]

    # Check if all tensors match and have correct number of outer_dims.
    is_batched = (all(discrepancy == num_outer_dims
                      for discrepancy in tensor_ndims_discrepancy)
                  and all(tensor_matches_spec))

    if is_batched:
        return True

    # Check if tensors match but have incorrect number of batch dimensions.
    if all(discrepancy == tensor_ndims_discrepancy[0] for discrepancy in
           tensor_ndims_discrepancy) and all(tensor_matches_spec):
        return False

    raise ValueError(
        'Received a mix of batched and unbatched Tensors, or Tensors'
        ' are not compatible with Specs.  num_outer_dims: %d.\n'
        'Saw tensor_shapes:\n   %s\n'
        'And spec_shapes:\n   %s' %
        (num_outer_dims, tf.nest.pack_sequence_as(specs, tensor_shapes),
         tf.nest.pack_sequence_as(specs, spec_shapes)))
示例#47
0
  def _experimental_run_steps_on_iterator(
      self, fn, multi_worker_iterator, iterations, initial_loop_values=None):
    # Wrap `fn` for repeat.
    if initial_loop_values is None:
      initial_loop_values = {}
    initial_loop_values = nest.flatten(initial_loop_values)
    ctx = input_lib.MultiStepContext()

    def run_fn(inputs):
      """Single step on the TPU device."""
      fn_result = fn(ctx, inputs)
      flat_last_step_outputs = nest.flatten(ctx.last_step_outputs)
      if flat_last_step_outputs:
        with ops.control_dependencies([fn_result]):
          return [array_ops.identity(f) for f in flat_last_step_outputs]
      else:
        return fn_result

    # We capture the control_flow_context at this point, before we run `fn`
    # inside a while_loop and TPU replicate context. This is useful in cases
    # where we might need to exit these contexts and get back to the outer
    # context to do some things, for e.g. create an op which should be
    # evaluated only once at the end of the loop on the host. One such usage
    # is in creating metrics' value op.
    self._outer_control_flow_context = (
        ops.get_default_graph()._get_control_flow_context())  # pylint: disable=protected-access

    def rewrite_fn(*args):
      """The rewritten step fn running on TPU."""
      del args

      per_replica_inputs = multi_worker_iterator.get_next()
      replicate_inputs = []
      for replica_id in range(self._num_replicas_in_sync):
        select_replica = lambda x: values.select_replica(replica_id, x)  # pylint: disable=cell-var-from-loop
        replicate_inputs.append((nest.map_structure(
            select_replica, per_replica_inputs),))

      replicate_outputs = tpu.replicate(
          run_fn, replicate_inputs, device_assignment=self._device_assignment)

      # If run_fn has tensor outputs, tpu.replicate returns a list of lists,
      # which we flatten here. If run_fn has no tensor outputs, tpu.replicate
      # returns a list of no_ops, and we keep the output as is.
      if isinstance(replicate_outputs[0], list):
        replicate_outputs = nest.flatten(replicate_outputs)

      return replicate_outputs

    # TODO(sourabhbajaj): The input to while loop should be based on the
    # output type of the step_fn
    assert isinstance(initial_loop_values, list)
    initial_loop_values = initial_loop_values * self._num_replicas_in_sync

    # Put the while loop op on TPU host 0.
    with ops.device(self._host_device):
      if self.steps_per_run == 1:
        replicate_outputs = rewrite_fn()
      else:
        replicate_outputs = training_loop.repeat(iterations, rewrite_fn,
                                                 initial_loop_values)

    del self._outer_control_flow_context
    ctx.run_op = control_flow_ops.group(replicate_outputs)

    if isinstance(replicate_outputs, list):
      # Filter out any ops from the outputs; typically this is the case
      # when there were no tensor outputs.
      last_step_tensor_outputs = [
          x for x in replicate_outputs if not isinstance(x, ops.Operation)
      ]

      # Outputs are currently of the structure (flattened)
      # [output0_device0, output1_device0, output2_device0,
      #  output0_device1, output1_device1, output2_device1,
      #  ...]
      # Convert this to the following structure instead: (grouped by output)
      # [[output0_device0, output0_device1],
      #  [output1_device0, output1_device1],
      #  [output2_device0, output2_device1]]
      output_num = len(last_step_tensor_outputs) // self._num_replicas_in_sync
      last_step_tensor_outputs = [
          last_step_tensor_outputs[i::output_num] for i in range(output_num)
      ]
    else:
      # no tensors returned.
      last_step_tensor_outputs = []

    _set_last_step_outputs(ctx, last_step_tensor_outputs)
    return ctx
示例#48
0
  def __init__(self,
               cycle_num_latent_values,
               moving_average_order,
               autoregressive_order,
               periodicities,
               use_level_noise=True,
               configuration=state_space_model.StateSpaceModelConfiguration()):
    """Initialize the multi-resolution structural ensemble.

    Args:
      cycle_num_latent_values: Controls the model size and the number of latent
          values cycled between (but not the periods over which they cycle).
          Reducing this parameter can save significant amounts of memory, but
          the tradeoff is with resolution: cycling between a smaller number of
          latent values means that only smoother functions can be modeled. For
          multivariate series, may either be a scalar integer (in which case it
          is applied to all periodic components) or a list with length matching
          `periodicities`.
      moving_average_order: The number of moving average coefficients to use,
          which also defines the number of steps after which transient
          deviations revert to the mean defined by periodic and level/trend
          components. Adds to model size.
      autoregressive_order: The number of steps back for
          autoregression. Learning autoregressive coefficients typically
          requires more steps and a smaller step size than other components.
      periodicities: Same meaning as for StructuralEnsemble: number of steps for
          cyclic behavior. Floating point and Tensor values are supported. May
          be a list of values, in which case one component is created for each
          periodicity. If `periodicities` is a list while
          `cycle_num_latent_values` is a scalar, its value is broadcast to each
          periodic component. Otherwise they should be lists of the same length,
          in which case they are paired.
      use_level_noise: See StructuralEnsemble.
      configuration: A StateSpaceModelConfiguration object.
    Raises:
      ValueError: If `cycle_num_latent_values` is neither a scalar nor agrees in
          size with `periodicities`.
    """
    component_model_configuration = configuration._replace(
        use_observation_noise=False)
    univariate_component_model_configuration = (
        component_model_configuration._replace(
            num_features=1))

    adder_part = _replicate_level_trend_models(
        multivariate_configuration=component_model_configuration,
        univariate_configuration=univariate_component_model_configuration)
    with variable_scope.variable_scope("varma"):
      varma_part = varma.VARMA(
          autoregressive_order=autoregressive_order,
          moving_average_order=moving_average_order,
          configuration=component_model_configuration)

    cycle_parts = []
    if periodicities is None:
      periodicities = []
    periodicity_list = nest.flatten(periodicities)
    latent_values_list = nest.flatten(cycle_num_latent_values)
    if len(periodicity_list) != len(latent_values_list):
      if len(latent_values_list) != 1:
        raise ValueError(
            ("`cycle_num_latent_values` must either be a list with the same "
             "size as `periodicity` or a scalar. Received length {} "
             "`cycle_num_latent_values`, while `periodicities` has length {}.")
            .format(len(latent_values_list), len(periodicity_list)))
      latent_values_list *= len(periodicity_list)
    for cycle_number, (cycle_periodicity, num_latent_values) in enumerate(
        zip(periodicity_list, latent_values_list)):
      with variable_scope.variable_scope("cycle{}".format(cycle_number)):
        cycle_features = []
        for feature in range(configuration.num_features):
          with variable_scope.variable_scope("feature{}".format(feature)):
            cycle_features.append(
                periodic.ResolutionCycleModel(
                    num_latent_values=num_latent_values,
                    periodicity=cycle_periodicity,
                    configuration=univariate_component_model_configuration))
        cycle_parts.append(
            state_space_model.StateSpaceCorrelatedFeaturesEnsemble(
                ensemble_members=cycle_features,
                configuration=component_model_configuration))

    super(MultiResolutionStructuralEnsemble, self).__init__(
        ensemble_members=[adder_part, varma_part] + cycle_parts,
        configuration=configuration)
示例#49
0
    def _experimental_run_steps_on_iterator(self,
                                            fn,
                                            iterator,
                                            iterations,
                                            initial_loop_values=None):
        if initial_loop_values is None:
            initial_loop_values = {}
        initial_loop_values = nest.flatten(initial_loop_values)

        ctx = input_lib.MultiStepContext()

        def body(i, *args):
            """A wrapper around `fn` to create the while loop body."""
            del args
            fn_result = fn(ctx, iterator.get_next())
            for (name, output) in ctx.last_step_outputs.items():
                # Convert all outputs to tensors, potentially from `DistributedValues`.
                ctx.last_step_outputs[name] = self._local_results(output)
            flat_last_step_outputs = nest.flatten(ctx.last_step_outputs)
            with ops.control_dependencies([fn_result]):
                return [i + 1] + flat_last_step_outputs

        # We capture the control_flow_context at this point, before we run `fn`
        # inside a while_loop. This is useful in cases where we might need to exit
        # these contexts and get back to the outer context to do some things, for
        # e.g. create an op which should be evaluated only once at the end of the
        # loop on the host. One such usage is in creating metrics' value op.
        self._outer_control_flow_context = (
            ops.get_default_graph()._get_control_flow_context())  # pylint: disable=protected-access

        cond = lambda i, *args: i < iterations
        i = constant_op.constant(0)
        loop_result = control_flow_ops.while_loop(cond,
                                                  body,
                                                  [i] + initial_loop_values,
                                                  name="",
                                                  parallel_iterations=1,
                                                  back_prop=False,
                                                  swap_memory=False,
                                                  return_same_structure=True)
        del self._outer_control_flow_context

        ctx.run_op = control_flow_ops.group(loop_result)

        # Convert the last_step_outputs from a list to the original dict structure
        # of last_step_outputs.
        last_step_tensor_outputs = loop_result[1:]
        last_step_tensor_outputs_dict = nest.pack_sequence_as(
            ctx.last_step_outputs, last_step_tensor_outputs)

        for name, reduce_op in ctx._last_step_outputs_reduce_ops.items():  # pylint: disable=protected-access
            output = last_step_tensor_outputs_dict[name]
            # For outputs that have already been reduced, wrap them in a Mirrored
            # container, else in a PerReplica container.
            if reduce_op is None:
                last_step_tensor_outputs_dict[name] = distribute_utils.regroup(
                    output)
            else:
                assert len(output) == 1
                last_step_tensor_outputs_dict[name] = output[0]

        ctx._set_last_step_outputs(last_step_tensor_outputs_dict)  # pylint: disable=protected-access
        return ctx
示例#50
0
  def jacobian(self,
               target,
               sources,
               unconnected_gradients=UnconnectedGradients.NONE,
               parallel_iterations=None,
               experimental_use_pfor=True):
    """Computes the jacobian using operations recorded in context of this tape.

    See [wikipedia article](http://en.wikipedia.org/wiki/jacobian_matrix_and_determinant) for the
    definition of a Jacobian.

    Example usage:

    ```python
    with tf.GradientTape() as g:
      x  = tf.constant([1.0, 2.0])
      g.watch(x)
      y = x * x
    jacobian = g.jacobian(y, x)
    # jacobian value is [[2., 0.], [0., 4.]]
    ```

    Args:
      target: Tensor to be differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      unconnected_gradients: a value which can either hold 'none' or 'zero' and
        alters the value which will be returned if the target and sources are
        unconnected. The possible values and effects are detailed in
        'UnconnectedGradients' and it defaults to 'none'.
      parallel_iterations: A knob to control how many iterations are dispatched
        in parallel. This knob can be used to control the total memory usage.
      experimental_use_pfor: If true, vectorizes the jacobian computation. Else
        falls back to a sequential while_loop. Vectorization can sometimes fail
        or lead to excessive memory usage. This option can be used to disable
        vectorization in such cases.

    Returns:
      A list or nested structure of Tensors (or None), one for each element in
      `sources`. Returned structure is the same as the structure of `sources`.
      Note if any gradient is sparse (IndexedSlices), jacobian function
      currently makes it dense and returns a Tensor instead. This may change in
      the future.


    Raises:
      RuntimeError: If called on a non-persistent tape with eager execution
        enabled and without enabling experimental_use_pfor.
      ValueError: If vectorization of jacobian computation fails.
    """
    flat_sources = nest.flatten(sources)
    target_static_shape = target.shape
    target_shape = array_ops.shape(target)
    # Note that we push and pop the tape here and below. This is needed since we
    # need gradients through the enclosed operations.
    self._push_tape()
    target = array_ops.reshape(target, [-1])
    self._pop_tape()

    def loop_fn(i):
      self._push_tape()
      y = array_ops.gather(target, i)
      self._pop_tape()
      return self.gradient(y, flat_sources,
                           unconnected_gradients=unconnected_gradients)

    try:
      target_size = int(target.shape[0])
    except TypeError:
      target_size = array_ops.shape(target)[0]

    if experimental_use_pfor:
      try:
        output = pfor_ops.pfor(loop_fn, target_size,
                               parallel_iterations=parallel_iterations)
      except ValueError as err:
        six.reraise(
            ValueError,
            ValueError(
                str(err) + "\nEncountered an exception while vectorizing the "
                "jacobian computation. Vectorization can be disabled by setting"
                " experimental_use_pfor to False."),
            sys.exc_info()[2])
    else:
      if context.executing_eagerly() and not self._persistent:
        raise RuntimeError(
            "GradientTape must be created with persistent=True"
            " to compute the jacobian with eager execution enabled and with "
            " experimental_use_pfor set to False.")
      output = pfor_ops.for_loop(
          loop_fn, [target.dtype] * len(flat_sources), target_size,
          parallel_iterations=parallel_iterations)

    for i, out in enumerate(output):
      if out is not None:
        new_shape = array_ops.concat(
            [target_shape, array_ops.shape(out)[1:]], axis=0)
        out = array_ops.reshape(out, new_shape)
        if context.executing_eagerly():
          out.set_shape(target_static_shape.concatenate(flat_sources[i].shape))
      output[i] = out

    return nest.pack_sequence_as(sources, output)
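
A second small sketch (eager mode assumed) with a vector target and a matrix source, showing that the output shape is `target.shape + source.shape`:

```python
import tensorflow as tf

x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
with tf.GradientTape() as g:
  g.watch(x)
  y = tf.reduce_sum(x * x, axis=1)   # y has shape [2]
j = g.jacobian(y, x)                 # shape [2, 2, 2]: j[i, m, n] == d y[i] / d x[m, n]
# j[0] == [[2., 4.], [0., 0.]]  and  j[1] == [[0., 0.], [6., 8.]]
```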
示例#51
0
 def update(self, x, y=None):
     sentence = nest.flatten(x)[0].numpy().decode('utf-8')
     self._tokenizer.fit_on_texts([sentence])
     sequence = self._tokenizer.texts_to_sequences([sentence])[0]
     if self.max_len is None:
         self._max_len = max(self._max_len, len(sequence))
示例#52
0
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
  # Used to keep track of the total loss value (stateless).
  # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor, inputs)

  outs = model(inputs, **kwargs)

  outs = nest.flatten(outs)
  # `None` by default for `EagerTensors`.
  masks = [t._keras_mask for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses (stateless).
  output_losses = []
  # Used to keep track of individual output losses (stateful).
  aggregated_output_losses = []

  with backend.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(mask, None, weights))
            weights *= mask

        # Reset reduction on the loss so that we can get the per sample loss
        # value. We use this to get both the stateless and stateful loss
        # values without having to compute the underlying loss function
        # twice.
        weighted_losses = None
        if hasattr(loss_fn, 'reduction'):
          current_loss_reduction = loss_fn.reduction
          loss_fn.reduction = losses_utils.ReductionV2.NONE
          weighted_losses = loss_fn(targets[i], outs[i], sample_weight=weights)
          loss_fn.reduction = current_loss_reduction

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(weighted_losses)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume the class takes care of loss reduction, because if
          # it returned a vector value we could not tell whether a custom
          # optimizer expects a vector loss or an unreduced per-sample loss.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        output_losses.append(backend.mean(output_loss))
        if output_loss_metrics is not None:
          # Compute the stateful loss value.
          if weighted_losses is not None:
            aggregated_output_loss = output_loss_metrics[i](weighted_losses)
          else:
            # Custom loss class.
            aggregated_output_loss = training_utils.call_metric_function(
                output_loss_metrics[i], targets[i], outs[i], weights=weights)
          # Keep track of the stateful output loss result.
          aggregated_output_losses.append(aggregated_output_loss)

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = backend.mean(total_loss)
    # Add regularization losses
    custom_losses = model.losses
    if custom_losses:
      total_loss += math_ops.add_n(custom_losses)
    model._clear_losses()

  return outs, total_loss, output_losses, aggregated_output_losses, masks
示例#53
0
def build_graph(mode, config, sequence_example_file_paths=None):
    """Builds the TensorFlow graph.

  Args:
    mode: 'train', 'eval', or 'generate'. Only mode related ops are added to
        the graph.
    config: An EventSequenceRnnConfig containing the encoder/decoder and HParams
        to use.
    sequence_example_file_paths: A list of paths to TFRecord files containing
        tf.train.SequenceExample protos. Only needed for training and
        evaluation.

  Returns:
    A tf.Graph instance which contains the TF ops.

  Raises:
    ValueError: If mode is not 'train', 'eval', or 'generate'.
  """
    if mode not in ('train', 'eval', 'generate'):
        raise ValueError("The mode parameter must be 'train', 'eval', "
                         "or 'generate'. The mode parameter was: %s" % mode)

    hparams = config.hparams
    encoder_decoder = config.encoder_decoder

    tf.logging.info('hparams = %s', hparams.values())

    input_size = encoder_decoder.input_size
    num_classes = encoder_decoder.num_classes
    no_event_label = encoder_decoder.default_event_label

    with tf.Graph().as_default() as graph:
        inputs, labels, lengths = None, None, None

        if mode == 'train' or mode == 'eval':
            inputs, labels, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths,
                hparams.batch_size,
                input_size,
                shuffle=mode == 'train')

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32,
                                    [hparams.batch_size, None, input_size])

        cell = make_rnn_cell(hparams.rnn_layer_sizes,
                             dropout_keep_prob=(1.0 if mode == 'generate' else
                                                hparams.dropout_keep_prob),
                             attn_length=(hparams.attn_length if hasattr(
                                 hparams, 'attn_length') else 0))

        initial_state = cell.zero_state(hparams.batch_size, tf.float32)

        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs,
                                                 sequence_length=lengths,
                                                 initial_state=initial_state,
                                                 swap_memory=True)

        outputs_flat = magenta.common.flatten_maybe_padded_sequences(
            outputs, lengths)
        logits_flat = tf.contrib.layers.linear(outputs_flat, num_classes)

        if mode == 'train' or mode == 'eval':
            labels_flat = magenta.common.flatten_maybe_padded_sequences(
                labels, lengths)

            softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels_flat, logits=logits_flat)

            predictions_flat = tf.argmax(logits_flat, axis=1)
            correct_predictions = tf.to_float(
                tf.equal(labels_flat, predictions_flat))
            event_positions = tf.to_float(
                tf.not_equal(labels_flat, no_event_label))
            no_event_positions = tf.to_float(
                tf.equal(labels_flat, no_event_label))

            if mode == 'train':
                loss = tf.reduce_mean(softmax_cross_entropy)
                perplexity = tf.exp(loss)
                accuracy = tf.reduce_mean(correct_predictions)
                event_accuracy = (
                    tf.reduce_sum(correct_predictions * event_positions) /
                    tf.reduce_sum(event_positions))
                no_event_accuracy = (
                    tf.reduce_sum(correct_predictions * no_event_positions) /
                    tf.reduce_sum(no_event_positions))

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)

                train_op = tf.contrib.slim.learning.create_train_op(
                    loss, optimizer, clip_gradient_norm=hparams.clip_norm)
                tf.add_to_collection('train_op', train_op)

                vars_to_summarize = {
                    'loss': loss,
                    'metrics/perplexity': perplexity,
                    'metrics/accuracy': accuracy,
                    'metrics/event_accuracy': event_accuracy,
                    'metrics/no_event_accuracy': no_event_accuracy,
                }
            elif mode == 'eval':
                vars_to_summarize, update_ops = tf.contrib.metrics.aggregate_metric_map(
                    {
                        'loss':
                        tf.metrics.mean(softmax_cross_entropy),
                        'metrics/accuracy':
                        tf.metrics.accuracy(labels_flat, predictions_flat),
                        'metrics/per_class_accuracy':
                        tf.metrics.mean_per_class_accuracy(
                            labels_flat, predictions_flat, num_classes),
                        'metrics/event_accuracy':
                        tf.metrics.recall(event_positions,
                                          correct_predictions),
                        'metrics/no_event_accuracy':
                        tf.metrics.recall(no_event_positions,
                                          correct_predictions),
                    })
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_ops', updates_op)

                # Perplexity is just exp(loss) and doesn't need its own update op.
                vars_to_summarize['metrics/perplexity'] = tf.exp(
                    vars_to_summarize['loss'])

            for var_name, var_value in six.iteritems(vars_to_summarize):
                tf.summary.scalar(var_name, var_value)
                tf.add_to_collection(var_name, var_value)

        elif mode == 'generate':
            temperature = tf.placeholder(tf.float32, [])
            softmax_flat = tf.nn.softmax(
                tf.div(logits_flat, tf.fill([num_classes], temperature)))
            softmax = tf.reshape(softmax_flat,
                                 [hparams.batch_size, -1, num_classes])

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', temperature)
            tf.add_to_collection('softmax', softmax)
            # Flatten state tuples for metagraph compatibility.
            for state in tf_nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf_nest.flatten(final_state):
                tf.add_to_collection('final_state', state)

    return graph
示例#54
0
def object_list_uid(object_list):
  """Creates a single string from object ids."""
  object_list = nest.flatten(object_list)
  return ', '.join([str(abs(id(x))) for x in object_list])
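
A tiny sketch of the behaviour (assuming the helper above is in scope): the string depends only on object identity, so equal-valued but distinct tensors produce different keys, which is what makes it usable as a call-cache key.

```python
import tensorflow as tf

a = tf.constant([1.0])
b = tf.constant([1.0])

assert object_list_uid([a, b]) == object_list_uid((a, b))  # same objects, same flat order
assert object_list_uid([a]) != object_list_uid([b])        # distinct objects -> distinct ids
```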
示例#55
0
 def _flat_tensor_specs(self):
     """A list of TensorSpecs compatible with self._to_tensor_list(v)."""
     return nest.flatten(self._component_specs, expand_composites=True)
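
For context, `expand_composites=True` makes `nest.flatten` recurse into composite specs and return their per-component `TensorSpec`s; a small sketch with the public `tf.nest` (the ragged spec is an assumption for illustration):

```python
import tensorflow as tf

specs = {'dense': tf.TensorSpec([3], tf.float32),
         'ragged': tf.RaggedTensorSpec([None, None], tf.int32)}

flat = tf.nest.flatten(specs, expand_composites=True)
# The RaggedTensorSpec is replaced by the TensorSpecs of its components
# (flat values and row splits), so every entry of `flat` is a plain TensorSpec.
```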
示例#56
0
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
    """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
    total_loss = 0
    kwargs = {}
    if model._expects_training_arg:
        kwargs['training'] = training
    if len(inputs) == 1 and not isinstance(inputs, dict):
        inputs = inputs[0]

    if model._compute_output_and_mask_jointly:
        outs, masks = model._call_and_compute_mask(inputs, **kwargs)
        masks = nest.flatten(masks)
    else:
        outs = model.call(inputs, **kwargs)
        masks = None

    outs = nest.flatten(outs)
    if masks is None:
        masks = [None for _ in outs]
    targets = nest.flatten(targets)

    loss_metrics = []
    aggregated_loss_metrics = []
    with backend.name_scope('loss'):
        for i, loss_fn in enumerate(model.loss_functions):
            if sample_weights:
                weights = sample_weights[i]
            else:
                weights = None
            mask = masks[i]
            with backend.name_scope(model.output_names[i] + '_loss'):
                if isinstance(loss_fn, losses_module.Loss):
                    if mask is not None:
                        mask = math_ops.cast(mask, outs[i].dtype)
                        # Update weights with mask.
                        if weights is None:
                            weights = mask
                        else:
                            # Update dimensions of weights to match with mask if possible.
                            mask, _, weights = squeeze_or_expand_dimensions(
                                mask, None, weights)
                            weights *= mask
                    output_loss = loss_fn(targets[i],
                                          outs[i],
                                          sample_weight=weights)
                else:
                    weighted_masked_fn = training_utils.weighted_masked_objective(
                        loss_fn)
                    output_loss = weighted_masked_fn(targets[i],
                                                     outs[i],
                                                     weights,
                                                     mask=mask)

            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                loss_metrics.append(backend.mean(output_loss))

                if output_loss_metrics is not None:
                    # Keep track of the stateful loss result.
                    aggregated_loss_metrics.append(
                        training_utils.call_metric_function(
                            output_loss_metrics[i],
                            targets[i],
                            outs[i],
                            weights=weights,
                            mask=mask))

            loss_weight = model.loss_weights_list[i]
            if total_loss is None:
                total_loss = loss_weight * output_loss
            else:
                total_loss += loss_weight * output_loss

        total_loss = backend.mean(total_loss)
        # Add regularization losses
        custom_losses = model.losses
        if custom_losses:
            total_loss += math_ops.add_n(custom_losses)
        model._clear_losses()

    return outs, total_loss, loss_metrics, aggregated_loss_metrics, masks
Example #57
def test_on_batch(model,
                  inputs,
                  targets,
                  sample_weights=None,
                  reset_metrics=True,
                  output_loss_metrics=None):
  """Calculates the loss for one input batch.

  Arguments:
      model: Model whose loss has to be calculated.
      inputs: Input batch data.
      targets: Target batch data.
      sample_weights: Sample weight batch data.
      reset_metrics: If `True`, the metrics returned will be only for this
        batch. If `False`, the metrics will be statefully accumulated across
        batches.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.

  Returns:
      The total loss, plus the loss and metrics associated with each output.
  """
  if isinstance(inputs, collections.abc.Sequence):  # collections.Sequence was removed in Python 3.10
    if len(inputs) and tensor_util.is_tensor(inputs[0]):
      inputs = training_utils.cast_if_floating_dtype(inputs)
      targets = training_utils.cast_if_floating_dtype(targets)
    else:
      inputs = training_utils.cast_if_floating_dtype(
          [ops.convert_to_tensor(val) for val in inputs])
      targets = training_utils.cast_if_floating_dtype(
          [ops.convert_to_tensor(val) for val in targets])
  if sample_weights:
    sample_weights = [
        training_utils.cast_if_floating_dtype(ops.convert_to_tensor(val))
        if val is not None else None for val in sample_weights
    ]
  outs, total_loss, output_losses, aggregated_output_losses, masks = (
      _model_loss(
          model,
          inputs,
          targets,
          sample_weights=sample_weights,
          training=False,
          output_loss_metrics=output_loss_metrics))
  if not isinstance(outs, list):
    outs = [outs]
  metrics_results = _eager_metrics_fn(
      model,
      outs,
      targets,
      sample_weights=sample_weights,
      masks=masks,
      return_stateful_result=not reset_metrics)
  total_loss = nest.flatten(total_loss)
  if reset_metrics:
    final_output_losses = output_losses
  else:
    final_output_losses = aggregated_output_losses
  results = total_loss + final_output_losses + metrics_results

  return [tensor_util.constant_value(v) for v in results]
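A minimal usage sketch of test_on_batch (hedged: this is a private eager-training helper, so the toy model and data below, and the assumption that eager execution is enabled, are illustrative only and may not match every TF version):

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='sgd', loss='mse')
x = np.ones((8, 4), dtype='float32')
y = np.zeros((8, 1), dtype='float32')
# Returns numpy values: [total_loss, <per-output losses>, <metric results>].
results = test_on_batch(model, [x], [y], sample_weights=None, reset_metrics=True)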
Example #58
  def gradient(self,
               target,
               sources,
               output_gradients=None,
               unconnected_gradients=UnconnectedGradients.NONE):
    """Computes the gradient using operations recorded in context of this tape.

    Args:
      target: a list or nested structure of Tensors or Variables to be
        differentiated.
      sources: a list or nested structure of Tensors or Variables. `target`
        will be differentiated against elements in `sources`.
      output_gradients: a list of gradients, one for each element of
        target. Defaults to None.
      unconnected_gradients: a value which can either hold 'none' or 'zero' and
        alters the value which will be returned if the target and sources are
        unconnected. The possible values and effects are detailed in
        'UnconnectedGradients' and it defaults to 'none'.

    Returns:
      a list or nested structure of Tensors (or IndexedSlices, or None),
      one for each element in `sources`. Returned structure is the same as
      the structure of `sources`.

    Raises:
      RuntimeError: if called inside the context of the tape, or if called more
       than once on a non-persistent tape.
      ValueError: if the target is a variable or if unconnected gradients is
       called with an unknown value.
    """
    if self._tape is None:
      raise RuntimeError("GradientTape.gradient can only be called once on "
                         "non-persistent tapes.")
    if self._recording:
      if not self._persistent:
        self._pop_tape()
      else:
        logging.log_first_n(
            logging.WARN, "Calling GradientTape.gradient on a persistent "
            "tape inside its context is significantly less "
            "efficient than calling it outside the context (it "
            "causes the gradient ops to be recorded on the "
            "tape, leading to increased CPU and memory usage). "
            "Only call GradientTape.gradient inside the "
            "context if you actually want to trace the "
            "gradient in order to compute higher order "
            "derivatives.", 1)

    flat_targets = []
    for t in nest.flatten(target):
      if not backprop_util.IsTrainable(t):
        logging.vlog(
            logging.WARN, "The dtype of the target tensor must be "
            "floating (e.g. tf.float32) when calling GradientTape.gradient, "
            "got %r", t.dtype)
      if resource_variable_ops.is_resource_variable(t):
        with self:
          t = ops.convert_to_tensor(t)
      flat_targets.append(t)

    flat_sources = nest.flatten(sources)
    flat_sources_raw = flat_sources
    flat_sources = [_handle_or_self(x) for x in flat_sources]
    for t in flat_sources_raw:
      if not backprop_util.IsTrainable(t):
        logging.vlog(
            logging.WARN, "The dtype of the source tensor must be "
            "floating (e.g. tf.float32) when calling GradientTape.gradient, "
            "got %r", t.dtype)

    if output_gradients is not None:
      output_gradients = [None if x is None else ops.convert_to_tensor(x)
                          for x in nest.flatten(output_gradients)]

    flat_grad = imperative_grad.imperative_grad(
        self._tape,
        flat_targets,
        flat_sources,
        output_gradients=output_gradients,
        sources_raw=flat_sources_raw,
        unconnected_gradients=unconnected_gradients)

    if not self._persistent:
      # Keep track of watched variables before setting tape to None
      self._watched_variables = self._tape.watched_variables()
      self._tape = None

    grad = nest.pack_sequence_as(sources, flat_grad)
    return grad
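A minimal usage sketch of the public API this method backs (tf.GradientTape.gradient):

import tensorflow as tf

x = tf.constant(3.0)
with tf.GradientTape() as tape:
  tape.watch(x)        # constants are not watched automatically
  y = x * x
dy_dx = tape.gradient(y, x)  # d(x^2)/dx at x = 3 -> 6.0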
Example #59
def assert_matching_dtypes_and_inner_shapes(tensors,
                                            specs,
                                            caller,
                                            tensors_name,
                                            specs_name,
                                            allow_extra_fields=False):
    """Returns `True` if tensors and specs have matching dtypes and inner shapes.

  Args:
    tensors: A nest of tensor objects.
    specs: A nest of `tf.TypeSpec` objects.
    caller: The object calling `assert...`.
    tensors_name: (str) Name to use for the tensors in case of an error.
    specs_name: (str) Name to use for the specs in case of an error.
    allow_extra_fields: If `True`, then `tensors` may contain more keys or list
      fields than strictly required by `specs`.

  Raises:
    ValueError: If the tensors do not match the specs' dtypes or their inner
      shapes do not match the specs' shapes.
  """
    if allow_extra_fields:
        tensors = prune_extra_keys(specs, tensors)
    assert_same_structure(
        tensors,
        specs,
        message=('{}: {} and {} do not have matching structures'.format(
            caller, tensors_name, specs_name)))

    flat_tensors = nest.flatten(tensors)
    flat_specs = tf.nest.flatten(specs)
    flat_tensors = [
        tf.convert_to_tensor(t, dtype_hint=s.dtype)
        if not tf.is_tensor(t) else t
        for (t, s) in zip(flat_tensors, flat_specs)
    ]

    tensor_shapes = [t.shape for t in flat_tensors]
    tensor_dtypes = [t.dtype for t in flat_tensors]
    spec_shapes = [spec_shape(s) for s in flat_specs]
    spec_dtypes = [s.dtype for s in flat_specs]

    compatible = True

    if any(s_dtype != t_dtype
           for s_dtype, t_dtype in zip(spec_dtypes, tensor_dtypes)):
        compatible = False
    else:
        for s_shape, t_shape in zip(spec_shapes, tensor_shapes):
            if s_shape.ndims in (0, None) or t_shape.ndims is None:
                continue
            if s_shape.ndims > t_shape.ndims:
                compatible = False
                break
            if not s_shape.is_compatible_with(t_shape[-s_shape.ndims:]):
                compatible = False
                break

    if not compatible:
        get_dtypes = lambda v: tf.nest.map_structure(lambda x: x.dtype, v)
        get_shapes = lambda v: tf.nest.map_structure(spec_shape, v)
        raise ValueError(
            '{}: Inconsistent dtypes or shapes between {} and {}.\n'
            'dtypes:\n{}\nvs.\n{}.\n'
            'shapes:\n{}\nvs.\n{}.'.format(caller, tensors_name, specs_name,
                                           get_dtypes(tensors),
                                           get_dtypes(specs),
                                           get_shapes(tensors),
                                           get_shapes(specs)))
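A minimal usage sketch (assumes the surrounding nest utilities such as spec_shape and assert_same_structure are in scope; note the spec shape is treated as the inner shape, so extra leading batch dimensions are allowed):

import tensorflow as tf

specs = {'obs': tf.TensorSpec([4], tf.float32)}
tensors = {'obs': tf.zeros([32, 4], tf.float32)}  # leading batch dim is allowed
assert_matching_dtypes_and_inner_shapes(
    tensors, specs, caller='example',
    tensors_name='tensors', specs_name='specs')  # passes silently

# A dtype mismatch (e.g. tf.int32 observations against the float32 spec) would
# raise the ValueError constructed above.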
Example #60
def func_graph_from_py_func(name,
                            python_func,
                            args,
                            kwargs,
                            signature=None,
                            func_graph=None,
                            autograph=False,
                            add_control_dependencies=True,
                            arg_names=None,
                            op_return_value=None):
    """Returns a `FuncGraph` generated from `python_func`.

  Args:
    name: an identifier for the function.
    python_func: the Python function to trace.
    args: the positional args with which the Python function should be called;
      ignored if a signature is provided.
    kwargs: the keyword args with which the Python function should be called;
      ignored if a signature is provided.
    signature: a possibly nested sequence of `TensorSpecs` specifying the shapes
      and dtypes of the arguments. When a signature is provided, `args` and
      `kwargs` are ignored, and `python_func` is traced with Tensors conforming
      to `signature`. If `None`, the shapes and dtypes are inferred from the
      inputs.
    func_graph: Optional. An instance of FuncGraph. If provided, we will use
      this graph else a new one is built and returned.
    autograph: whether to use autograph to compile `python_func`.
      See https://www.tensorflow.org/guide/autograph for more information.
    add_control_dependencies: If True, automatically adds control dependencies
      to ensure program order matches execution order and stateful ops always
      execute.
    arg_names: Optional list of argument names, used to give input placeholders
      recognizable names.
    op_return_value: Optional. A Tensor. If set and `python_func` returns
      Operations, those return values will be replaced with this value. If not
      set, returning an Operation triggers an error.

  Returns:
    A FuncGraph.

  Raises:
    TypeError: If any of `python_func`'s return values is neither `None` nor a
      `Tensor`.
  """
    if op_return_value is not None:
        assert isinstance(op_return_value, ops.Tensor), op_return_value
    if func_graph is None:
        func_graph = FuncGraph(name)
    assert isinstance(func_graph, FuncGraph)
    if add_control_dependencies:
        control_manager = AutomaticControlDependencies
    else:
        control_manager = ops.NullContextmanager
    with func_graph.as_default(), control_manager() as a:
        current_scope = variable_scope.get_variable_scope()
        default_use_resource = current_scope.use_resource
        current_scope.set_use_resource(True)

        if signature is not None:
            args = signature
            kwargs = {}

        # Creates and names placeholders for all arguments.
        func_args = _get_defun_inputs_from_args(args, arg_names)
        func_kwargs = _get_defun_inputs_from_kwargs(kwargs)

        # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
        # Variables to help check whether mutation happens in calling the function
        # Copy the recursive list, tuple and map structure, but not base objects
        func_args_before = nest.pack_sequence_as(func_args,
                                                 nest.flatten(func_args))
        func_kwargs_before = nest.pack_sequence_as(func_kwargs,
                                                   nest.flatten(func_kwargs))

        def convert(x):
            """Converts a function output to a Tensor."""
            if x is None:
                return None
            if op_return_value is not None and isinstance(x, ops.Operation):
                # TODO(b/79881896): we currently can't capture external control deps, so
                # this won't work if x needs to be captured (i.e. if python_func returns
                # captured Operations).
                with ops.control_dependencies([x]):
                    x = array_ops.identity(op_return_value)
            elif not isinstance(x, tensor_array_ops.TensorArray):
                try:
                    x = ops.convert_to_tensor_or_indexed_slices(x)
                except (ValueError, TypeError):
                    raise TypeError(
                        "To be compatible with tf.contrib.eager.defun, Python functions "
                        "must return zero or more Tensors; in compilation of %s, found "
                        "return value of type %s, which is not a Tensor." %
                        (str(python_func), type(x)))
            if add_control_dependencies:
                x = a.mark_as_return(x)
            return x

        this_tape = tape.push_new_tape()
        try:
            if autograph:
                from tensorflow.python import autograph  # pylint: disable=g-import-not-at-top
                _, original_func = tf_decorator.unwrap(python_func)

                def wrapper(*args, **kwargs):
                    return autograph.converted_call(
                        original_func, None,
                        autograph.ConversionOptions(
                            verbose=autograph.Verbosity.BRIEF,
                            recursive=True,
                            strip_decorators=(def_function.function, ),
                            optional_features=(),
                        ), *args, **kwargs)

                # Wrapping around a decorator allows checks like tf_inspect.getargspec
                # to be accurate.
                converted_func = tf_decorator.make_decorator(
                    original_func, wrapper)
                tf_decorator.rewrap(python_func, original_func, converted_func)

            func_outputs = python_func(*func_args, **func_kwargs)

            # invariant: `func_outputs` contains only Tensors, IndexedSlices,
            # SparseTensors, TensorArrays and `None`s.
            func_outputs = nest.map_structure(convert, func_outputs)

            check_mutation(func_args_before, func_args)
            check_mutation(func_kwargs_before, func_kwargs)
        finally:
            tape.pop_tape(this_tape)
            current_scope.set_use_resource(default_use_resource)

        # Variables in `func_args`, `func_kwargs` should be explicit inputs
        # to the function, not captured inputs.
        tape_variables = this_tape.watched_variables()
        arg_variables = set()
        inputs = []
        for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
            if isinstance(arg, resource_variable_ops.ResourceVariable):
                # Even if an argument variable was not used in the function, we've
                # already manually captured the resource Tensor when creating argument
                # placeholders.
                resource_placeholder = func_graph.captures.pop(arg.handle)
                arg_variables.add(arg)
                inputs.append(resource_placeholder)
            elif isinstance(arg, ops.Tensor):
                inputs.append(arg)
        variables = [v for v in tape_variables if v not in arg_variables]
        func_graph.inputs = inputs + list(func_graph.captures.values())

        func_graph.structured_outputs = func_outputs
        # Returning a closed-over tensor does not trigger convert_to_tensor.
        func_graph.outputs.extend(
            func_graph.capture(x)
            for x in flatten(func_graph.structured_outputs) if x is not None)

        func_graph.variables = variables

    # Register any other functions defined in the graph.
    with ops.init_scope():
        if context.executing_eagerly():
            for f in func_graph._functions.values():  # pylint: disable=protected-access
                # TODO(ashankar): What about the gradient registry?
                context.add_function(f._c_func.func)  # pylint: disable=protected-access

    return func_graph