Example No. 1
  def testSequencePreprocessor(self):
    sub1_p = input_preprocessors.ConstantPreprocessor.Params().Set(
        name='sub1', constants={'foo': 1})
    sub2_p = input_preprocessors.ConstantPreprocessor.Params().Set(
        name='sub2', constants={'bar': 2})
    preprocessor_p = input_preprocessors.Sequence.Params().Set(
        name='list', preprocessors=[sub1_p, sub2_p])

    features = py_utils.NestedMap()
    shapes = py_utils.NestedMap()
    dtypes = py_utils.NestedMap()

    preprocessor = preprocessor_p.Instantiate()
    new_features = preprocessor.TransformFeatures(features)
    new_shapes = preprocessor.TransformShapes(shapes)
    new_dtypes = preprocessor.TransformDTypes(dtypes)

    # Verify shape and dtype
    self.assertEqual(new_shapes.foo, tf.TensorShape([]))
    self.assertEqual(new_shapes.bar, tf.TensorShape([]))

    self.assertEqual(new_dtypes.foo, tf.int64)
    self.assertEqual(new_dtypes.bar, tf.int64)

    with self.session() as sess:
      np_new_features = sess.run(new_features)
      # Check the new constants exist in the features for both preprocessors
      self.assertEqual(np_new_features.foo, 1)
      self.assertEqual(np_new_features.bar, 2)
Example No. 2
    def testIdentityPreprocessor(self):
        input_p = input_preprocessors.ConstantPreprocessor.Params().Set(
            constants={
                'value1': 1,
                'value2': np.array([2])
            })
        identity_p = input_preprocessors.IdentityPreprocessor.Params()
        features = py_utils.NestedMap()
        shapes = py_utils.NestedMap()
        dtypes = py_utils.NestedMap()

        preprocessors = [input_p.Instantiate(), identity_p.Instantiate()]
        for preprocessor in preprocessors:
            # Verify shape / dtypes.
            shapes = preprocessor.TransformShapes(shapes)
            dtypes = preprocessor.TransformDTypes(dtypes)
            features = preprocessor.TransformFeatures(features)

        self.assertEqual(self.evaluate(features.value1), 1)
        self.assertEqual(shapes.value1, tf.TensorShape([]))
        self.assertEqual(dtypes.value1, tf.int64)

        self.assertEqual(self.evaluate(features.value2), [2])
        self.assertEqual(shapes.value2, tf.TensorShape([1]))
        self.assertEqual(dtypes.value2, tf.int64)
Example No. 3
  def Shape(self):
    """Shape of BBoxes."""
    p = self.params
    shapes = {}
    for laser in p.cbr_laser_names:
      cbr_shape = p.cbr_ri_shape[:-1]
      for returns in p.returns:
        shape_dict = py_utils.NestedMap({
            'xyz': tf.TensorShape(cbr_shape + [3]),
            'features': tf.TensorShape(cbr_shape + [4]),
            'mask': tf.TensorShape(cbr_shape),
        })
        shapes['%s_%s' % (laser, returns)] = shape_dict

      shapes['%s_extrinsics' % laser] = tf.TensorShape([4, 4])
      shapes['%s_beam_inclinations' % laser] = tf.TensorShape([2])
    for laser in p.gbr_laser_names:
      gbr_shape = p.gbr_ri_shape[:-1]
      for returns in p.returns:
        shape_dict = py_utils.NestedMap({
            'xyz': tf.TensorShape(gbr_shape + [3]),
            'features': tf.TensorShape(gbr_shape + [4]),
            'mask': tf.TensorShape(gbr_shape),
        })
        shapes['%s_%s' % (laser, returns)] = shape_dict
      shapes['%s_extrinsics' % laser] = tf.TensorShape([4, 4])
      shapes['%s_beam_inclinations' % laser] = tf.TensorShape([64])

    return py_utils.NestedMap(shapes)
Example No. 4
 def Shape(self):
     p = self.params
     ret = py_utils.NestedMap(
         points_xyz=tf.TensorShape([p.max_num_points, 3]),
         points_feature=tf.TensorShape([p.max_num_points, p.num_features]))
     if p.max_num_points is not None:
         ret.points_padding = tf.TensorShape([p.max_num_points])
     return ret
Example No. 5
  def Shape(self):
    """Shape of images."""
    p = self.params
    shapes = {'frame_pose': tf.TensorShape([4, 4])}

    for camera_name in p.camera_names:
      shapes['%s' % camera_name] = tf.TensorShape(p.image_shape)
      # 1d Array of [f_u, f_v, c_u, c_v, k{1, 2}, p{1, 2}, k{3}].
      # Note that this intrinsic corresponds to the images after scaling.
      # Camera model: pinhole camera.
      # Lens distortion:
      # Radial distortion coefficients: k1, k2, k3.
      # Tangential distortion coefficients: p1, p2.
      # k_{1, 2, 3}, p_{1, 2} follows the same definition as OpenCV.
      shapes['%s_intrinsics' % camera_name] = tf.TensorShape([9])
      shapes['%s_extrinsics' % camera_name] = tf.TensorShape([4, 4])
      shapes['%s_pose' % camera_name] = tf.TensorShape([4, 4])
      shapes['%s_velocity' % camera_name] = tf.TensorShape([6])
      for feat in [
          'pose_timestamp', 'shutter', 'camera_trigger_time',
          'camera_readout_done_time'
      ]:
        shapes['%s_%s' % (camera_name, feat)] = tf.TensorShape([])
      shapes['%s_rolling_shutter_direction' % camera_name] = tf.TensorShape([])

    return py_utils.NestedMap(shapes)
Example No. 6
  def __init__(self, dtype, shape, send_device, recv_device, name=None):
    """Construct a channel.

    Args:
      dtype: The dtype of tensors sent through the channel.
      shape: The shape of tensors sent through the channel. Must be a fully
        defined shape for TPUs.
      send_device: A fully-specified tensorflow device.
      recv_device: A fully-specified tensorflow device.
      name: A name for the channel (optional).
    """
    current_graph = tf.get_default_graph()
    assert current_graph, "A channel is scoped within a tf.Graph"
    self._dtype = dtype
    self._send_device = send_device
    self._recv_device = recv_device
    self._name = current_graph.unique_name(name if name else "channel")

    assert shape is not None
    shape = tf.TensorShape(shape)

    self._shape = shape
    self._send_tpu_core = _TpuCore(send_device)
    self._recv_tpu_core = _TpuCore(recv_device)
    self._send_called = False
    self._recv_op = None
    assert ((self._send_tpu_core == -1) == (self._recv_tpu_core == -1)), (
        "Mixing TPU and non-TPU: %s and %s" % (send_device, recv_device))
    if self._send_tpu_core >= 0:
      assert self._shape.is_fully_defined(), (
          "TPU channel must have fully defined shape. Name: %s, shape: %s" %
          (self._name, self._shape))
      assert self._send_tpu_core != self._recv_tpu_core, (
          "TPU send/recv must be cross-core: %s and %s" %
          (send_device, recv_device))
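
A minimal construction sketch for the channel above, assuming it runs in the module where `Channel` and `_TpuCore` are defined; the device strings, dtype, and shape are illustrative only:

# Hedged sketch: construct a fully specified TPU-to-TPU channel.
acts_channel = Channel(
    dtype=tf.float32,
    shape=[128, 512],  # must be fully defined because both ends are TPU cores
    send_device='/job:worker/replica:0/task:0/device:TPU:0',
    recv_device='/job:worker/replica:0/task:0/device:TPU:1',
    name='activations')
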
Example No. 7
    def EncodeModality(self, modality: str, inputs, batch_shape=None):
        """Runs `inputs` through `modality`'s encoder and optional projection.

        Args:
          modality: Name of the modality to encode.
          inputs: Tensor(s) of input to the encoder, e.g. a batch of decoded images.
          batch_shape: TensorShape describing the batch structure of the inputs.
            Defaults to `[None]`, which means `inputs` have a single batch
            dimension. Set to (e.g.) `[None, 5]` if each example in the batch
            contains 5 encodable items.

        Returns:
          A float32 Tensor of the encoded items, shape
          `batch_shape + [joint_embedding_dim]`.
        """
        if batch_shape is None:
            batch_shape = tf.TensorShape([None])
        if not isinstance(inputs, tuple):
            inputs = (inputs, )

        encodings = _EncodeBatch(self.encoders[modality],
                                 inputs,
                                 batch_shape=batch_shape)

        # If necessary, project outputs to joint_embedding_dim.
        if modality in self.projections:
            return self.projections[modality](encodings)
        else:
            return encodings
Example No. 8
 def Shape(self):
   """Shape of BBoxes."""
   p = self.params
   shapes = {
       'labels': tf.TensorShape([p.max_num_objects]),
       'label_ids': tf.TensorShape([p.max_num_objects]),
       'detection_difficulties': tf.TensorShape([p.max_num_objects]),
       'tracking_difficulties': tf.TensorShape([p.max_num_objects]),
       'bboxes_3d': tf.TensorShape([p.max_num_objects, 7]),
       'bboxes_3d_mask': tf.TensorShape([p.max_num_objects]),
       'bboxes_3d_num_points': tf.TensorShape([p.max_num_objects]),
       'unfiltered_bboxes_3d_mask': tf.TensorShape([p.max_num_objects]),
       'speed': tf.TensorShape([p.max_num_objects, 2]),
       'acceleration': tf.TensorShape([p.max_num_objects, 2])
   }
   return py_utils.NestedMap(shapes)
Example No. 9
  def FProp(self, theta, *args):
    """Runs p.repeat copies of self.body.FProp independently.

    Args:
      theta: Layer model parameters. The shape of each variable in theta is
        always [p.repeat, ...], and the i-th slice theta[i] becomes the theta of
        the i-th copy of self.body.
      *args: Input arguments. The shape of each tensor in args is always
        [p.repeat, ...], and the list [arg[i] for arg in args] becomes the
        inputs to the i-th copy of self.body.FProp.

    Returns:
      The accumulated output tensors. Each tensor t in the return value has
      shape [p.repeat, ...], and the tuple (t[i] for t in output_tensors) is
      the return tuple of the i-th self.body.FProp.
    """
    p = self.params
    for arg in args:
      if arg is not None:
        arg = py_utils.HasShape(arg, [p.repeat], ndims=1)

    theta_stack = _MaybeStackExtraTheta(theta.body, self.body.vars, p.repeat)
    inputs = py_utils.NestedMap(theta=theta_stack, args=list(args))
    # Infer out_shapes from FPropMeta.
    out_shapes = self._InferOutShapes(args)

    def _CellFn(unused_theta, unused_state0, inputs):
      """Recurrent cell function wrapper of body.FProp."""
      # Sets shapes for both theta and inputs to self.body.FProp.
      for dst, src in zip(inputs.args + inputs.theta.Flatten(),
                          list(args) + theta_stack.Flatten()):
        if src is not None:
          dst.set_shape(tf.TensorShape(src.shape.as_list()[1:]))

      # Runs the actual body.FProp
      fprop_outputs = self.body.FProp(inputs.theta, *inputs.args)
      fprop_outputs = _ToTuple(fprop_outputs)
      assert len(fprop_outputs) == len(out_shapes)
      # Passes fprop outputs to the next layer through state.
      state1 = py_utils.NestedMap(outputs=list(fprop_outputs))
      return state1, py_utils.NestedMap()

    with tf.name_scope(p.name):
      # Initiate state0 with inferred output shapes.
      state0 = py_utils.NestedMap(
          outputs=[tf.zeros(shape, args[0].dtype) for shape in out_shapes])
      # Runs body.FProp p.repeat times using Recurrent.
      acc_states, _ = recurrent.Recurrent(
          theta=py_utils.NestedMap(),
          state0=state0,
          inputs=inputs,
          cell_fn=_CellFn)

      # Retrieves fprop outputs from state1 and sets shapes.
      output_tensors = tuple(acc_states.outputs)
      for out_idx in range(len(output_tensors)):
        output_tensors[out_idx].set_shape(
            tf.TensorShape([p.repeat] + out_shapes[out_idx].as_list()))

      return output_tensors[0] if len(args) == 1 else tuple(output_tensors)
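
To make the stacking contract in the docstring concrete, here is a standalone sketch with a hypothetical body FProp(theta, x) = x * theta.w + theta.b and p.repeat = 3; it unrolls the per-copy computation with a plain Python loop instead of recurrent.Recurrent:

import tensorflow as tf

repeat = 3  # plays the role of p.repeat
# Each theta variable and each input carries a leading [repeat] dimension.
theta_stack = dict(w=tf.constant([1., 2., 3.]), b=tf.constant([0., 10., 20.]))
x_stack = tf.constant([[1., 1.], [2., 2.], [3., 3.]])  # [repeat, ...]

# The i-th slice of theta/args feeds the i-th copy of the (hypothetical) body.
per_copy = [x_stack[i] * theta_stack['w'][i] + theta_stack['b'][i]
            for i in range(repeat)]
print(tf.stack(per_copy).shape)  # (3, 2): outputs regain the [repeat, ...] shape
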
Example No. 10
  def _get_input_shapes(self, *args):
    p = self.params
    if p.nested_map_fprop:
      assert len(args) == 1
      assert isinstance(args[0], py_utils.NestedMap)
      input_tensors = py_utils.Flatten(args[0])
    else:
      input_tensors = _ToTuple(args)
    # Get batch size from the first tensor which is not None.
    mini_batch_size = None
    for input_tensor in input_tensors:
      if input_tensor is not None:
        mini_batch_size = input_tensor.get_shape().as_list()[p.batch_dim]
    assert mini_batch_size is not None
    micro_batch_size = p.micro_batch_size
    if not micro_batch_size:
      if p.num_micro_batches > mini_batch_size:
        p.num_micro_batches = mini_batch_size
      micro_batch_size = mini_batch_size // p.num_micro_batches
    if mini_batch_size is not None:
      if micro_batch_size * p.num_micro_batches != mini_batch_size:
        raise ValueError('micro_batch_size * num_micro_batches != batch_size.')

    input_shapes = ()
    for input_tensor in input_tensors:
      if input_tensor is not None:
        input_shape = input_tensor.get_shape().as_list()
        input_shape[p.batch_dim] = micro_batch_size
        input_shapes += (tf.TensorShape(input_shape),)
      else:
        input_shapes += (None,)

    if p.nested_map_fprop:
      input_shapes = py_utils.Pack(args[0], input_shapes)
    return input_shapes
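
A small, self-contained illustration of the shape arithmetic above (the batch size, micro-batch count, and feature dimension are made up): a mini-batch of 32 with num_micro_batches=4 yields micro-batches of 8, and each input shape has its batch dimension rewritten accordingly.

import tensorflow as tf

mini_batch_size, num_micro_batches, batch_dim = 32, 4, 0
micro_batch_size = mini_batch_size // num_micro_batches
assert micro_batch_size * num_micro_batches == mini_batch_size

input_shape = [mini_batch_size, 10]
input_shape[batch_dim] = micro_batch_size
print(tf.TensorShape(input_shape))  # (8, 10)
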
Example No. 11
    def testRandomChoicePreprocessor(self):
        p = input_preprocessors.RandomChoicePreprocessor.Params()
        p.weight_tensor_key = 'weights'
        # Construct 4 preprocessors each producing a different value.
        p.subprocessors = [
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': 1}),
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': 2}),
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': 3}),
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': 4}),
        ]

        preprocessor = p.Instantiate()

        # Construct test data.
        features = py_utils.NestedMap()
        features.weights = tf.constant([1., 2., 3., 4.])
        shapes = py_utils.NestedMap()
        shapes.weights = tf.TensorShape([4])
        dtypes = py_utils.NestedMap()
        dtypes.weights = tf.float32

        # Verify shape / dtypes.
        new_shapes = preprocessor.TransformShapes(shapes)
        new_dtypes = preprocessor.TransformDTypes(dtypes)
        self.assertEqual(new_shapes.value, tf.TensorShape([]))
        self.assertEqual(new_dtypes.value, tf.int64)

        new_features = preprocessor.TransformFeatures(features)

        counts = [0, 0, 0, 0]
        with self.session() as sess:
            # Run 10000 times to get probability distribution.
            for _ in range(10000):
                new_features_np = sess.run(new_features)
                counts[new_features_np.value - 1] += 1

            # Check distribution roughly matches [0.1, 0.2, 0.3, 0.4]
            self.assertTrue(counts[0] > 800 and counts[0] < 1200)
            self.assertTrue(counts[1] > 1800 and counts[1] < 2200)
            self.assertTrue(counts[2] > 2800 and counts[2] < 3200)
            self.assertTrue(counts[3] > 3800 and counts[3] < 4200)
Example No. 12
 def ToTensorShape(self):
     """Converts to a possibly partially specified tf.TensorShape."""
     dims = []
     for d in self._shape:
         if d.is_number and d.is_integer:
             dims.append(int(d))
         else:
             dims.append(None)
     return tf.TensorShape(dims)
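
Assuming the elements of self._shape are sympy expressions, the conversion maps concrete integers to ints and symbolic dimensions to None; a standalone sketch of the same loop:

import sympy
import tensorflow as tf

dims = [sympy.Integer(4), sympy.Symbol('num_points')]
converted = [int(d) if (d.is_number and d.is_integer) else None for d in dims]
print(tf.TensorShape(converted))  # (4, None)
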
Example No. 13
    def testRandomChoicePreprocessorErrors(self):
        p = input_preprocessors.RandomChoicePreprocessor.Params()
        p.weight_tensor_key = 'weights'
        # Subprocessors produce different shapes
        p.subprocessors = [
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': 1}),
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': [2, 3]}),
        ]
        preprocessor = p.Instantiate()
        # Construct test data.
        shapes = py_utils.NestedMap()
        shapes.weights = tf.TensorShape([2])
        with self.assertRaises(ValueError):
            preprocessor.TransformShapes(shapes)

        # Subprocessors produce different keys
        p.subprocessors = [
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': 1}),
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'foo': 2}),
        ]
        preprocessor = p.Instantiate()
        # Construct test data.
        shapes = py_utils.NestedMap()
        shapes.weights = tf.TensorShape([2])
        with self.assertRaises(ValueError):
            preprocessor.TransformShapes(shapes)

        # Subprocessors produce different dtypes
        p.subprocessors = [
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': 1}),
            input_preprocessors.ConstantPreprocessor.Params().Set(
                constants={'value': 2.}),
        ]
        preprocessor = p.Instantiate()
        # Construct test data.
        dtypes = py_utils.NestedMap()
        dtypes.weights = tf.float32
        with self.assertRaises(ValueError):
            preprocessor.TransformDTypes(dtypes)
Example No. 14
def _Feature(shape, dtype=tf.float32):
    shape = tf.TensorShape(shape)
    if shape.is_fully_defined():
        return tf.io.FixedLenFeature(shape, dtype=dtype)
    else:
        if shape[:1].is_fully_defined() or not shape[1:].is_fully_defined():
            raise ValueError(f'Unsupported sequence shape {shape}')
        return tf.io.FixedLenSequenceFeature(shape[1:],
                                             dtype=dtype,
                                             allow_missing=True)
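
A hedged usage sketch of _Feature above: a fully defined shape becomes a FixedLenFeature, while a shape with an unknown leading dimension becomes a FixedLenSequenceFeature whose leading dimension is inferred at parse time. Feature names and values here are made up.

import tensorflow as tf

spec = {
    'label': _Feature([], tf.int64),   # scalar -> FixedLenFeature
    'points': _Feature([None, 3]),     # [None, 3] -> FixedLenSequenceFeature([3])
}
example = tf.train.Example(features=tf.train.Features(feature={
    'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[7])),
    'points': tf.train.Feature(float_list=tf.train.FloatList(
        value=[0., 1., 2., 3., 4., 5.])),
}))
parsed = tf.io.parse_single_example(example.SerializeToString(), spec)
print(parsed['points'].shape)  # (2, 3): six floats parsed as two rows of three
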
Example No. 15
def _GetShapes(tensors, none_shapes=False):
  """Util for getting nested structure of shapes from structure of tensors.

  Args:
    tensors: Structure of Tensors to get shapes for.
    none_shapes: Returns None shapes if true.

  Returns:
    The same structure as tensors but of corresponding `TensorShape` objects.
  """
  shapes = []
  for t in tf.nest.flatten(tensors):
    shape = t.get_shape() if isinstance(t, tf.Tensor) else None
    if none_shapes:
      if shape:
        shapes.append(tf.TensorShape([None] * len(shape)))
      else:
        shapes.append(tf.TensorShape(None))
    else:
      shapes.append(tf.TensorShape(shape))

  return type(tensors)(tf.nest.pack_sequence_as(tensors, shapes))
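
A small sketch of _GetShapes on a made-up structure; with none_shapes=True every known dimension is relaxed to None, which is how the beam-search decoder further below builds shape_invariants for tf.while_loop.

import tensorflow as tf

state = {'accum': tf.zeros([4, 8]), 'count': tf.zeros([4])}
print(_GetShapes(state))
# {'accum': TensorShape([4, 8]), 'count': TensorShape([4])}
print(_GetShapes(state, none_shapes=True))
# {'accum': TensorShape([None, None]), 'count': TensorShape([None])}
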
Example No. 16
 def Shape(self):
     """The expected shape of each field."""
     return py_utils.NestedMap(pose=tf.TensorShape([4, 4]),
                               run_segment=tf.TensorShape([]),
                               run_start_offset=tf.TensorShape([]),
                               time_of_day=tf.TensorShape([]),
                               location=tf.TensorShape([]),
                               weather=tf.TensorShape([]))
Example No. 17
    def testMultipleResultsPerExample(self):

        # Simple batch of 3 examples with 2 items per example in the result
        # modality.
        batch_size = 3
        results_per_example = 2

        inputs = label_lib.ExamplePairs.WithinBatch(
            batch=dict(some_feature=tf.range(batch_size)),
            query_modality='q',
            result_modality='r')

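        # Note: `X` is a module-level constant defined elsewhere in this test
        # file, presumably the label value marking pairs that should be ignored.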
        def example_pair_labeler(_):
            return tf.constant([
                [1, 0, 0],
                [0, 1, X],
                [0, X, 1],
            ],
                               dtype=tf.int64)

        multi_item_labeler = label_lib.MultiItemExampleWrapper(
            example_pair_labeler,
            modality_batch_shapes=dict(q=tf.TensorShape([None]),
                                       r=tf.TensorShape(
                                           [None, results_per_example])))
        labels = multi_item_labeler(inputs)
        self.assertEqual([batch_size, batch_size, results_per_example],
                         labels.shape.as_list())
        # [3, 3, 2]
        expected_labels = [
            # pyformat: disable
            [[1, 1], [0, 0], [0, 0]],
            [[0, 0], [1, 1], [X, X]],
            [[0, 0], [X, X], [1, 1]]
            # pyformat: enable
        ]
        self.assertAllEqual(expected_labels, labels)
Example No. 18
    def _CellFn(unused_theta, unused_state0, inputs):
      """Recurrent cell function wrapper of body.FProp."""
      # Sets shapes for both theta and inputs to self.body.FProp.
      for dst, src in zip(inputs.args + inputs.theta.Flatten(),
                          list(args) + theta_stack.Flatten()):
        if src is not None:
          dst.set_shape(tf.TensorShape(src.shape.as_list()[1:]))

      # Runs the actual body.FProp
      fprop_outputs = self.body.FProp(inputs.theta, *inputs.args)
      fprop_outputs = _ToTuple(fprop_outputs)
      assert len(fprop_outputs) == len(out_shapes)
      # Passes fprop outputs to the next layer through state.
      state1 = py_utils.NestedMap(outputs=list(fprop_outputs))
      return state1, py_utils.NestedMap()
Example No. 19
 def _AssignVar(self, var_op):
     size = var_op.get_attr('dtype').size
     shape = tf.TensorShape(var_op.get_attr('shape'))
     assert self._var_space_pq, ('No ps devices to use.')
     allocated, device = heapq.heappop(self._var_space_pq)
     if shape.num_elements() is None:
          # For vars whose shapes aren't known statically, make a constant
          # estimate to avoid introducing more complexity.
         var_bytes = 10 * 1024**2 * size
     else:
         var_bytes = shape.num_elements() * size
     allocated += var_bytes
     heapq.heappush(self._var_space_pq, (allocated, device))
     tf.logging.info('Place variable %s on %s %d(+%d)', var_op.name, device,
                     allocated, var_bytes)
     return device
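
The placement above is a greedy "least loaded wins" scheme over a heap of (allocated_bytes, device) pairs; a standalone sketch of the same idea with made-up device names and variable sizes:

import heapq

var_space_pq = [(0, '/job:ps/task:0'), (0, '/job:ps/task:1')]
heapq.heapify(var_space_pq)
for name, var_bytes in [('w0', 400), ('w1', 100), ('w2', 300)]:
  # Pop the least-loaded ps device, charge it, and push it back.
  allocated, device = heapq.heappop(var_space_pq)
  heapq.heappush(var_space_pq, (allocated + var_bytes, device))
  print('Place variable %s on %s (+%d)' % (name, device, var_bytes))
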
Example No. 20
 def Shape(self):
     p = self.params
     shape = py_utils.NestedMap(width=tf.TensorShape([1]),
                                height=tf.TensorShape([1]),
                                velo_to_image_plane=tf.TensorShape([3, 4]),
                                velo_to_camera=tf.TensorShape([4, 4]),
                                camera_to_velo=tf.TensorShape([4, 4]))
     if p.decode_image:
         shape.image = tf.TensorShape(
             [self._KITTI_MAX_HEIGHT, self._KITTI_MAX_WIDTH, 3])
     return shape
Example No. 21
 def _AssignVar(self, var_op):
     size = var_op.get_attr('dtype').size
     shape = tf.TensorShape(var_op.get_attr('shape'))
     assert self._var_space_pq, ('No ps devices to use.')
     allocated, device = heapq.heappop(self._var_space_pq)
     if shape.num_elements() is None:
         assert var_op.name.endswith(
             'wb/var'), 'Unexpected name pattern: %s' % var_op.name
          # CuDNN RNN var shapes aren't known statically; make a constant
          # estimate to avoid introducing more complexity.
         allocated += 10 * 1024**2 * size
     else:
         allocated += shape.num_elements() * size
     heapq.heappush(self._var_space_pq, (allocated, device))
     tf.logging.info('Place variable %s on %s %d', var_op.name, device,
                     allocated)
     return device
Example No. 22
    def _CellFn(unused_theta, state0, theta_i):
      """Recurrent cell function wrapper of body.FProp."""
      # Retrieves fprop arguments from state and sets shapes.
      fprop_inputs = _StateToArgs(state0)

      # Sets shapes for theta_i as well.
      for dst, src in zip(theta_i.Flatten(), theta_stack.Flatten()):
        if src is not None:
          dst.set_shape(tf.TensorShape(src.shape.as_list()[1:]))

      # Runs the actual body.FProp
      fprop_outputs = self._body.FProp(theta_i, *fprop_inputs)
      fprop_outputs = _ToTuple(fprop_outputs)
      assert len(fprop_outputs) == len(fprop_inputs)

      # Passes fprop outputs to the next layer through state.
      state1 = _ArgsToState(fprop_outputs)
      return state1, py_utils.NestedMap()
Example No. 23
  def testRandomChoicePreprocessor(self):
    p = input_preprocessors.RandomChoicePreprocessor.Params()
    # Construct 4 preprocessors each producing a different value.
    base = input_preprocessors.ConstantPreprocessor.Params()
    c1 = (base.Copy().Set(constants={'value': 1}),
          schedule.Constant.Params().Set(value=1))
    c2 = (base.Copy().Set(constants={'value': 2}),
          schedule.Constant.Params().Set(value=2))
    c3 = (base.Copy().Set(constants={'value': 3}),
          schedule.Constant.Params().Set(value=3))
    c4 = (base.Copy().Set(constants={'value': 4}),
          schedule.Constant.Params().Set(value=4))

    p.subprocessors = [c1, c2, c3, c4]

    # Create global step because schedules depend on it.
    _ = py_utils.GetOrCreateGlobalStepVar()
    preprocessor = p.Instantiate()

    features = py_utils.NestedMap()
    shapes = py_utils.NestedMap()
    dtypes = py_utils.NestedMap()

    # Verify shape / dtypes.
    new_shapes = preprocessor.TransformShapes(shapes)
    new_dtypes = preprocessor.TransformDTypes(dtypes)
    self.assertEqual(new_shapes.value, tf.TensorShape([]))
    self.assertEqual(new_dtypes.value, tf.int64)

    self.evaluate(tf.global_variables_initializer())
    new_features = preprocessor.TransformFeatures(features)

    counts = [0, 0, 0, 0]
    with self.session() as sess:
      # Run 10000 times to get probability distribution.
      for _ in range(10000):
        new_features_np = sess.run(new_features)
        counts[new_features_np.value - 1] += 1

      # Check distribution roughly matches [0.1, 0.2, 0.3, 0.4]
      self.assertTrue(counts[0] > 800 and counts[0] < 1200)
      self.assertTrue(counts[1] > 1800 and counts[1] < 2200)
      self.assertTrue(counts[2] > 2800 and counts[2] < 3200)
      self.assertTrue(counts[3] > 3800 and counts[3] < 4200)
Example No. 24
 def testPSRandomSize(self):
   p = cluster_factory.Cluster.Params()
   p.worker.name = '/job:trainer'
   p.ps.name = '/job:ps'
   p.ps.replicas = 10
   c = cluster_factory.Cluster(p)
   g = tf.Graph()
   vs = []
   np.random.seed(301)
   with g.as_default():
     with tf.device(c.GetPlacer()):
       # Creates 200 variables with different sizes.
       for i in range(200):
         if i % 13:
           size = np.random.randint(10000)
         elif i % 7:
           size = np.random.randint(100)
         else:
           size = np.random.randint(10)
         vs.append(tf.get_variable('x%d' % i, shape=(size)))
       sum_all = tf.add_n([tf.reduce_sum(x) for x in vs])
   # Computes the total size of variables placed on each device.
   total_size = {}  # device name -> size
   for v in vs:
     size = tf.TensorShape(v.op.get_attr('shape')).num_elements()
     if v.device in total_size:
       total_size[v.device] += size
     else:
       total_size[v.device] = size
   for (device, allocated) in zip(
       sorted(total_size),
       [91701, 91361, 90346, 88738, 87240, 89265, 91944, 92472, 88051, 95053]):
     self.assertEqual(total_size[device], allocated)
   self.assertEqual(
       sum_all.device,
       cluster.MakeDeviceString(
           job_name='/job:trainer',
           replica_id=0,
           task_id=0,
           device_name='CPU',
           device_id=0))
Example No. 25
    def testIntraModalLabels(self):
        # Simulate a batch of 4 examples with 2 items each in the 'text' modality.
        batch_size = 4
        items_per_example = 2
        modality = 'text'
        modality_shape = tf.TensorShape([batch_size, items_per_example])
        inputs = label_lib.ExamplePairs.WithinBatch(
            batch=dict(some_feature=tf.range(batch_size)),
            query_modality=modality,
            result_modality=modality)

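        # As in the earlier test, `X` is a module-level constant from this test
        # file, presumably marking pairs whose label should be ignored.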
        def example_pair_labeler(_):
            return tf.constant([
                [1, 0, 0, X],
                [0, 1, 0, 0],
                [0, 0, 1, 0],
                [X, 0, 0, 1],
            ])

        labeler = label_lib.MultiItemExampleWrapper(
            example_pair_labeler,
            modality_batch_shapes={modality: modality_shape})
        labels = labeler(inputs)
        self.assertEqual(modality_shape + modality_shape, labels.shape)
        # The pairwise labels actually have rank 4 (twice the rank of ids), but we
        # compare them in matrix form for easier inspection. There are 8 items
        # total. Each should have a positive label for every other item from the
        # same example. Self-pairs should be ignored (they are neither positive
        # nor negative pairs), as well as pairs from duplicated examples.
        self.assertAllEqual([
            [X, 1, 0, 0, 0, 0, X, X],
            [1, X, 0, 0, 0, 0, X, X],
            [0, 0, X, 1, 0, 0, 0, 0],
            [0, 0, 1, X, 0, 0, 0, 0],
            [0, 0, 0, 0, X, 1, 0, 0],
            [0, 0, 0, 0, 1, X, 0, 0],
            [X, X, 0, 0, 0, 0, X, 1],
            [X, X, 0, 0, 0, 0, 1, X],
        ], tf.reshape(labels, [8, 8]))
Example No. 26
 def Shape(self):
     return py_utils.NestedMap({self.KEY_NAME: tf.TensorShape([])})
Example No. 27
 def Shape(self):
     return py_utils.NestedMap({'foo': tf.TensorShape([])})
Example No. 28
  def BeamSearchDecode(self,
                       theta,
                       encoder_outputs,
                       num_hyps_per_beam_override=0,
                       init_beam_search_state=None,
                       pre_beam_search_step_callback=None,
                       post_beam_search_step_callback=None,
                       max_steps=None):
    """Performs beam-search based decoding.

    Args:
      theta: A NestedMap object containing weights' values of the decoder layer
        and its children layers.
      encoder_outputs: A NestedMap containing encoder outputs to be passed to
        the callbacks.
      num_hyps_per_beam_override: If set to a value <= 0, this parameter is
        ignored. If set to a value > 0, then this value will be used to override
        `p.num_hyps_per_beam`.
      init_beam_search_state: The `InitBeamSearchState` callback. Please refer
        to the class header comments for more details.
      pre_beam_search_step_callback: The `PreBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      post_beam_search_step_callback: The `PostBeamSearchStepCallback` callback.
        Please refer to the class header comments for more details.
      max_steps: maximum beam search steps. If None, use
        self.params.target_seq_len.

    Returns:
      A `BeamSearchDecodeOutput`.
    """
    p = self.params
    num_hyps_per_beam = p.num_hyps_per_beam
    if num_hyps_per_beam_override > 0:
      num_hyps_per_beam = num_hyps_per_beam_override
    if max_steps is None:
      max_steps = p.target_seq_len

    initial_results, other_states = init_beam_search_state(
        theta, encoder_outputs, num_hyps_per_beam)

    num_hyps = tf.shape(initial_results.log_probs)[0]
    num_beams = num_hyps // num_hyps_per_beam

    if 'step_ids' in initial_results:
      # [num_hyps, 1]
      step_ids = tf.ensure_shape(initial_results.step_ids, [None, 1])
    else:
      step_ids = tf.fill([num_hyps, 1],
                         tf.constant(p.target_sos_id, dtype=tf.int32))

    min_score = -1e36
    best_scores = (tf.zeros(shape=[num_beams], dtype=p.dtype) + min_score)
    cumulative_scores = tf.zeros(shape=[num_hyps], dtype=p.dtype)
    in_scores = tf.zeros([max_steps, num_hyps], dtype=p.dtype)
    in_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.int32)
    in_prev_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.int32)
    in_done_hyps = tf.zeros([max_steps, num_hyps], dtype=tf.string)
    bs_atten_probs = tf.zeros(
        [max_steps, num_hyps,
         tf.shape(initial_results.atten_probs)[1]],
        dtype=p.dtype)
    cur_step = tf.constant(0, dtype=tf.int32)
    all_done = tf.constant(False, dtype=tf.bool)
    core_bs_states = (best_scores, cumulative_scores, in_scores, in_hyps,
                      in_prev_hyps, in_done_hyps, bs_atten_probs)

    def LoopContinue(cur_step, all_done, unused_step_ids, unused_core_bs_states,
                     unused_other_states_list):
      return tf.logical_and(cur_step < max_steps, tf.logical_not(all_done))

    def LoopBody(cur_step, unused_all_done, step_ids, core_bs_states,
                 other_states_list):
      (cur_step, all_done, new_step_ids, new_bs_states,
       new_other_states) = self._BeamSearchStep(
           theta, encoder_outputs, cur_step, step_ids, core_bs_states,
           other_states.Pack(other_states_list), num_hyps_per_beam,
           pre_beam_search_step_callback, post_beam_search_step_callback)
      return (cur_step, all_done, new_step_ids, new_bs_states,
              new_other_states.Flatten())

    flat_other_states = other_states.Flatten()
    _, _, _, final_bs_states, flat_final_other_states = tf.while_loop(
        LoopContinue,
        LoopBody,
        loop_vars=(cur_step, all_done, step_ids, core_bs_states,
                   flat_other_states),
        parallel_iterations=10,
        back_prop=False,
        swap_memory=False,
        shape_invariants=(tf.TensorShape(cur_step.get_shape()),
                          tf.TensorShape(all_done.get_shape()),
                          tf.TensorShape(step_ids.get_shape()),
                          _GetShapes(core_bs_states),
                          _GetShapes(flat_other_states, none_shapes=True)))
    # [target_seq_len, num_beams * num_hyps_per_beam].
    final_done_hyps = final_bs_states[5]
    final_other_states = other_states.Pack(flat_final_other_states)

    # TODO(rpang): avoid inspecting 'encoder_outputs'.
    source_paddings = encoder_outputs.padding
    if isinstance(source_paddings, py_utils.NestedMap):
      source_seq_lengths = tf.cast(
          tf.round(
              tf.reduce_sum(1.0 - tf.transpose(source_paddings.Flatten()[0]),
                            1)), tf.int32)
    else:
      source_seq_lengths = tf.cast(
          tf.round(tf.reduce_sum(1.0 - tf.transpose(source_paddings), 1)),
          tf.int32)

    # [num_beams, num_hyps_per_beam].
    topk_hyps = ops.top_k_terminated_hyps(
        final_done_hyps,
        source_seq_lengths,
        k=num_hyps_per_beam,
        num_hyps_per_beam=num_hyps_per_beam,
        length_normalization=p.length_normalization,
        coverage_penalty=p.coverage_penalty,
        target_seq_length_ratio=p.target_seq_length_ratio,
        eoc_id=p.target_eoc_id,
        merge_paths=p.merge_paths)
    # [num_beams * num_hyps_per_beam, ...].
    max_seq_length = 0 if isinstance(max_steps, tf.Tensor) else max_steps
    topk_ids, topk_lens, topk_scores = ops.unpack_hyp(
        tf.reshape(topk_hyps, [-1]), max_seq_length=max_seq_length)
    # [num_beams, num_hyps_per_beam].
    topk_scores = tf.reshape(topk_scores, tf.shape(topk_hyps))

    return BeamSearchDecodeOutput(final_done_hyps, topk_hyps, topk_ids,
                                  topk_lens, topk_scores, None,
                                  final_other_states)
Example No. 29
    def Transform(self, dataset):
        """Batches a dataset containing NestedMaps of tensors."""
        p = self.params

        require_sequential_order = p.require_sequential_order or self.do_eval
        seqlen_fn = getattr(self._input_generator, p.seqlen_fn)

        def SetBucketKeys(example):
            example.bucket_keys = seqlen_fn(example)
            return example

        dataset = dataset.map(SetBucketKeys,
                              num_parallel_calls=tf.data.experimental.AUTOTUNE,
                              deterministic=require_sequential_order)

        dataset = dataset.filter(
            lambda x: x.bucket_keys <= p.bucket_upper_bound[-1])

        dataset_structure = py_utils.NestedMap.FromNestedDict(
            tf.data.experimental.get_structure(dataset))

        input_shape_fn = getattr(self._input_generator, p.input_shape_fn)
        padded_shapes = dataset_structure.TransformWithKey(
            lambda k, _: tf.TensorShape(input_shape_fn(k)))
        input_padding_fn = getattr(self._input_generator, p.input_padding_fn)
        padding_values = dataset_structure.TransformWithKey(input_padding_fn)

        dataset_structure.VLog(0, 'dataset_structure:')
        padded_shapes.VLog(0, 'padded_shapes:')

        bucket_batch_limit = [
            batch_utils.scale_split_to_infeed(
                b, self._input_generator.params.use_per_host_infeed)
            for b in p.bucket_batch_limit
        ]
        dataset = dataset.apply(
            tf.data.experimental.bucket_by_sequence_length(
                lambda x: x.bucket_keys,
                # Upper-bound for bucket_by_sequence_length is exclusive, so add 1
                # TODO(jeffreyzhao): There is an off-by-one bug with the upper bound
                # boundary check, so add 2 instead. Remove when fixed.
                [x + 2 for x in p.bucket_upper_bound],
                bucket_batch_limit + [1],
                padded_shapes=padded_shapes,
                padding_values=padding_values,
                pad_to_bucket_boundary=True,
                drop_remainder=py_utils.use_tpu()))

        if py_utils.use_tpu():
            # Set static shapes for TPU.
            if min(bucket_batch_limit) != max(bucket_batch_limit):
                raise ValueError('TPU requires constant batch sizes.')
            else:
                b = bucket_batch_limit[0]

                def SetShape(element):
                    for t in element.Flatten():
                        t.set_shape((b, ) + t.shape[1:])
                    return element

                dataset = dataset.map(
                    SetShape,
                    num_parallel_calls=tf.data.experimental.AUTOTUNE,
                    deterministic=require_sequential_order)

        return dataset
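
A toy, self-contained sketch of the tf.data bucketing transform used above (boundaries, batch sizes, and sequence lengths are made up); with pad_to_bucket_boundary=True each batch is padded to its bucket boundary minus one:

import tensorflow as tf

ds = tf.data.Dataset.from_generator(
    lambda: (list(range(n)) for n in [2, 3, 5, 6]),
    output_signature=tf.TensorSpec([None], tf.int32))
ds = ds.apply(
    tf.data.experimental.bucket_by_sequence_length(
        lambda x: tf.size(x),
        bucket_boundaries=[4, 8],      # buckets: len < 4, 4 <= len < 8, len >= 8
        bucket_batch_sizes=[2, 2, 1],  # one entry per bucket plus the overflow bucket
        pad_to_bucket_boundary=True))
for batch in ds:
  print(batch.shape)  # (2, 3) for the short bucket, then (2, 7)
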
Example No. 30
 def __init__(self, batch_shape):
     batch_shape = tf.TensorShape(batch_shape)
     assert batch_shape.rank >= 1
     batch_shape[1:].assert_is_fully_defined()
     self._batch_shape = batch_shape
     self._is_no_op = batch_shape == tf.TensorShape([None])