def _testExtendStep(self, sess, dec, encoder_outputs, tgts, num_hyps):
  """Checks that step-by-step ExtendStep decoding reproduces dec._FProp.

  Args:
    sess: Session used to evaluate the tensors built here.
    dec: Decoder under test; its _FProp, ExtendStep, _ExpandToNumHyps and
      theta members are used.
    encoder_outputs: Encoder outputs. The `padding` field is summed over axis
      0 to recover the per-example source length.
    tgts: Targets; `tgts.ids[:, i]` is fed to ExtendStep at step i.
    num_hyps: Forwarded to dec._ExpandToNumHyps with the source lengths.
  """
  params = self._DecoderParams()

  # Recover the unpadded source length from the padding.
  source_lengths = dec._ExpandToNumHyps(
      tf.reduce_sum(1 - encoder_outputs.padding, axis=0), num_hyps)

  # Reference: a single full forward pass.
  fprop_result = dec._FProp(dec.theta, encoder_outputs, tgts)
  fprop_softmax_input = fprop_result.softmax_input
  fprop_attention = fprop_result.attention

  # Step-wise decoding starts from empty per-layer caches. The leading
  # dimension is the number of decoded steps so far, hence 0.
  prefix_states = py_utils.NestedMap()
  for layer_index in range(6):
    layer_states = py_utils.NestedMap()
    layer_states.key = tf.zeros([0, self.tgt_batch, params.model_dim])
    layer_states.value = tf.zeros([0, self.tgt_batch, params.model_dim])
    prefix_states['layer_%i' % layer_index] = layer_states

  step_outputs = []
  step_atten_probs = []
  for step in range(5):
    step_out, prefix_states, atten_probs = dec.ExtendStep(
        dec.theta, encoder_outputs, tgts.ids[:, step], step, prefix_states)
    step_outputs.append(step_out)
    step_atten_probs.append(atten_probs)
  extend_softmax_input = tf.stack(step_outputs)

  # Stacking puts the step dimension first; swap the first two axes so the
  # layout matches the attention returned by _FProp.
  extend_attention = py_utils.NestedMap(probs=tf.stack(step_atten_probs))
  extend_attention = extend_attention.Transform(
      lambda x: tf.transpose(x, [1, 0, 2]))

  tf.global_variables_initializer().run()
  (fprop_softmax_input_v, extend_softmax_input_v, fprop_attention_v,
   extend_attention_v, source_lengths_v) = sess.run([
       fprop_softmax_input, extend_softmax_input, fprop_attention,
       extend_attention, source_lengths
   ])

  # Both decoding paths must produce the same outputs ...
  self.assertAllClose(
      fprop_softmax_input_v, extend_softmax_input_v, rtol=1e-05, atol=1e-05)
  # ... and the same attention probabilities.
  self.assertAllClose(fprop_attention_v.probs, extend_attention_v.probs)
  print('attention map', fprop_attention_v.probs)

  # The last valid source position (EOS) and everything after it must
  # receive exactly zero attention probability.
  for row, length in enumerate(source_lengths_v):
    eos_position = int(length) - 1
    self.assertEqual(
        np.count_nonzero(fprop_attention_v.probs[row][:, eos_position:]), 0)
def _TransformerSingleSourceInputs(self, depth=3, dtype=tf.float32):
  """Builds deterministic source and aux-source inputs for transformer tests.

  Args:
    depth: Size of the last dimension of the generated vectors.
    dtype: dtype of every returned tensor.

  Returns:
    (source_vecs, source_padding, aux_source_vecs, aux_source_paddings) with
    shapes [5, 2, depth], [5, 2], [7, 2, depth] and [7, 2].
  """
  np.random.seed(NUMPY_RANDOM_SEED)

  def _RandomSteps(num_steps):
    # One random [2, depth] constant per step, stacked along a new axis 0.
    return tf.stack([
        tf.constant(np.random.rand(2, depth), dtype=dtype)
        for _ in range(num_steps)
    ])

  def _TimeMajor(batch_major_rows):
    # Paddings are written batch-major for readability, then transposed.
    return tf.transpose(tf.constant(batch_major_rows, dtype=dtype))

  # The source vectors are drawn before the aux vectors so the numpy random
  # stream is consumed in a fixed order.
  source_vecs = _RandomSteps(5)
  source_padding = _TimeMajor([[0, 0, 1, 1, 0], [1, 0, 0, 0, 1]])
  aux_source_vecs = _RandomSteps(7)
  aux_source_paddings = _TimeMajor(
      [[0, 1, 0, 1, 0, 1, 0], [1, 0, 1, 0, 1, 0, 1]])
  return source_vecs, source_padding, aux_source_vecs, aux_source_paddings
def BatchMakeRotationMatrix(yaw, clockwise=False):
  """Builds a batch of 3x3 rotation matrices about the z axis from yaw angles.

  Args:
    yaw: float tensor of yaw angles in radians.
    clockwise: If True the angle is negated before the matrix is built, so the
      rotation is applied clockwise. The default (False) is counter-clockwise,
      which keeps the same semantics as MakeRotationMatrix.

  Returns:
    A [N, 3, 3] tensor of rotation matrices.
  """
  angle = -yaw if clockwise else yaw
  cos_a = tf.cos(angle)
  sin_a = tf.sin(angle)
  zeros = tf.zeros_like(cos_a)
  ones = tf.ones_like(cos_a)
  # The nine entries are listed row by row and folded into 3x3 below.
  row_major_entries = [
      cos_a, -sin_a, zeros,
      sin_a, cos_a, zeros,
      zeros, zeros, ones,
  ]  # pyformat: disable
  flat = tf.stack(row_major_entries, axis=-1)
  return tf.reshape(flat, [-1, 3, 3])
def _process(record):
  """Parses `record` into a number and pairs it with its square.

  Args:
    record: Value handed to `str_to_num` through tf.py_func.

  Returns:
    (features, 1). `features` is a NestedMap with `record` and `num` when
    `use_nested_map` is set, otherwise the list [record, num]. `num` stacks
    the parsed float32 value and its square.
  """
  [parsed] = tf.py_func(str_to_num, [record], [tf.float32])
  value_and_square = tf.stack([parsed, tf.square(parsed)])
  if not use_nested_map:
    return [record, value_and_square], 1
  return py_utils.NestedMap(record=record, num=value_and_square), 1
def SplitTensors(xs, num_splits):
  """Splits every tensor in `xs` into `num_splits` parts along dimension 0.

  The split sizes come from ComputeSplits applied to the first dimension of
  xs[0]. Two runtime assertions are attached to the result: all tensors share
  the same first dimension, and that dimension is >= num_splits.

  Args:
    xs: A tuple of tensors. Each tensor's 1st dimension is the same size.
    num_splits: A python integer.

  Returns:
    A list with one entry per tensor in `xs`; each entry is the list of that
    tensor's splits, so the i-th element of every inner list is the i-th
    split along the first dimension.
  """
  first_dim = tf.shape(xs[0])[0]

  # Collect the leading dimension of every tensor so they can be compared
  # against the first one in a single assertion.
  leading_dims = tf.stack([tf.shape(x)[0] for x in xs])
  checks = [
      py_utils.assert_equal(
          leading_dims,
          first_dim,
          message='first dim of tensors in xs must match'),
      py_utils.assert_greater_equal(
          first_dim,
          num_splits,
          message='first dim of tensors in xs must be greater than num_splits'),
  ]
  checked_dims = py_utils.with_dependencies(checks, leading_dims)

  # Tie the assertions to the split sizes so they become part of the graph
  # that computes the result.
  split_sizes = py_utils.with_dependencies(
      [checked_dims], ComputeSplits(first_dim, num_splits))

  return [
      tf.split(axis=0, num_or_size_splits=split_sizes, value=x) for x in xs
  ]
def _EncodeRandomJpegs(sizes):
  """Encodes one random RGB image per (height, width) pair as JPEG.

  Args:
    sizes: Iterable of (height, width) pairs.

  Returns:
    A tensor stacking the encoded JPEG of every image, in input order.
  """

  def _RandomImage(height, width):
    # Uniform integers in [0, 256) cast down to uint8 pixel values.
    pixels = tf.random.uniform([height, width, 3], maxval=256, dtype=tf.int32)
    return tf.cast(pixels, tf.uint8)

  # All images are created first, then encoded, as two separate passes.
  random_images = [_RandomImage(height, width) for height, width in sizes]
  encoded = [tf.io.encode_jpeg(img) for img in random_images]
  return tf.stack(encoded)
def SequenceAppendToken(x, x_paddings, token, extend=False):
  """Writes `token` right after the last valid position of every row of `x`.

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.
    token: The token to append (of type integer).
    extend: Whether to grow `x` by one position along the length dimension
      first. This must be true whenever some row already has length
      `x_len_max`, otherwise an invalid sequence is emitted.

  Returns:
    A tuple (new sequence, new paddings). Both have the length dimension of
    `x` after the optional extension, i.e. x_len_max + 1 when `extend` is set
    and x_len_max otherwise.
  """
  batch_size = py_utils.GetShape(x)[0]
  # Row lengths are the count of non-padded positions.
  lengths = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)

  if extend:
    x = tf.pad(x, [[0, 0], [0, 1]])
  out_shape = py_utils.GetShape(x)
  max_len = out_shape[1]

  # Zero every position past the row length so the scatter-add below writes
  # onto a clean slot.
  x = x * tf.sequence_mask(lengths, max_len, x.dtype)

  # Add `token` at index (row, length) of every row.
  write_indices = tf.stack([tf.range(batch_size), lengths], axis=1)
  token_values = tf.cast(tf.fill([batch_size], token), x.dtype)
  x = x + tf.scatter_nd(write_indices, token_values, out_shape)

  # Every row is now one position longer.
  new_paddings = 1 - tf.sequence_mask(lengths + 1, max_len, x_paddings.dtype)
  return x, new_paddings
def CreateDenseCoordinates(self, ranges):
  """Lists the coordinates of every point of a dense N-dimensional grid.

  Example: ``CreateDenseCoordinates([(1, 10, 10), (1, 10, 10)])`` returns the
  100 (x, y) points of a 10x10 grid whose axes both run from 1 to 10.

  Args:
    ranges: A list of 3-tuples (min, max, num_steps), one per dimension. Each
      tuple is expanded with tf.lin_space into the values of that dimension.

  Returns:
    tf.float32 tensor of shape [total_points, len(ranges)], where
    total_points is the product of all num_steps. The first dimension in
    `ranges` changes slowest and the last changes fastest.
  """
  total_points = int(np.prod([num_steps for _, _, num_steps in ranges]))
  point_ids = tf.range(total_points)

  columns = []
  # Number of consecutive points over which the current dimension holds one
  # value; shrinks as later dimensions are processed.
  repeat_len = total_points
  for low, high, num_steps in ranges:
    axis_values = tf.lin_space(
        tf.cast(low, tf.float32), tf.cast(high, tf.float32),
        tf.cast(num_steps, tf.int32))
    repeat_len //= num_steps
    value_index = (point_ids // repeat_len) % num_steps
    columns.append(tf.gather(axis_values, value_index))
  return tf.stack(columns, axis=1)
def testStep(self):
  """Step() applied timestep by timestep must match the FProp logits."""
  num_steps, batch_size = 5, 3
  params = self._testParams(dtype=tf.float32)
  with self.session(use_gpu=True) as sess:
    lm = params.Instantiate()
    inputs, paddings, _ = self._testInputs(dtype=tf.float32, last_padding=0.0)
    sess.run(tf.global_variables_initializer())

    # Reference logits from one full forward pass.
    fprop_output, _ = lm.FPropDefaultTheta(inputs=inputs, paddings=paddings)
    fprop_logits = fprop_output.logits

    # Logits gathered one step at a time, threading the state through.
    states = lm.zero_state(lm.theta, batch_size)
    per_step_logits = []
    for t in range(num_steps):
      step_output, states = lm.Step(lm.theta, inputs[t, :, :], paddings[t, :],
                                    states)
      per_step_logits.append(step_output.logits)
    step_logits = tf.stack(per_step_logits)

    tf.global_variables_initializer().run()
    fprop_logits_v, step_logits_v = sess.run([fprop_logits, step_logits])
    print('xformer logits1_v', fprop_logits_v)
    print('xformer logits2_v', step_logits_v)
    self.assertAllClose(fprop_logits_v, step_logits_v)
def Step(recurrent_theta, state0, inputs):
  """Runs one sampling step of the decoder.

  Args:
    recurrent_theta: Supplies `theta`, `encoder_outputs` and `random_seed`.
    state0: Previous state; `ids`, `bs_state` and `timestep` are read.
    inputs: Unused.

  Returns:
    (state1, empty NestedMap). state1 carries the incremented `timestep`, the
    step's `logits`, the sampled `ids` and the updated `bs_state`.
  """
  del inputs
  with tf.name_scope('single_sampler_step'):
    # Score the next token given the ids from the previous step, fed as a
    # [batch, 1] tensor.
    bs_result, bs_state1 = pre_step_callback(
        recurrent_theta.theta,
        recurrent_theta.encoder_outputs,
        tf.expand_dims(state0.ids, 1),
        state0.bs_state,
        num_hyps_per_beam=1)
    batch = tf.shape(bs_result.log_probs)[0]

    state1 = py_utils.NestedMap(timestep=state0.timestep + 1)
    state1.logits = bs_result.log_probs

    # Draw one id per example. The stateless sampler is seeded with the
    # (random_seed, timestep) pair.
    scaled_logits = state1.logits / p.temperature
    step_seed = tf.stack([recurrent_theta.random_seed, state0.timestep])
    sampled = tf.random.stateless_multinomial(
        scaled_logits,
        num_samples=1,
        seed=step_seed,
        output_dtype=state0.ids.dtype,
        name='sample_next_id')
    state1.ids = tf.reshape(sampled, [batch])

    if 'is_last_chunk' in bs_result and p.target_eoc_id >= 0:
      # A sampled end-of-chunk id on the last chunk is rewritten to EOS.
      eoc_on_last_chunk = tf.logical_and(
          bs_result.is_last_chunk, tf.equal(state1.ids, p.target_eoc_id))
      eos_ids = tf.fill(tf.shape(state1.ids), p.target_eos_id)
      state1.ids = tf.where(eoc_on_last_chunk, eos_ids, state1.ids)

    state1.bs_state = post_step_callback(recurrent_theta.theta,
                                         recurrent_theta.encoder_outputs,
                                         state1.ids, bs_state1)
  return state1, py_utils.NestedMap()
def _IgnoreZCoordinate(bboxes):
  """Returns `bboxes` with the z center set to 0 and the z dimension set to 1.

  The replacement columns are created with zeros_like / ones_like on the
  columns they replace, so they always share the dtype and length of the
  input. Building them with tf.zeros / tf.ones would default to float32 and
  make the stack fail for boxes of any other dtype.

  Args:
    bboxes: A 2-D tensor indexed as [box, field] with at least 7 fields.
      Field 2 is the z center and field 5 the z dimension; the remaining
      fields are presumably [x, y, dx, dy, phi] -- confirm against callers.

  Returns:
    A [num_bboxes, 7] tensor of the same dtype as `bboxes`, equal to the
    first 7 fields of the input except for fields 2 and 5.
  """
  x, y, z, dx, dy, dz, phi = [bboxes[:, field] for field in range(7)]
  return tf.stack([
      x, y, tf.zeros_like(z),
      dx, dy, tf.ones_like(dz),
      phi
  ], axis=1)  # pyformat: disable
def _process(source_id, record):
  """Parses `record` into a number and derives its bucket key.

  Args:
    source_id: Passed through unchanged into the returned features.
    record: Value handed to `str_to_num` through tf.py_func.

  Returns:
    (features, bucket_fn(num)). `features` is a NestedMap with `source_id`,
    `record` and `num` when `use_nested_map` is set, otherwise the list
    [source_id, record, num]. `num` stacks the parsed float32 value and its
    square.
  """
  [parsed] = tf.py_func(str_to_num, [record], [tf.float32])
  value_and_square = tf.stack([parsed, tf.square(parsed)])
  if not use_nested_map:
    return [source_id, record, value_and_square], bucket_fn(value_and_square)
  features = py_utils.NestedMap(
      source_id=source_id, record=record, num=value_and_square)
  return features, bucket_fn(value_and_square)
def Update(self, new_value):
  """Folds `new_value` into the stored 3-element state.

  Element 0 is summed, element 1 keeps the running minimum and element 2
  keeps the running maximum.

  Args:
    new_value: Indexable with at least 3 elements, combined element-wise with
      the current value.
  """
  current = self.GetValue()
  total = current[0] + new_value[0]
  lowest = tf.minimum(current[1], new_value[1])
  highest = tf.maximum(current[2], new_value[2])
  self.SetValue(tf.stack([total, lowest, highest]))
def _MaybeStackExtraTheta(theta, all_vars, repeat): var_set = set([key for key, _ in all_vars.FlattenItems()]) values = [] for key, value in theta.FlattenItems(): if key not in var_set and value is not None: # Replicate non-variable theta by p.repeat times. value = tf.stack([value] * repeat) values.append(value) return theta.Pack(values)
def _testInputs(self, depth=3, dtype=tf.float32):
  """Builds deterministic source and aux-source test inputs.

  Args:
    depth: Size of the last dimension of the generated vectors.
    dtype: dtype of every returned tensor.

  Returns:
    (source_vecs, source_padding, aux_source_vecs, aux_source_paddings) with
    shapes [5, 2, depth], [5, 2], [7, 2, depth] and [7, 2].
  """
  np.random.seed(505837249)

  def _RandomSteps(num_steps):
    # One random [2, depth] constant per step, stacked along a new axis 0.
    return tf.stack([
        tf.constant(np.random.rand(2, depth), dtype=dtype)
        for _ in range(num_steps)
    ])

  # Source vectors are drawn before aux vectors to keep the random stream
  # consumption order fixed.
  source_vecs = _RandomSteps(5)
  source_padding_rows = tf.constant(
      [[0, 0, 1, 1, 0], [1, 0, 0, 0, 1]], dtype=dtype)
  aux_source_vecs = _RandomSteps(7)
  aux_padding_rows = tf.constant(
      [[0, 1, 0, 1, 0, 1, 0], [1, 0, 1, 0, 1, 0, 1]], dtype=dtype)

  # Paddings are written batch-major above; transpose them.
  source_padding = tf.transpose(source_padding_rows)
  aux_source_paddings = tf.transpose(aux_padding_rows)
  return (source_vecs, source_padding, aux_source_vecs, aux_source_paddings)
def _testElmanHelper(self, seqlen, use_grad, stop_fn=None):
  """Compares a statically unrolled Elman RNN with recurrent.Recurrent.

  Args:
    seqlen: Number of timesteps in the generated input.
    use_grad: If True, self.ElmanGrad is passed as `cell_grad`; otherwise
      `cell_grad` is None.
    stop_fn: Optional early-stop predicate, called here as
      stop_fn(step, theta, state) and also forwarded to Recurrent.
  """
  with self.session() as sess:
    tf.set_random_seed(342462)
    batch, dims = 3, 4

    theta = py_utils.NestedMap()
    theta.w = self.Rand([2 * dims, dims])
    theta.b = self.Rand([dims])
    state0 = py_utils.NestedMap()
    state0.h = self.Rand([batch, dims])
    inputs = py_utils.NestedMap()
    inputs.x = self.Rand([seqlen, batch, dims])
    wrt = [theta.w, theta.b, state0.h, inputs.x]

    # Reference: unroll the cell by hand.
    state = state0
    unrolled = []
    for step in range(seqlen):
      step_input = py_utils.NestedMap()
      step_input.x = inputs.x[step, :]
      state, _ = self.Elman(theta, state, step_input)
      unrolled.append(state.h)
      if stop_fn and stop_fn(step + 1, theta, state):
        # Steps after an early stop are filled with zeros.
        unrolled.extend(
            tf.zeros_like(unrolled[-1]) for _ in range(seqlen - step - 1))
        break
    acc0, final0 = tf.stack(unrolled), state.h
    loss0 = tf.reduce_sum(acc0) + tf.reduce_sum(final0)
    dw0, db0, dh0, di0 = tf.gradients(loss0, wrt)

    # Same computation through the Recurrent() library.
    acc1, final1 = recurrent.Recurrent(
        theta=theta,
        state0=state0,
        inputs=inputs,
        cell_fn=self.Elman,
        cell_grad=self.ElmanGrad if use_grad else None,
        stop_fn=stop_fn)
    acc1, final1 = acc1.h, final1.h
    loss1 = tf.reduce_sum(acc1) + tf.reduce_sum(final1)
    dw1, db1, dh1, di1 = tf.gradients(loss1, wrt)

    # Evaluate everything in one run, then compare pair by pair.
    (acc0, acc1, final0, final1, dw0, dw1, db0, db1, dh0, dh1, di0,
     di1) = sess.run(
         [acc0, acc1, final0, final1, dw0, dw1, db0, db1, dh0, dh1, di0, di1])
    for expected, actual in [(acc0, acc1), (final0, final1), (dw0, dw1),
                             (db0, db1), (dh0, dh1), (di0, di1)]:
      self.assertAllClose(expected, actual)
def _TransformerAttentionLayerInputs(self, input_dim=4, dtype=tf.float32):
  """Builds deterministic batch-major inputs for attention layer tests.

  Args:
    input_dim: Size of the last dimension of the generated vectors.
    dtype: dtype of the vectors and paddings. The segment mask is created
      with tf.zeros' default dtype.

  Returns:
    (query_vec, paddings, aux_vec, aux_paddings, segment_mask) with shapes
    [2, 5, input_dim], [2, 5], [2, 7, input_dim], [2, 7] and [2, 1, 5, 5].
  """
  np.random.seed(6348575)

  def _BatchMajorRandom(num_steps):
    # Stack per-step [2, input_dim] constants, then swap the first two axes.
    steps = tf.stack([
        tf.constant(np.random.rand(2, input_dim), dtype=dtype)
        for _ in range(num_steps)
    ])
    return tf.transpose(steps, [1, 0, 2])

  # The query is drawn before the aux vectors to keep the random stream
  # consumption order fixed.
  query_vec = _BatchMajorRandom(5)
  paddings = tf.constant([[0, 0, 1, 1, 0], [1, 0, 0, 0, 1]], dtype=dtype)
  aux_vec = _BatchMajorRandom(7)
  aux_paddings = tf.constant(
      [[0, 1, 0, 1, 0, 1, 0], [1, 0, 1, 0, 1, 0, 1]], dtype=dtype)
  segment_mask = tf.zeros([2, 1, 5, 5])
  return query_vec, paddings, aux_vec, aux_paddings, segment_mask
def _testInputs(self, depth=3, dtype=tf.float32):
  """Builds deterministic source, aux-source and task-id test inputs.

  Args:
    depth: Size of the last dimension of the vectors and of the task tensors.
    dtype: dtype of the vectors and paddings (task tensors are int32).

  Returns:
    (source_vecs, source_padding, aux_source_vecs, aux_source_paddings,
    input_tasks, tgt_tasks) with shapes [5, 2, depth], [5, 2], [7, 2, depth],
    [7, 2], [2, depth] and [3, depth]. Both task tensors are all zeros.
  """
  np.random.seed(505837249)

  def _RandomSteps(num_steps):
    # One random [2, depth] constant per step, stacked along a new axis 0.
    return tf.stack([
        tf.constant(np.random.rand(2, depth), dtype=dtype)
        for _ in range(num_steps)
    ])

  # Source vectors are drawn before aux vectors to keep the random stream
  # consumption order fixed.
  source_vecs = _RandomSteps(5)
  source_padding_rows = tf.constant(
      [[0, 0, 1, 1, 0], [1, 0, 0, 0, 1]], dtype=dtype)
  aux_source_vecs = _RandomSteps(7)
  aux_padding_rows = tf.constant(
      [[0, 1, 0, 1, 0, 1, 0], [1, 0, 1, 0, 1, 0, 1]], dtype=dtype)

  # Paddings are written batch-major above; transpose them.
  source_padding = tf.transpose(source_padding_rows)
  aux_source_paddings = tf.transpose(aux_padding_rows)

  zero_row = [0] * depth
  input_tasks = tf.constant([list(zero_row) for _ in range(2)], dtype=tf.int32)
  tgt_tasks = tf.constant([list(zero_row) for _ in range(3)], dtype=tf.int32)
  return (source_vecs, source_padding, aux_source_vecs, aux_source_paddings,
          input_tasks, tgt_tasks)
def NMSIndices(self,
               bboxes,
               scores,
               max_output_size,
               nms_iou_threshold=0.3,
               score_threshold=0.01):
  """Runs padded NMS on 7-DOF 3d boxes using only their x/y footprint.

  Args:
    bboxes: A [num_boxes, 7] floating point Tensor of bounding boxes in [x, y,
      z, dx, dy, dz, phi] format.
    scores: A [num_boxes] floating point Tensor containing box scores.
    max_output_size: Maximum number of boxes to predict per input.
    nms_iou_threshold: IoU threshold above which two boxes are considered
      overlapping for suppression. The 0.3 default is below the usual 0.5
      because rotated anchor boxes can make 0.5 too high.
    score_threshold: The score threshold passed to NMS that allows NMS to
      quickly ignore irrelevant boxes.

  Returns:
    The NMS indices and the mask of the padded indices. Indices at masked-out
    positions are set to 0.
  """
  bboxes = py_utils.HasShape(bboxes, [-1, 7])

  # Keep x, y, dx, dy and convert to extrema. z, dz and the rotation are
  # dropped because no available NMS op takes rotation into account.
  footprint_columns = [bboxes[:, 0], bboxes[:, 1], bboxes[:, 3], bboxes[:, 4]]
  footprints = geometry.XYWHToBBoxes(tf.stack(footprint_columns, axis=-1))

  # The padded variant has a fixed-size output, which lets this function be
  # used inside a map_fn; it also returns the scalar number of valid boxes.
  padded_indices, num_valid = tf.image.non_max_suppression_padded(
      footprints,
      scores,
      iou_threshold=nms_iou_threshold,
      max_output_size=max_output_size,
      score_threshold=score_threshold,
      pad_to_max_output_size=True)

  # Expand the scalar count into a mask: ones for valid slots, zeros after.
  valid_part = tf.ones([num_valid])
  padded_part = tf.zeros([max_output_size - num_valid])
  mask = tf.concat([valid_part, padded_part], axis=0)

  padded_indices = tf.where(mask > 0, padded_indices,
                            tf.zeros_like(padded_indices))
  return padded_indices, mask
def _MakeTransformTestRotationMatrices(self, batch_size):
  """Builds `batch_size` random 4x4 transforms that only rotate.

  Each transform is a 3x3 matrix from geometry._MakeRotationMatrix with a
  random first angle and the other two angles at 0 (a rotation around the z
  axis), embedded in a 4x4 matrix.

  Args:
    batch_size: Number of transforms to generate.

  Returns:
    A [batch_size, 4, 4] tensor.
  """

  def _RandomRotationTransform():
    rotation = geometry._MakeRotationMatrix(tf.random_uniform([]), 0., 0.)
    # Grow 3x3 to 4x4 with zeros, then set the bottom-right entry to 1.
    padded = tf.pad(rotation, [[0, 1], [0, 1]])
    return padded + tf.diag([0, 0, 0, 1.])

  return tf.stack(
      [_RandomRotationTransform() for _ in range(batch_size)], axis=0)
def _MakeTransformTestTranslationMatrices(self, batch_size):
  """Builds `batch_size` random 4x4 transforms that only translate.

  Each transform is the 4x4 identity plus a random [3, 1] translation placed
  in the top three rows of the last column.

  Args:
    batch_size: Number of transforms to generate.

  Returns:
    A [batch_size, 4, 4] tensor.
  """

  def _RandomTranslationTransform():
    offset = tf.random_uniform([3, 1])
    # One zero row below and three zero columns to the left give a 4x4
    # matrix with the offset in its last column.
    padded = tf.pad(offset, [[0, 1], [3, 0]])
    return padded + tf.diag([1., 1., 1., 1.])

  return tf.stack(
      [_RandomTranslationTransform() for _ in range(batch_size)], axis=0)
def Step(recurrent_theta, state0, inputs):
  """Runs one sampling step of the decoder, optionally restricted to top-k.

  Args:
    recurrent_theta: Supplies `encoder_outputs` and `random_seed`.
    state0: Previous state; `ids`, `bs_state` and `timestep` are read.
    inputs: Ignored when p.use_recurrent is set. Otherwise its `ids` and
      `logits` members are written at index state0.timestep.

  Returns:
    (state1, empty NestedMap) when p.use_recurrent is set, otherwise
    (recurrent_theta, state1, inputs) with the step's ids and logits recorded
    in `inputs`.
  """
  if p.use_recurrent:
    del inputs
  restrict_to_top_k = p.top_k > 0

  with tf.name_scope('single_sampler_step'):
    # Score the next token given the ids from the previous step, fed as a
    # [batch, 1] tensor.
    bs_result, bs_state1 = pre_step_callback(
        decoder_theta,
        recurrent_theta.encoder_outputs,
        tf.expand_dims(state0.ids, 1),
        state0.bs_state,
        num_hyps_per_beam=p.num_hyps_per_beam)
    batch = tf.shape(bs_result.log_probs)[0]

    state1 = py_utils.NestedMap(timestep=state0.timestep + 1)
    state1.logits = bs_result.log_probs

    if restrict_to_top_k:
      topk_logits, topk_ids = tf.math.top_k(state1.logits, k=p.top_k)
      if p.top_k_renormalize:
        sample_logits = tf.nn.log_softmax(topk_logits)
      else:
        sample_logits = topk_logits
    else:
      sample_logits = state1.logits

    # Draw one index per example. The stateless sampler is seeded with the
    # (random_seed, timestep) pair.
    scaled_logits = sample_logits / p.temperature
    step_seed = tf.stack([recurrent_theta.random_seed, state0.timestep])
    drawn = tf.random.stateless_categorical(
        scaled_logits,
        num_samples=1,
        seed=step_seed,
        dtype=state0.ids.dtype,
        name='sample_next_id')
    ids = tf.reshape(drawn, [batch])

    if restrict_to_top_k:
      # The draw indexes into the top-k list; map it back to an actual id.
      state1.ids = tf.gather(topk_ids, ids, axis=1, batch_dims=1)
    else:
      state1.ids = ids

    if 'is_last_chunk' in bs_result and p.target_eoc_id >= 0:
      # A sampled end-of-chunk id on the last chunk is rewritten to EOS.
      eoc_on_last_chunk = tf.math.logical_and(
          bs_result.is_last_chunk, tf.equal(state1.ids, p.target_eoc_id))
      eos_ids = tf.fill(tf.shape(state1.ids), p.target_eos_id)
      state1.ids = tf.where(eoc_on_last_chunk, eos_ids, state1.ids)

    state1.bs_state = post_step_callback(decoder_theta,
                                         recurrent_theta.encoder_outputs,
                                         state1.ids, bs_state1)

  if p.use_recurrent:
    return state1, py_utils.NestedMap()

  # Without the recurrent wrapper the per-step outputs are recorded here.
  inputs.ids = inputs.ids.write(state0.timestep, state1.ids)
  inputs.logits = inputs.logits.write(state0.timestep, state1.logits)
  return (recurrent_theta, state1, inputs)
def BBoxCorners(bboxes):
  """Computes the 8 corner points of every 7-DOF bounding box.

  Args:
    bboxes: A [batch, num_boxes, 7] floating point bounding box
      representation ([x, y, z, dx, dy, dz, phi]).

  Returns:
    A [batch, num_boxes, 8, 3] floating point Tensor containing the corner
    (x, y, z) points for every bounding box.
  """
  # Corners of a unit cube centered at the origin: the four with z = +0.5
  # (top) first, then the four with z = -0.5 (bottom).
  unit_corners = tf.constant([
      [0.5, 0.5, 0.5],
      [-0.5, 0.5, 0.5],
      [-0.5, -0.5, 0.5],
      [0.5, -0.5, 0.5],
      [0.5, 0.5, -0.5],
      [-0.5, 0.5, -0.5],
      [-0.5, -0.5, -0.5],
      [0.5, -0.5, -0.5],
  ])

  batch, nb, _ = py_utils.GetShape(bboxes, 3)

  # Split each box into center, extents and heading.
  center = bboxes[:, :, :3]
  extents = bboxes[:, :, 3:6]
  heading = bboxes[:, :, 6]

  # One rotation matrix about the z axis per box.
  cos_h = tf.cos(heading)
  sin_h = tf.sin(heading)
  zeros = tf.zeros_like(cos_h)
  ones = tf.ones_like(cos_h)
  row_major_entries = [
      cos_h, -sin_h, zeros,
      sin_h, cos_h, zeros,
      zeros, zeros, ones,
  ]  # pyformat: disable
  rotations = tf.reshape(
      tf.stack(row_major_entries, axis=2), [batch, nb, 3, 3])

  # Scale the unit cube by the box extents, rotate into the world frame,
  # then shift to the box center.
  scaled = tf.einsum('bni,ji->bnji', extents, unit_corners)
  rotated = tf.einsum('bnij,bnkj->bnki', rotations, scaled)
  return rotated + tf.reshape(center, (batch, nb, 1, 3))
def testEvolvedTransformerDecoderLayerExtendStep(self):
  """ExtendStep applied step by step must match the layer's FProp output."""
  with self.session(use_gpu=True) as sess:
    np.random.seed(6348575)
    depth = 4

    p = GPipeEvolvedTransformerDecoderLayer.Params()
    p.name = 'gpipe_evolved_transformer_decoder'
    p.source_dim = depth
    p.has_aux_atten = True
    p.mask_self_atten = True
    p.tr_double_heads_atten_tpl.num_attention_heads = 2
    p.tr_atten_tpl.num_attention_heads = 2
    p.transformer_tpl.tr_atten_tpl.num_attention_heads = 2
    layer = GPipeEvolvedTransformerDecoderLayer(p)

    source_vecs, _, aux_vecs, aux_paddings = self._testInputs(depth=depth)
    source_padding = tf.zeros([5, 2])

    # Reference: full forward pass; element 2 of the result is compared.
    fprop_outputs = layer.FPropDefaultTheta(
        aux_vecs,
        aux_paddings,
        source_vecs,
        source_padding,
        None,
        None,
        None,
        None,
    )
    fprop_hidden = fprop_outputs[2]

    # Step-wise decoding starts from caches with a zero-length leading
    # dimension.
    def _EmptyCache():
      return tf.zeros([0, 2, 4])

    double_head_attention_states = py_utils.NestedMap(
        key=_EmptyCache(), value=_EmptyCache())
    transformer_layer_states = py_utils.NestedMap(
        key=_EmptyCache(), value=_EmptyCache())
    prefix_states = py_utils.NestedMap(
        double_head_attention_states=double_head_attention_states,
        transformer_layer_states=transformer_layer_states,
        branched_convs_input=_EmptyCache())

    step_hiddens = []
    for step in range(5):
      hidden, _, prefix_states = layer.ExtendStep(
          layer.theta, source_vecs[step, :, :], prefix_states, aux_vecs,
          aux_paddings)
      step_hiddens.append(hidden)
    extend_hidden = tf.stack(step_hiddens)

    tf.global_variables_initializer().run()
    fprop_hidden_v, extend_hidden_v = sess.run([fprop_hidden, extend_hidden])
    self.assertAllClose(fprop_hidden_v, extend_hidden_v)
def QuantizeTensors(self, t_name, ts, eval_only=False):
  """Fake-quantizes `ts` with a shared range that always includes zero.

  Args:
    t_name: Name used to look up the quantization state / accumulator and to
      label the histograms.
    ts: List of tensors that share one quantization range.
    eval_only: If True, training still records ranges but returns the tensors
      unquantized.

  Returns:
    A list with one output tensor per entry of `ts`.
  """
  p = self.params

  if self.do_eval:
    # Eval/inference: quantize with the memorized range. These variables are
    # only looked up in this branch so training does not capture them.
    min_var = self._GetQStateVar(t_name, 'min')
    max_var = self._GetQStateVar(t_name, 'max')
    return [
        self._MaybeFakeQuant(t, min_var, max_var, num_bits=p.bits) for t in ts
    ]

  # Training: derive the range from the current batch.
  accumulator_name = self._GetAccumulatorNameForTensor(t_name)

  # Starting both bounds at 0.0 guarantees the range straddles a real zero.
  batch_min = 0.0
  batch_max = 0.0
  for t in ts:
    batch_min = tf.minimum(tf.reduce_min(t), batch_min)
    batch_max = tf.maximum(tf.reduce_max(t), batch_max)

  # Hand the accumulator a [1.0, min, max] triple for this batch.
  self.accumulators[accumulator_name].Update(
      tf.stack([1.0, batch_min, batch_max]))

  outputs = []
  for index, t in enumerate(ts):
    if eval_only:
      # Ranges were recorded above; the tensor itself stays untouched.
      quantized = t
    else:
      # Early in training the ranges can be unstable enough for fake_quant
      # to yield NaNs; fall back to the raw value wherever that happens.
      quantized = self._MaybeFakeQuant(
          t, batch_min, batch_max, num_bits=p.bits)
      # TODO(laurenzo): Plumb quant_t_has_nans through state and report.
      quantized = tf.where(tf.math.is_nan(quantized), t, quantized)
    outputs.append(quantized)
    summary_utils.histogram(
        '%s/%s_%d' % (self._qvars_scope.name, t_name, index), t)
  return outputs
def SequenceConcat(x, x_paddings, y, y_paddings, pad=0):
  """Concatenates `y` onto `x` row by row, honoring each row's true length.

  Lengths are derived from the paddings, so the tokens of `y` are written
  directly after the last valid token of the matching row of `x`.

  Args:
    x: A sequence of tokens of shape [batch_size, x_len_max].
    x_paddings: The paddings of `x`.
    y: A sequence of tokens of shape [batch_size, y_len_max].
    y_paddings: The paddings of `y`.
    pad: The <pad> token to fill the concatenated sequence (of type integer).

  Returns:
    A tuple.
    - Concatenation of `x` and `y` of shape
      [batch_size, x_len_max + y_len_max].
    - Paddings of the concatenation of shape
      [batch_size, x_len_max + y_len_max].
  """
  # Row lengths are the count of non-padded positions.
  x_len = tf.cast(tf.round(tf.reduce_sum(1 - x_paddings, 1)), tf.int32)
  y_len = tf.cast(tf.round(tf.reduce_sum(1 - y_paddings, 1)), tf.int32)
  total_len = x_len + y_len

  batch_size = py_utils.GetShape(x)[0]
  y_len_max = py_utils.GetShape(y)[1]

  # Widen `x` by y_len_max columns of <pad>.
  x = tf.concat([x, tf.fill(py_utils.GetShape(y), pad)], 1)
  xy_shape = py_utils.GetShape(x)
  xy_len_max = xy_shape[1]

  # Turn every <pad> into 0 so the scatter-add below lands on zeros.
  x = tf.where(tf.not_equal(x, pad), x, tf.fill(xy_shape, 0))

  # Element (b, j) of `y` is written to row b, column x_len[b] + j.
  row_ids = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, y_len_max])
  col_ids = (
      tf.tile(tf.expand_dims(tf.range(y_len_max), 0), [batch_size, 1]) +
      tf.expand_dims(x_len, 1))
  indices = tf.stack([row_ids, col_ids], 2)
  xy = x + tf.scatter_nd(indices, y, xy_shape)

  # Positions at or beyond the combined length go back to `pad`.
  positions = tf.expand_dims(tf.range(xy_len_max), 0)
  is_valid = tf.less(positions, tf.expand_dims(total_len, 1))
  xy = tf.where(is_valid, xy, tf.fill(xy_shape, pad))

  xy_paddings = 1 - tf.sequence_mask(total_len, xy_len_max, x_paddings.dtype)
  return xy, xy_paddings
def _global_seed_from_inputs(input_floats):
  """Derives a two-element integer seed from the clock and the input data.

  Args:
    input_floats: Float tensor derived from the input data (for example,
      input tokens). The important thing is that it usually differs from
      batch to batch.

  Returns:
    An int64 tensor of shape [2]: (timestamp + input_sum,
    timestamp - input_sum), where `timestamp` is the current time in
    milliseconds modulo 10**10 and `input_sum` is the sum of absolute input
    values cast to int64.
  """
  millis = tf.cast(1000.0 * tf.timestamp(), dtype=tf.int64)
  timestamp = tf.math.floormod(millis, 10000000000)
  abs_total = tf.reduce_sum(tf.math.abs(input_floats))
  input_sum = tf.cast(abs_total, dtype=tf.int64)
  seed_parts = [timestamp + input_sum, timestamp - input_sum]
  return tf.stack(seed_parts, axis=-1)
def _Merge(*xs):
  """Broadcasts all but the last dimension of each input, then concatenates.

  Args:
    *xs: Tensors of equal rank whose leading dimensions are mutually
      broadcastable.

  Returns:
    The inputs broadcast to a common leading shape and concatenated along
    the last dimension.
  """
  # The common leading shape is the per-dimension maximum over all inputs.
  leading_shapes = tf.stack([tf.shape(x)[:-1] for x in xs])
  common_leading = tf.reduce_max(leading_shapes, axis=0)

  def _Broadcast(x):
    # Keep the tensor's own last dimension; expand only the leading ones.
    target_shape = tf.concat([common_leading, tf.shape(x)[-1:]], axis=0)
    return tf.broadcast_to(x, target_shape)

  return tf.concat([_Broadcast(x) for x in xs], axis=-1)
def _check_paddings(self, paddings):
  """Attaches runtime sanity checks to `paddings`.

  Two assertions are added: at least one element of `paddings` equals 0.0,
  and every slice along the first dimension is non-decreasing.

  Args:
    paddings: The paddings tensor to validate.

  Returns:
    `paddings` with the assertions attached as control dependencies.
  """
  with tf.name_scope('check_paddings'):
    # One boolean per slice along the first dimension.
    rows_non_decreasing = tf.stack(
        [tf.is_non_decreasing(row) for row in tf.unstack(paddings)])
    has_zero = tf.reduce_any(tf.equal(paddings, 0.0))
    checks = [
        tf.assert_equal(
            has_zero, True, message='must have at least one zero value.'),
        tf.assert_equal(
            rows_non_decreasing, True, message='must be non-decreasing'),
    ]
    paddings = py_utils.with_dependencies(checks, paddings)
  return paddings
def testTransformerLayerExtendStep(self):
  """ExtendStep applied step by step must match the layer's FProp output."""
  with self.session(use_gpu=True) as sess:
    depth = 4
    np.random.seed(6348575)

    p = GPipeTransformerLayer.Params()
    p.name = 'transformer'
    p.source_dim = depth
    p.has_aux_atten = True
    p.mask_self_atten = True
    p.tr_fflayer_tpl.hidden_dim = 7
    p.tr_atten_tpl.num_attention_heads = 2
    layer = GPipeTransformerLayer(p)

    (source_vecs, _, aux_vecs, aux_paddings, input_tasks,
     tgt_tasks) = self._testInputs(depth=depth)
    source_padding = tf.zeros([5, 2])

    # Reference: full forward pass. Element 2 is the hidden output and the
    # last two elements are the task tensors handed back by the layer.
    fprop_outputs = layer.FPropDefaultTheta(aux_vecs, aux_paddings,
                                            source_vecs, source_padding, None,
                                            None, input_tasks, tgt_tasks)
    fprop_hidden = fprop_outputs[2]
    out_src_task, out_tgt_task = fprop_outputs[-2], fprop_outputs[-1]

    # Step-wise decoding starts from caches with a zero-length leading
    # dimension.
    cached_keys = tf.zeros([0, 2, 4])
    cached_values = tf.zeros([0, 2, 4])
    prefix_states = py_utils.NestedMap(key=cached_keys, value=cached_values)

    step_hiddens = []
    for step in range(5):
      hidden, _, prefix_states = layer.ExtendStep(
          layer.theta, source_vecs[step, :, :], prefix_states, aux_vecs,
          aux_paddings)
      step_hiddens.append(hidden)
    extend_hidden = tf.stack(step_hiddens)

    tf.global_variables_initializer().run()
    fprop_hidden_v, extend_hidden_v = sess.run([fprop_hidden, extend_hidden])
    self.assertAllClose(fprop_hidden_v, extend_hidden_v)

    # The task tensors must be handed back equal to what was passed in.
    self.assertAllClose(out_src_task, input_tasks)
    self.assertAllClose(out_tgt_task, tgt_tasks)

    # Pin one concrete output row to catch silent numerical changes.
    self.assertAllClose(
        fprop_hidden_v[2][1],
        [1.10429943, -1.64884555, 0.15726769, -0.00250494])