def FProp(self, theta, inputs):
  """Apply projection to inputs.

  Args:
    theta: A NestedMap object containing weights' values of this layer and
      its children layers.
    inputs: The inputs tensor. Shaped [..., input_dims].

  Returns:
    Projected inputs.
  """
  p = self.params
  with tf.name_scope(p.name):
    computation_cost.Add(
        self, 'flops',
        tf.reduce_prod(tf.to_int64(tf.shape(inputs)[:-1])) * tf.to_int64(
            symbolic.EvalExpr(symbolic.TENSOR_VALUES,
                              p.input_dims * p.output_dims)) * 2)
    use_tpu = py_utils.use_tpu()
    if use_tpu and inputs.shape is not None and inputs.shape.rank < 26:
      # Avoids reshape if feasible and uses Einsum.
      if inputs.shape.rank == 2:
        return tf.matmul(inputs, theta.w)
      else:
        s = ''.join([chr(x) for x in range(97, 123)])  # abc...xyz
        r = inputs.shape.rank
        return tf.einsum('{0}y,yz->{0}z'.format(s[:r - 1]), inputs, theta.w)

    input_dim = py_utils.GetShape(inputs)[-1]
    act = tf.matmul(tf.reshape(inputs, [-1, input_dim]), theta.w)
    output_dim = tf.shape(theta.w)[-1]
    act = tf.reshape(
        act, tf.concat([tf.shape(inputs)[:-1], [output_dim]], axis=0))
    return act
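# --- Usage sketch (illustration only, not part of the layer): the Einsum fast
# path above is numerically equivalent to the reshape+matmul fallback. A
# minimal NumPy check of that equivalence, with made-up shapes, might look
# like this. For a rank-3 input, s[:r - 1] == 'ab', so the format string
# becomes 'aby,yz->abz'.
import numpy as np

batch, time, input_dims, output_dims = 4, 7, 16, 32
inputs = np.random.randn(batch, time, input_dims).astype(np.float32)
w = np.random.randn(input_dims, output_dims).astype(np.float32)

# Einsum path: project the last dimension directly.
out_einsum = np.einsum('aby,yz->abz', inputs, w)

# Reshape+matmul path: flatten leading dims, project, restore leading dims.
out_matmul = np.matmul(inputs.reshape(-1, input_dims), w).reshape(
    batch, time, output_dims)

np.testing.assert_allclose(out_einsum, out_matmul, rtol=1e-5)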
def ComputeLoss(self, theta, predictions, input_batch):
  p = self.params
  batch = tf.shape(input_batch.data)[0]
  act = predictions.act
  with tf.colocate_with(act):
    tf.logging.info("{}'s device: {}".format(act, act.device))
    # Softmax
    labels = tf.to_int64(input_batch.label)
    onehot_labels = tf.one_hot(labels, p.softmax.num_classes)
    if p.label_smoothing > 0:
      smooth_positives = 1.0 - p.label_smoothing
      smooth_negatives = p.label_smoothing / p.softmax.num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    xent = self.softmax.FProp(
        theta=theta.softmax,
        inputs=act,
        class_weights=input_batch.weight,
        class_probabilities=onehot_labels)

  self._AddSummary(input_batch, xent.per_example_argmax)

  rets = {
      'loss': (xent.avg_xent, batch),
      'log_pplx': (xent.avg_xent, batch),
      'num_preds': (batch, 1),
  }
  if p.is_eval or p.compute_accuracy_for_training:
    acc1 = self._Accuracy(1, xent.logits, labels, input_batch.weight)
    acc5 = self._Accuracy(5, xent.logits, labels, input_batch.weight)
    rets.update(accuracy=(acc1, batch), acc5=(acc5, batch))
  return rets, {}
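# --- Sketch (assumed toy values, not part of the model): the label-smoothing
# step above turns a one-hot target into a soft target that keeps
# (1 - label_smoothing) of the mass on the true class and spreads
# label_smoothing / num_classes onto every class. A small NumPy illustration:
import numpy as np

num_classes = 4
label_smoothing = 0.1
onehot = np.eye(num_classes)[2]                   # true class is 2
smooth_positives = 1.0 - label_smoothing          # 0.9
smooth_negatives = label_smoothing / num_classes  # 0.025
soft_target = onehot * smooth_positives + smooth_negatives
# soft_target == [0.025, 0.025, 0.925, 0.025]; it still sums to 1.0.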
def ComputeWer(hyps, refs):
  """Computes word errors in hypotheses relative to reference transcripts.

  Args:
    hyps: Hypotheses, represented as string tensors of shape [N].
    refs: References, represented as string tensors of shape [N].

  Returns:
    An int64 tensor, word_errs, of size [N, 2] where word_errs[i, 0]
    corresponds to the number of word errors in hyps[i] relative to refs[i];
    word_errs[i, 1] corresponds to the number of words in refs[i].
  """

  def _NormalizeWhitespace(s):
    return tf.regex_replace(tf.strings.strip(s), r'\s+', ' ')

  hyps = _NormalizeWhitespace(hyps)
  refs = _NormalizeWhitespace(refs)

  hyps = py_utils.HasRank(hyps, 1)
  refs = py_utils.HasRank(refs, 1)
  hyps = py_utils.HasShape(hyps, tf.shape(refs))

  word_errors = tf.to_int64(
      tf.edit_distance(
          tf.string_split(hyps), tf.string_split(refs), normalize=False))

  # Count number of spaces in reference, and increment by 1 to get total number
  # of words.
  ref_words = tf.to_int64(
      tf.strings.length(tf.regex_replace(refs, '[^ ]', '')) + 1)
  # Set number of words to 0 if the reference was empty.
  ref_words = tf.where(
      tf.equal(refs, ''), tf.zeros_like(ref_words, tf.int64), ref_words)

  return tf.concat(
      [tf.expand_dims(word_errors, -1),
       tf.expand_dims(ref_words, -1)], axis=1)
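# --- Reference sketch (pure Python, not used by the graph op above): the same
# [word_errors, ref_words] pair can be computed eagerly with a standard
# word-level Levenshtein distance, which is what tf.edit_distance measures on
# the space-split tokens.
def _WordEditDistance(hyp_words, ref_words):
  """Levenshtein distance between two lists of words."""
  prev = list(range(len(ref_words) + 1))
  for i, h in enumerate(hyp_words, 1):
    cur = [i]
    for j, r in enumerate(ref_words, 1):
      cur.append(min(prev[j] + 1,                # deletion
                     cur[j - 1] + 1,             # insertion
                     prev[j - 1] + (h != r)))    # substitution / match
    prev = cur
  return prev[-1]


def ComputeWerEager(hyps, refs):
  """Returns a list of (word_errors, ref_word_count) pairs."""
  results = []
  for hyp, ref in zip(hyps, refs):
    hyp_words = hyp.split()
    ref_words = ref.split()
    results.append((_WordEditDistance(hyp_words, ref_words), len(ref_words)))
  return results


# e.g. ComputeWerEager(['a b c'], ['a x c']) == [(1, 3)]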
def FPropTower(self, theta, input_batch):
  p = self.params
  batch = tf.shape(input_batch.data)[0]
  height, width, depth = p.input.data_shape
  act = tf.reshape(input_batch.data, [batch, height, width, depth])

  for i in range(len(self.conv)):
    # Conv, BN (optional)
    act, _ = self.conv[i].FProp(theta.conv[i], act)
    # MaxPool
    act, _ = self.pool[i].FProp(theta.pool[i], act)
    # Dropout (optional)
    if p.dropout_prob > 0.0 and not p.is_eval:
      act = tf.nn.dropout(
          act, keep_prob=1.0 - p.dropout_prob, seed=p.random_seed)

  # FC
  act = self.fc.FProp(theta.fc, tf.reshape(act, [batch, -1]))

  # Softmax
  labels = tf.to_int64(input_batch.label)
  xent = self.softmax.FProp(
      theta=theta.softmax,
      inputs=act,
      class_weights=input_batch.weight,
      class_ids=labels)

  self._AddSummary(input_batch, xent.per_example_argmax)

  rets = {
      'loss': (xent.avg_xent, batch),
      'log_pplx': (xent.avg_xent, batch),
      'num_preds': (batch, 1),
  }
  if p.is_eval:
    acc1 = self._Accuracy(1, xent.logits, labels, input_batch.weight)
    acc5 = self._Accuracy(5, xent.logits, labels, input_batch.weight)
    rets.update(accuracy=(acc1, batch), acc5=(acc5, batch))
  return rets, {}
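# --- Sketch (assumption about the convention, not framework code): each entry
# in `rets` above is a (value, weight) pair; a consumer that follows this
# convention would combine per-batch values into a weighted average, roughly:
def WeightedAverage(value_weight_pairs):
  """Combines [(value, weight), ...] pairs into a single weighted mean."""
  total_weight = sum(w for _, w in value_weight_pairs)
  if total_weight == 0:
    return 0.0
  return sum(v * w for v, w in value_weight_pairs) / total_weight


# e.g. losses from two batches of different sizes:
# WeightedAverage([(2.0, 32), (4.0, 16)]) == 128 / 48 ≈ 2.667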
def _BuildMetric(self, feed_data, classid):
  """Construct tensors and the feed_dict for Waymo metric op.

  Args:
    feed_data: a NestedMap returned by _GetData().
    classid: integer.

  Returns:
    A tuple of 3 dicts:

    - scalar_metrics: a dict mapping all the metric names to fetch tensors.
    - curves: a dict mapping all the curve names to fetch tensors.
    - feed_dict: a dict mapping the tensors in feed_tensors to feed values.
  """
  if feed_data is None:
    dummy_scalar = tf.constant(np.nan)
    dummy_curve = tf.zeros(
        [self.metadata.NumberOfPrecisionRecallPoints(), 2], tf.float32)
    scalar_metrics = {'ap': dummy_scalar, 'ap_ha_weighted': dummy_scalar}
    curve_metrics = {'pr': dummy_curve, 'pr_ha_weighted': dummy_curve}
    return scalar_metrics, curve_metrics, {}

  feed_dict = {}

  f_gt_bbox = tf.placeholder(tf.float32)
  feed_dict[f_gt_bbox] = feed_data.gt.bbox

  f_gt_imgid = tf.placeholder(tf.int32)
  feed_dict[f_gt_imgid] = feed_data.gt.imgid

  f_pd_bbox = tf.placeholder(tf.float32)
  feed_dict[f_pd_bbox] = feed_data.pd.bbox

  f_pd_imgid = tf.placeholder(tf.int32)
  feed_dict[f_pd_imgid] = feed_data.pd.imgid

  f_pd_score = tf.placeholder(tf.float32)
  feed_dict[f_pd_score] = feed_data.pd.score

  num_gt_bboxes = feed_data.gt.imgid.shape[0]
  num_pd_bboxes = feed_data.pd.imgid.shape[0]
  gt_class_ids = tf.constant(classid, dtype=tf.uint8, shape=[num_gt_bboxes])
  pd_class_ids = tf.constant(classid, dtype=tf.uint8, shape=[num_pd_bboxes])
  ap, ap_ha, pr, pr_ha, _ = py_metrics_ops.detection_metrics(
      prediction_bbox=f_pd_bbox,
      prediction_type=pd_class_ids,
      prediction_score=f_pd_score,
      prediction_frame_id=tf.to_int64(f_pd_imgid),
      prediction_overlap_nlz=tf.zeros_like(f_pd_imgid, dtype=tf.bool),
      ground_truth_bbox=f_gt_bbox,
      ground_truth_type=gt_class_ids,
      ground_truth_frame_id=tf.to_int64(f_gt_imgid),
      ground_truth_difficulty=tf.zeros_like(f_gt_imgid, dtype=tf.uint8),
      config=self._waymo_metric_config)

  # All tensors returned by Waymo's metric op have a leading dimension
  # B=number of breakdowns. At this moment we always use B=1 to make
  # it compatible to the python code.
  scalar_metrics = {'ap': ap[0], 'ap_ha_weighted': ap_ha[0]}
  curve_metrics = {'pr': pr[0], 'pr_ha_weighted': pr_ha[0]}
  return scalar_metrics, curve_metrics, feed_dict
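# --- Usage sketch (hypothetical consumer, TF1-style graph execution): the
# three dicts returned above are intended to be evaluated together, with the
# placeholders fed from the numpy arrays captured in `feed_dict`.
# `metric_builder`, `feed_data`, `classid`, and `sess` below are stand-in
# names for illustration, not part of the module's API.
def RunWaymoMetric(metric_builder, feed_data, classid, sess):
  """Evaluates the metric tensors for one class and returns numpy values."""
  scalar_metrics, curve_metrics, feed_dict = metric_builder._BuildMetric(
      feed_data, classid)
  # tf.Session.run accepts dict-structured fetches, so the metric dicts can be
  # evaluated directly; scalars['ap'] is the AP for `classid`, and
  # curves['pr'] holds the precision/recall curve points.
  scalars = sess.run(scalar_metrics, feed_dict=feed_dict)
  curves = sess.run(curve_metrics, feed_dict=feed_dict)
  return scalars, curves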