def build_outputs(self, features, labels, mode):
  """Builds the backbone -> FPN -> head forward pass and collects outputs.

  Args:
    features: the input image tensor fed to the backbone.
    labels: a dict of label Tensors; in non-TRAIN modes must contain
      'anchor_boxes' and 'image_info' used for detection generation.
    mode: one of mode_keys.TRAIN, mode_keys.EVAL, mode_keys.PREDICT.

  Returns:
    a dict with 'cls_outputs' and 'box_outputs'; in non-TRAIN modes also
    'num_detections', 'detection_boxes', 'detection_classes' and
    'detection_scores' from the post-processing / NMS function.
  """
  is_training = (mode == mode_keys.TRAIN)
  backbone_features = self._backbone_fn(features, is_training=is_training)
  fpn_features = self._fpn_fn(backbone_features, is_training=is_training)
  cls_outputs, box_outputs = self._head_fn(
      fpn_features, is_training=is_training)
  model_outputs = {
      'cls_outputs': cls_outputs,
      'box_outputs': box_outputs,
  }

  # Print number of parameters and FLOPS in model.
  # NOTE: dict.values() returns a non-subscriptable view in Python 3, so it
  # must be materialized with list() before indexing. Any level's feature map
  # works here since only the batch dimension is read.
  batch_size, _, _, _ = (
      list(backbone_features.values())[0].get_shape().as_list())
  benchmark_utils.compute_model_statistics(
      batch_size, is_training=is_training)

  if mode != mode_keys.TRAIN:
    boxes, scores, classes, valid_detections = self._generate_detections_fn(
        box_outputs, cls_outputs, labels['anchor_boxes'],
        labels['image_info'][:, 1:2, :])
    model_outputs.update({
        'num_detections': valid_detections,
        'detection_boxes': boxes,
        'detection_classes': classes,
        'detection_scores': scores,
    })
  return model_outputs
def predict(self, features):
  """Creates an estimator spec for the PREDICT mode.

  Args:
    features: a dict of Tensors including the input images and other label
      tensors used for prediction.

  Returns:
    a TPUEstimatorSpec object used for prediction.
  """
  input_images = features['images']
  input_labels = features['labels']

  model_outputs = self.build_outputs(
      input_images, input_labels, mode=mode_keys.PREDICT)

  # Log model statistics (parameter count / FLOPs) to a JSON file under the
  # model directory.
  static_batch_size = input_images.get_shape().as_list()[0]
  _, _ = benchmark_utils.compute_model_statistics(
      batch_size=static_batch_size,
      json_file_path=os.path.join(self._model_dir,
                                  'predict_model_stats.json'))

  predictions = self.build_predictions(model_outputs, input_labels)

  spec = tf.estimator.tpu.TPUEstimatorSpec(
      mode=tf.estimator.ModeKeys.PREDICT, predictions=predictions)
  # On CPU/GPU the TPU spec is downgraded to a plain EstimatorSpec.
  return spec if self._use_tpu else spec.as_estimator_spec()
def evaluate(self, images, labels):
  """Creates an estimator spec for the EVAL mode.

  Args:
    images: a Tensor of shape [batch_size, height, width, channel]
      representing the input image tensor.
    labels: a dict of label tensors.

  Returns:
    a TPUEstimatorSpec object used for evaluation.
  """
  model_outputs = self.build_outputs(images, labels, mode=mode_keys.EVAL)

  # Log model statistics (parameter count / FLOPs) to a JSON file under the
  # model directory.
  static_batch_size = images.get_shape().as_list()[0]
  _, _ = benchmark_utils.compute_model_statistics(
      batch_size=static_batch_size,
      json_file_path=os.path.join(self._model_dir, 'eval_model_stats.json'))

  model_loss = self.build_losses(model_outputs, labels)
  eval_metrics = self.build_metrics(model_outputs, labels)

  spec = tf.estimator.tpu.TPUEstimatorSpec(
      mode=tf.estimator.ModeKeys.EVAL,
      loss=model_loss,
      eval_metrics=eval_metrics)
  # On CPU/GPU the TPU spec is downgraded to a plain EstimatorSpec.
  return spec if self._use_tpu else spec.as_estimator_spec()
def build_outputs(self, images, labels, mode):
  """Builds the model forward pass and generates outputs.

  Wraps the implementation in `_build_outputs`, entering a bfloat16 scope
  when mixed precision is enabled and casting the resulting bfloat16
  outputs back to float32.

  Args:
    images: a Tensor of shape [batch_size, height, width, channel],
      representing the input image.
    labels: a dict of Tensors that includes labels used for training/eval.
    mode: one of mode_keys.TRAIN, mode_keys.EVAL, mode_keys.PREDICT.

  Returns:
    a dict of output tensors.
  """

  def _cast_to_float32(output_dict):
    # Recursively casts every leaf tensor of a (possibly nested) dict of
    # outputs to tf.float32, in deterministic (sorted-key) order.
    for key in sorted(output_dict):
      value = output_dict[key]
      if isinstance(value, dict):
        _cast_to_float32(value)
      else:
        output_dict[key] = tf.cast(value, tf.float32)

  if self._use_bfloat16:
    with tf.tpu.bfloat16_scope():
      outputs = self._build_outputs(images, labels, mode)
      # Casts class and box outputs to tf.float32.
      _cast_to_float32(outputs)
  else:
    outputs = self._build_outputs(images, labels, mode)

  # Log model statistics.
  batch_size = images.get_shape().as_list()[0]
  _, _ = benchmark_utils.compute_model_statistics(batch_size)
  return outputs
def build_outputs(self, features, labels, mode):
  """Builds the backbone -> FPN -> classification head forward pass.

  Args:
    features: the input image tensor fed to the backbone.
    labels: a dict of label Tensors (unused here; kept for interface parity
      with the other model variants in this file).
    mode: one of mode_keys.TRAIN, mode_keys.EVAL, mode_keys.PREDICT.

  Returns:
    a dict with a single 'logits' entry holding the head output.
  """
  is_training = (mode == mode_keys.TRAIN)
  backbone_features = self._backbone_fn(features, is_training=is_training)
  fpn_features = self._fpn_fn(backbone_features, is_training=is_training)
  logits = self._head_fn(fpn_features, is_training=is_training)
  outputs = {
      'logits': logits,
  }

  # Print number of parameters and FLOPS in model.
  # NOTE: dict.values() returns a non-subscriptable view in Python 3, so it
  # must be materialized with list() before indexing. Any level's feature map
  # works here since only the batch dimension is read.
  batch_size, _, _, _ = (
      list(backbone_features.values())[0].get_shape().as_list())
  benchmark_utils.compute_model_statistics(
      batch_size, is_training=is_training)
  return outputs
def _build_outputs(self, images, labels, mode):
  """Builds backbone/FPN/head outputs, plus detections in non-TRAIN modes.

  Args:
    images: a Tensor of shape [batch_size, height, width, channel]
      representing the input image.
    labels: a dict of label Tensors; may contain 'anchor_boxes', and in
      non-TRAIN modes must contain 'image_info'.
    mode: one of mode_keys.TRAIN, mode_keys.EVAL, mode_keys.PREDICT.

  Returns:
    a dict with 'cls_outputs' and 'box_outputs'; in non-TRAIN modes it is
    extended with the detection results from `_generate_detections_fn`.
  """
  # Dynamic batch size; used for anchor tiling and flat-FPN reshaping below.
  batch_size = tf.shape(images)[0]
  if 'anchor_boxes' in labels:
    # Anchors provided by the input pipeline (presumably already batched —
    # TODO(review): confirm against the dataloader).
    anchor_boxes = labels['anchor_boxes']
  else:
    # Generate per-level anchors from the static image height/width.
    anchor_boxes = anchor.Anchor(
        self._params.architecture.min_level,
        self._params.architecture.max_level,
        self._params.anchor.num_scales,
        self._params.anchor.aspect_ratios,
        self._params.anchor.anchor_size,
        images.get_shape().as_list()[1:3]).multilevel_boxes
    # Generated anchors are unbatched; tile them across the batch dimension.
    for level in anchor_boxes:
      anchor_boxes[level] = tf.tile(
          tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1])

  backbone_features = self._backbone_fn(
      images, is_training=(mode == mode_keys.TRAIN))
  fpn_features = self._fpn_fn(backbone_features,
                              is_training=(mode == mode_keys.TRAIN))

  if self._params.architecture.output_flat_fpn_features:
    # Flattens and concatenates every FPN level into one [batch, -1] tensor,
    # exported under a stable graph node name for downstream consumers.
    flat_fpn_features_list = []
    for level in range(self._params.architecture.min_level,
                       self._params.architecture.max_level + 1):
      flat_fpn_features_list.append(
          tf.reshape(fpn_features[level], [batch_size, -1]))
    flat_fpn_features = tf.concat(flat_fpn_features_list, axis=1)
    flat_fpn_features = tf.identity(flat_fpn_features, 'RawFpnFeatures')

  cls_outputs, box_outputs = self._head_fn(
      fpn_features, is_training=(mode == mode_keys.TRAIN))
  model_outputs = {
      'cls_outputs': cls_outputs,
      'box_outputs': box_outputs,
  }

  tf.logging.info('Computing number of FLOPs before NMS...')
  # Statistics need a concrete batch size; skip when it is dynamic (None).
  static_batch_size = images.get_shape().as_list()[0]
  if static_batch_size:
    _, _ = benchmark_utils.compute_model_statistics(static_batch_size)

  if mode != mode_keys.TRAIN:
    # Post-processing (e.g. NMS) is only needed for eval/predict.
    detection_results = self._generate_detections_fn(
        box_outputs, cls_outputs, anchor_boxes,
        labels['image_info'][:, 1:2, :])
    model_outputs.update(detection_results)
  return model_outputs
def _build_outputs(self, images, labels, mode):
  """Builds backbone/FPN/head outputs, plus detections in non-TRAIN modes.

  Args:
    images: a Tensor of shape [batch_size, height, width, channel]
      representing the input image.
    labels: a dict of label Tensors; may contain 'anchor_boxes', and in
      non-TRAIN modes must contain 'image_info'.
    mode: one of mode_keys.TRAIN, mode_keys.EVAL, mode_keys.PREDICT.

  Returns:
    a dict with 'cls_outputs' and 'box_outputs'; in non-TRAIN modes it is
    extended with the detection results from `_generate_detections_fn`.
  """
  if 'anchor_boxes' in labels:
    # Anchors provided by the input pipeline (presumably already batched —
    # TODO(review): confirm against the dataloader).
    anchor_boxes = labels['anchor_boxes']
  else:
    # Generate per-level anchors from the static image height/width.
    anchor_boxes = anchor.Anchor(
        self._anchor_params.min_level,
        self._anchor_params.max_level,
        self._anchor_params.num_scales,
        self._anchor_params.aspect_ratios,
        self._anchor_params.anchor_size,
        images.get_shape().as_list()[1:3]).multilevel_boxes
    # Generated anchors are unbatched; tile them across the batch dimension.
    batch_size = tf.shape(images)[0]
    for level in anchor_boxes:
      anchor_boxes[level] = tf.tile(
          tf.expand_dims(anchor_boxes[level], 0), [batch_size, 1, 1])

  backbone_features = self._backbone_fn(
      images, is_training=(mode == mode_keys.TRAIN))
  fpn_features = self._fpn_fn(backbone_features,
                              is_training=(mode == mode_keys.TRAIN))
  cls_outputs, box_outputs = self._head_fn(
      fpn_features, is_training=(mode == mode_keys.TRAIN))
  model_outputs = {
      'cls_outputs': cls_outputs,
      'box_outputs': box_outputs,
  }

  tf.logging.info('Computing number of FLOPs before NMS...')
  # Statistics need a concrete batch size; skip when it is dynamic (None),
  # matching the guard used by the other `_build_outputs` in this file.
  static_batch_size = images.get_shape().as_list()[0]
  if static_batch_size:
    _, _ = benchmark_utils.compute_model_statistics(static_batch_size)

  if mode != mode_keys.TRAIN:
    # Post-processing (e.g. NMS) is only needed for eval/predict.
    detection_results = self._generate_detections_fn(
        box_outputs, cls_outputs, anchor_boxes,
        labels['image_info'][:, 1:2, :])
    model_outputs.update(detection_results)
  return model_outputs
def train(self, images, labels):
  """Returns a TPUEstimatorSpec for training.

  Args:
    images: a Tensor of shape [batch_size, height, width, channel]
      representing the input image tensor.
    labels: a dict of label tensors.

  Returns:
    a TPUEstimatorSpec object used for training.
  """
  # If the input image is transposed, we need to revert it back to the
  # original shape before it's used in the computation.
  if self._transpose_input:
    if self._space_to_depth_block_size > 1:
      # HWNC -> NHWC
      images = tf.transpose(images, [2, 0, 1, 3])
    else:
      # HWCN -> NHWC
      images = tf.transpose(images, [3, 0, 1, 2])

  outputs = self.build_outputs(images, labels, mode=mode_keys.TRAIN)

  # Log model statistics.
  batch_size = images.get_shape().as_list()[0]
  _, _ = benchmark_utils.compute_model_statistics(
      batch_size=batch_size,
      json_file_path=os.path.join(self._model_dir, 'train_model_stats.json'))

  model_loss = self.build_losses(outputs, labels)

  # Learning rate is a function of the global step (schedule).
  global_step = tf.train.get_global_step()
  learning_rate = self._learning_rate_fn(global_step)
  self.add_scalar_summary('learning_rate', learning_rate)

  # Sets up the optimizer.
  optimizer = self._optimizer_fn(learning_rate)
  if self._use_tpu:
    # Aggregates gradients across TPU shards.
    optimizer = tf.tpu.CrossShardOptimizer(optimizer)

  # Batch norm requires update_ops to be added as a train_op dependency.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

  # Gets all trainable variables and apply the variable filter.
  train_var_list = filter_trainable_variables(tf.trainable_variables(),
                                              self._frozen_var_prefix)

  # Gets the regularization variables and apply the regularization loss.
  regularization_var_list = filter_regularization_variables(
      train_var_list, self._regularization_var_regex)
  l2_regularization_loss = self._l2_weight_decay * tf.add_n(
      [tf.nn.l2_loss(v) for v in regularization_var_list])
  self.add_scalar_summary('l2_regularization_loss', l2_regularization_loss)

  total_loss = model_loss + l2_regularization_loss

  grads_and_vars = optimizer.compute_gradients(total_loss, train_var_list)
  if self._gradient_clip_norm > 0.0:
    # Clip by global norm, then re-pair clipped gradients with their vars.
    grads = [gv[0] for gv in grads_and_vars]
    tvars = [gv[1] for gv in grads_and_vars]
    clipped_grads, _ = tf.clip_by_global_norm(grads,
                                              self._gradient_clip_norm)
    grads_and_vars = list(zip(clipped_grads, tvars))

  # Run the (batch-norm) update ops before applying gradients.
  with tf.control_dependencies(update_ops):
    train_op = optimizer.apply_gradients(grads_and_vars, global_step)

  # Optionally warm-start from a checkpoint via the scaffold.
  scaffold_fn = self.restore_from_checkpoint()
  if self._enable_summary:
    host_call_fn = self.summarize()
  else:
    host_call_fn = None

  tpu_estimator_spec = tf.estimator.tpu.TPUEstimatorSpec(
      mode=tf.estimator.ModeKeys.TRAIN,
      loss=total_loss,
      train_op=train_op,
      host_call=host_call_fn,
      scaffold_fn=scaffold_fn)

  if self._use_tpu:
    return tpu_estimator_spec
  else:
    return tpu_estimator_spec.as_estimator_spec()
def _log_model_statistics(self, batched_input):
  """Logs model statistics for the given batched input tensor.

  Args:
    batched_input: a rank-4 Tensor whose leading static dimension is the
      batch size (e.g. NHWC).
  """
  # Unpacking asserts the input is rank 4; only the batch dim is used.
  batch, _, _, _ = batched_input.get_shape().as_list()
  _, _ = benchmark_utils.compute_model_statistics(batch)