Example #1
def efficientdet(features, model_name=None, config=None, **kwargs):
    """Build EfficientDet model."""
    if not config and not model_name:
        raise ValueError('please specify either model name or config')

    if not config:
        config = hparams_config.get_efficientdet_config(model_name)
    elif isinstance(config, dict):
        config = hparams_config.Config(config)  # wrap dict in Config object

    if kwargs:
        config.override(kwargs)

    logging.info(config)

    # build backbone features.
    features = build_backbone(features, config)
    logging.info('backbone params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    # build feature network.
    fpn_feats = build_feature_network(features, config)
    logging.info('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    # build class and box predictions.
    class_outputs, box_outputs = build_class_and_box_outputs(fpn_feats, config)
    logging.info('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    return class_outputs, box_outputs
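
A minimal usage sketch for this variant, assuming the google/automl repo layout (efficientdet_arch, utils) these examples are drawn from; the dummy tf.ones input mirrors Example #6 below, and the import path is illustrative:

import tensorflow.compat.v1 as tf

from efficientdet import efficientdet_arch, utils  # illustrative import path

images = tf.ones([1, 512, 512, 3], dtype=tf.float32)
class_outputs, box_outputs = efficientdet_arch.efficientdet(
    images, model_name='efficientdet-d0', is_training_bn=False)
# num_params_flops() returns a (params, flops) pair already scaled to M/B here.
print('total params/flops = {:.6f}M, {:.9f}B'.format(*utils.num_params_flops()))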
Example #2
def efficientdet(model_name=None, config=None, **kwargs):
    """Build EfficientDet model.

  Args:
    features: input tensor.
    model_name: String of the model (eg. efficientdet-d0)
    config: Dict of parameters for the network
    **kwargs: other parameters.

  Returns:
    A tuple (class_outputs, box_outputs) for predictions.
  """
    if not config and not model_name:
        raise ValueError('please specify either model name or config')

    if not config:
        config = hparams_config.get_efficientdet_config(model_name)
    elif isinstance(config, dict):
        config = hparams_config.Config(config)  # wrap dict in Config object

    if kwargs:
        config.override(kwargs)

    logging.info(config)
    inputs = tf.keras.layers.Input(
        [*utils.parse_image_size(config.image_size), 3])
    # build backbone features.
    features, backbone_outputs = build_backbone(inputs, config)
    logging.info('backbone params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    # build feature network.
    fpn_feats = build_feature_network(features, config)
    logging.info('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    # build class and box predictions.
    class_outputs, box_outputs = build_class_and_box_outputs(fpn_feats, config)
    logging.info('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    return tf.keras.Model(
        inputs=inputs, outputs=[backbone_outputs, class_outputs, box_outputs])
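
Unlike Example #1, this Keras variant builds its own Input layer from config.image_size and returns a tf.keras.Model, so a caller only supplies a model name or config. A hedged sketch of the call pattern:

model = efficientdet(model_name='efficientdet-d0')
model.summary()  # standard Keras inspection of the backbone/FPN/head stack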
Example #3
def efficientdet(features, model_name=None, config=None, **kwargs):
  """Build EfficientDet model.

  Args:
    features: input tensor.
    model_name: String of the model (eg. efficientdet-d0)
    config: Dict of parameters for the network
    **kwargs: other parameters.

  Returns:
    A tuple (class_outputs, box_outputs) for predictions.
  """
  if not config and not model_name:
    raise ValueError('please specify either model name or config')

  if not config:
    config = hparams_config.get_efficientdet_config(model_name)
  elif isinstance(config, dict):
    config = hparams_config.Config(config)  # wrap dict in Config object

  if kwargs:
    config.override(kwargs)

  logging.info(config)

  # build backbone features.
  features = legacy_arch.build_backbone(features, config)
  logging.info('backbone params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # build feature network.
  fpn_feats = legacy_arch.build_feature_network(features, config)
  logging.info('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  # build class and box predictions.
  class_box = BuildClassAndBoxOutputs(**config)
  class_outputs, box_outputs = class_box.call(fpn_feats)
  logging.info('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
      *utils.num_params_flops()))

  return class_outputs, box_outputs
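
One detail worth flagging: class_box.call(fpn_feats) invokes the layer's call() method directly, bypassing Keras's __call__ machinery; whether that is safe depends on how BuildClassAndBoxOutputs creates its variables, which is not shown here. A generic sketch of the difference, using a stock layer rather than the real class:

import tensorflow as tf

layer = tf.keras.layers.Dense(4)
x = tf.ones([2, 8])
y = layer(x)  # __call__ runs build() first, creating the kernel and bias
# Before that first __call__, layer.call(x) would fail: Dense.call reads
# self.kernel, which only exists after build() has run.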
Example #4
def retinanet(features, model_name='retinanet-50', config=None, **kwargs):
    """RetinaNet classification and regression model."""
    if not config:
        config = hparams_config.get_retinanet_config(model_name)
    config.override(kwargs)

    min_level = config.get('min_level', 3)
    max_level = config.get('max_level', 7)
    num_classes = config.get('num_classes', 90)
    resnet_depth = config.get('resnet_depth', 50)
    use_nearest_upsampling = config.get('use_nearest_upsampling', True)
    is_training_bn = config.get('is_training_bn', False)
    num_anchors = len(config.aspect_ratios) * config.num_scales

    # create feature pyramid networks
    feats = resnet_fpn(features, min_level, max_level, resnet_depth,
                       is_training_bn, use_nearest_upsampling)
    logging.info('backbone+fpn params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))
    # add class net and box net in RetinaNet. The class net and the box net are
    # shared among all the levels.
    with tf.variable_scope('retinanet'):
        class_outputs = {}
        box_outputs = {}
        with tf.variable_scope('class_net', reuse=tf.AUTO_REUSE):
            for level in range(min_level, max_level + 1):
                class_outputs[level] = class_net(feats[level], level,
                                                 num_classes, num_anchors,
                                                 is_training_bn)
        with tf.variable_scope('box_net', reuse=tf.AUTO_REUSE):
            for level in range(min_level, max_level + 1):
                box_outputs[level] = box_net(feats[level], level, num_anchors,
                                             is_training_bn)
    logging.info('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    return class_outputs, box_outputs
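
Example #9 below shows the matching call pattern for this function; a hedged sketch, assuming the retinanet_arch module name used there:

inputs = tf.ones([1, 640, 640, 3], dtype=tf.float32)
class_outputs, box_outputs = retinanet_arch.retinanet(
    inputs, model_name='retinanet-50', is_training_bn=False)
# Both outputs are dicts keyed by pyramid level (min_level..max_level).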
Example #5
    def build_model(self,
                    inputs: tf.Tensor,
                    is_training: bool = False) -> List[tf.Tensor]:
        """Build model with inputs and labels and print out model stats."""
        tf.logging.info('start building model')
        model_arch = det_model_fn.get_model_arch(self.model_name)
        cls_outputs, box_outputs = model_arch(inputs,
                                              model_name=self.model_name,
                                              is_training_bn=is_training,
                                              use_bfloat16=False,
                                              **self.model_overrides)

        print('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
            *utils.num_params_flops()))

        all_outputs = list(cls_outputs.values()) + list(box_outputs.values())
        return all_outputs
Example #6
    def build_model(self,
                    model_name,
                    isize,
                    is_training=False,
                    data_format='channels_last'):
        """Build the model on a dummy input and return raw params/flops."""
        if isinstance(isize, int):
            isize = (isize, isize)
        if data_format == 'channels_first':
            inputs_shape = [1, 3, isize[0], isize[1]]
        else:
            inputs_shape = [1, isize[0], isize[1], 3]
        inputs = tf.ones(shape=inputs_shape, name='input', dtype=tf.float32)
        efficientdet_arch.efficientdet(inputs,
                                       model_name=model_name,
                                       is_training_bn=is_training,
                                       image_size=isize,
                                       data_format=data_format)
        return utils.num_params_flops(False)
Example #7
    def build_model(self,
                    model_name,
                    isize=None,
                    is_training=False,
                    data_format='channels_last'):
        """Build the model at isize, or at its configured default size."""
        config = hparams_config.get_efficientdet_config(model_name)
        config.image_size = isize or config.image_size
        isize = utils.parse_image_size(config.image_size)
        if data_format == 'channels_first':
            inputs_shape = [1, 3, isize[0], isize[1]]
        else:
            inputs_shape = [1, isize[0], isize[1], 3]
        inputs = tf.ones(shape=inputs_shape, name='input', dtype=tf.float32)
        efficientdet_arch.efficientdet(inputs,
                                       model_name=model_name,
                                       is_training_bn=is_training,
                                       image_size=isize,
                                       data_format=data_format)
        return utils.num_params_flops(False)
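
The same measurement, flattened into a standalone sketch; it assumes the google/automl import layout used throughout these examples, and that the positional False selects raw (non-readable) counts, matching the readable_format keyword seen in Examples #5 and #10:

import tensorflow.compat.v1 as tf

config = hparams_config.get_efficientdet_config('efficientdet-d0')
h, w = utils.parse_image_size(config.image_size)
inputs = tf.ones([1, h, w, 3], dtype=tf.float32, name='input')
efficientdet_arch.efficientdet(inputs, model_name='efficientdet-d0',
                               image_size=(h, w))
raw_params, raw_flops = utils.num_params_flops(False)  # raw counts, not M/B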
Example #8
  def build_model(self, inputs: tf.Tensor,
                  is_training: bool = False) -> List[tf.Tensor]:
    """Build model with inputs and labels and print out model stats."""
    logging.info('start building model')
    cls_outputs, box_outputs = inference.build_model(
        self.model_name,
        inputs,
        is_training_bn=is_training,
        config=self.model_config)

    print('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
        *utils.num_params_flops()))

    # Write to tfevent for tensorboard.
    train_writer = tf.summary.FileWriter(self.logdir)
    train_writer.add_graph(tf.get_default_graph())
    train_writer.flush()

    all_outputs = list(cls_outputs.values()) + list(box_outputs.values())
    return all_outputs
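
Because this variant also writes the built graph to a TFEvent file, the result can be inspected with the standard TensorBoard CLI (generic usage, not specific to this repo):

tensorboard --logdir <logdir>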
Example #9
    def build_model(self, inputs: tf.Tensor,
                    is_training: bool) -> List[tf.Tensor]:
        """Build model with inputs and labels and print out model stats."""
        tf.logging.info('start building model')
        if self.model_name.startswith('efficientdet'):
            cls_outputs, box_outputs = efficientdet_arch.efficientdet(
                inputs,
                model_name=self.model_name,
                is_training_bn=is_training,
                use_bfloat16=False)
        elif self.model_name.startswith('retinanet'):
            cls_outputs, box_outputs = retinanet_arch.retinanet(
                inputs,
                model_name=self.model_name,
                is_training_bn=is_training,
                use_bfloat16=False)
        else:
            raise ValueError('Unsupported model name: %s' % self.model_name)

        print('backbone+fpn+box params/flops = {:.6f}M, {:.9f}B'.format(
            *utils.num_params_flops()))

        all_outputs = list(cls_outputs.values()) + list(box_outputs.values())
        return all_outputs
Example #10
def model_fn(features, labels, mode, params):
    """The model_fn to be used with TPUEstimator.

  Args:
    features: A dict of `Tensor` of batched images and other features.
    labels: a Tensor or a dict of Tensor representing the batched labels.
    mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
    params: `dict` of parameters passed to the model from the TPUEstimator,
      `params['batch_size']` is always provided and should be used as the
      effective batch size.

  Returns:
    A `TPUEstimatorSpec` for the model
  """
    logging.info('params=%s', params)
    images = features['image'] if isinstance(features, dict) else features
    labels = labels['label'] if isinstance(labels, dict) else labels
    config = params['config']
    image_size = params['image_size']
    utils.scalar('model/resolution', image_size)

    if config.model.data_format == 'channels_first':
        images = tf.transpose(images, [0, 3, 1, 2])

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    has_moving_average_decay = (config.train.ema_decay > 0)
    if FLAGS.use_tpu and not config.model.bn_type:
        config.model.bn_type = 'tpu_bn'
    # This is essential if using a Keras-derived model.
    tf.keras.backend.set_learning_phase(is_training)

    def build_model(in_images):
        """Build model using the model_name given through the command line."""
        config.model.num_classes = config.data.num_classes
        model = effnetv2_model.EffNetV2Model(config.model.model_name,
                                             config.model)
        logits = model(in_images, training=is_training)[0]
        return logits

    pre_num_params, pre_num_flops = utils.num_params_flops(
        readable_format=True)

    if config.runtime.mixed_precision:
        precision = 'mixed_bfloat16' if FLAGS.use_tpu else 'mixed_float16'
        logits = utils.build_model_with_precision(precision, build_model,
                                                  images, is_training)
        logits = tf.cast(logits, tf.float32)
    else:
        logits = build_model(images)

    num_params, num_flops = utils.num_params_flops(readable_format=True)
    num_params = num_params - pre_num_params
    num_flops = (num_flops - pre_num_flops) / params['batch_size']
    logging.info('backbone params/flops = %.4f M / %.4f B', num_params,
                 num_flops)
    utils.scalar('model/params', num_params)
    utils.scalar('model/flops', num_flops)

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    if config.train.loss_type == 'sigmoid':
        cross_entropy = tf.losses.sigmoid_cross_entropy(
            multi_class_labels=tf.cast(labels, dtype=logits.dtype),
            logits=logits,
            label_smoothing=config.train.label_smoothing)
    elif config.train.loss_type == 'custom':
        xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(
            labels, dtype=logits.dtype),
                                                       logits=logits)
        cross_entropy = tf.reduce_mean(tf.reduce_sum(xent, axis=-1))
    else:
        if config.data.multiclass:
            logging.info('use multi-class loss: %s', config.data.multiclass)
            labels /= tf.reshape(tf.reduce_sum(labels, axis=1), (-1, 1))
        cross_entropy = tf.losses.softmax_cross_entropy(
            onehot_labels=labels,
            logits=logits,
            label_smoothing=config.train.label_smoothing)

    train_steps = max(config.train.min_steps,
                      config.train.epochs * params['steps_per_epoch'])
    global_step = tf.train.get_global_step()
    weight_decay_inc = config.train.weight_decay_inc * (
        tf.cast(global_step, tf.float32) / tf.cast(train_steps, tf.float32))
    weight_decay = (1 + weight_decay_inc) * config.train.weight_decay
    utils.scalar('train/weight_decay', weight_decay)
    # Add weight decay to the loss for non-batch-normalization variables.
    matcher = re.compile(config.train.weight_decay_exclude)
    l2loss = weight_decay * tf.add_n([
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables() if not matcher.match(v.name)
    ])
    loss = cross_entropy + l2loss
    utils.scalar('loss/l2reg', l2loss)
    utils.scalar('loss/xent', cross_entropy)

    if has_moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(decay=config.train.ema_decay,
                                                num_updates=global_step)
        ema_vars = utils.get_ema_vars()

    host_call = None
    restore_vars_dict = None
    if is_training:
        # Compute the current epoch and associated learning rate from global_step.
        current_epoch = (tf.cast(global_step, tf.float32) /
                         params['steps_per_epoch'])
        utils.scalar('train/epoch', current_epoch)

        scaled_lr = config.train.lr_base * (config.train.batch_size / 256.0)
        scaled_lr_min = config.train.lr_min * (config.train.batch_size / 256.0)
        learning_rate = utils.WarmupLearningRateSchedule(
            scaled_lr,
            steps_per_epoch=params['steps_per_epoch'],
            decay_epochs=config.train.lr_decay_epoch,
            warmup_epochs=config.train.lr_warmup_epoch,
            decay_factor=config.train.lr_decay_factor,
            lr_decay_type=config.train.lr_sched,
            total_steps=train_steps,
            minimal_lr=scaled_lr_min)(global_step)
        utils.scalar('train/lr', learning_rate)
        optimizer = utils.build_optimizer(
            learning_rate, optimizer_name=config.train.optimizer)
        if FLAGS.use_tpu:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To the
            # user, this should look like regular synchronous training.
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        # filter trainable variables if needed.
        var_list = tf.trainable_variables()
        if config.train.varsexp:
            vars2 = [
                v for v in var_list if re.match(config.train.varsexp, v.name)
            ]
            if len(vars2) == len(var_list):
                logging.warning('%s matched all variables; nothing filtered.',
                                config.train.varsexp)
            logging.info('Filter variables: orig=%d, final=%d, delta=%d',
                         len(var_list), len(vars2),
                         len(var_list) - len(vars2))
            var_list = vars2

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if config.train.gclip:  # is_training is already true in this branch.
            logging.info('clip gradients norm by %f', config.train.gclip)
            grads_and_vars = optimizer.compute_gradients(loss, var_list)
            with tf.name_scope('gclip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                utils.scalar('train/gnorm', tf.linalg.global_norm(grads))
                utils.scalar('train/gnormmax',
                             tf.math.reduce_max([tf.norm(g) for g in grads]))
                # First clip each variable's norm, then clip global norm.
                clip_norm = abs(config.train.gclip)
                clipped_grads = [
                    tf.clip_by_norm(g, clip_norm) if g is not None else None
                    for g in grads
                ]
                clipped_grads, _ = tf.clip_by_global_norm(
                    clipped_grads, clip_norm)
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(loss,
                                              global_step,
                                              var_list=var_list)

        if has_moving_average_decay:
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)

        if not config.runtime.skip_host_call:
            host_call = utils.get_tpu_host_call(
                global_step, FLAGS.model_dir,
                config.runtime.iterations_per_loop)
    else:
        train_op = None
        if has_moving_average_decay:
            # Load moving average variables for eval.
            restore_vars_dict = ema.variables_to_restore(ema_vars)

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Evaluation metric function.

      Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch, num_classes]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
            metrics = {}
            if config.data.multiclass:
                metrics['eval/global_ap'] = tf.metrics.auc(
                    labels,
                    tf.nn.sigmoid(logits),
                    curve='PR',
                    num_thresholds=200,
                    summation_method='careful_interpolation',
                    name='global_ap')

                # Convert one-hot labels to label-id sets; note that the
                # tf.metrics.*_at_k metrics expect label ids, not one-hot
                # vectors, and are easy to misuse.
                labels = tf.cast(labels, dtype=tf.int64)
                label_to_repeat = tf.expand_dims(tf.argmax(labels, axis=-1),
                                                 axis=-1)
                all_labels_set = tf.range(0, labels.shape[-1], dtype=tf.int64)
                all_labels_set = tf.expand_dims(all_labels_set, axis=0)
                labels_set = labels * all_labels_set + (
                    1 - labels) * label_to_repeat

                metrics['eval/precision@1'] = tf.metrics.precision_at_k(
                    labels_set, logits, k=1)
                metrics['eval/recall@1'] = tf.metrics.recall_at_k(labels_set,
                                                                  logits,
                                                                  k=1)
                metrics['eval/precision@5'] = tf.metrics.precision_at_k(
                    labels_set, logits, k=5)
                metrics['eval/recall@5'] = tf.metrics.recall_at_k(labels_set,
                                                                  logits,
                                                                  k=5)

            # always add accuracy.
            labels = tf.argmax(labels, axis=1)
            predictions = tf.argmax(logits, axis=1)
            metrics['eval/acc_top1'] = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            metrics['eval/acc_top5'] = tf.metrics.mean(in_top_5)
            metrics['model/resolution'] = tf.metrics.mean(image_size)
            metrics['model/flops'] = tf.metrics.mean(num_flops)
            metrics['model/params'] = tf.metrics.mean(num_params)
            return metrics

        eval_metrics = (metric_fn, [labels, logits])

    if has_moving_average_decay and not is_training:

        def scaffold_fn():  # read ema for eval jobs.
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    elif config.train.ft_init_ckpt and is_training:

        def scaffold_fn():
            logging.info('restore variables from %s',
                         config.train.ft_init_ckpt)
            var_map = utils.get_ckpt_var_map(
                ckpt_path=config.train.ft_init_ckpt,
                skip_mismatch=True,
                init_ema=config.train.ft_init_ema)
            tf.train.init_from_checkpoint(config.train.ft_init_ckpt, var_map)
            return tf.train.Scaffold()
    else:
        scaffold_fn = None

    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=loss,
                                             train_op=train_op,
                                             host_call=host_call,
                                             eval_metrics=eval_metrics,
                                             scaffold_fn=scaffold_fn)
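
The pre/post measurement around build_model above is the generic way these examples isolate one component's cost: snapshot the counts, build, subtract. A hedged standalone sketch of that pattern, assuming utils.num_params_flops() reflects the current default graph as in the examples above:

import tensorflow.compat.v1 as tf

images = tf.ones([8, 224, 224, 3])
pre_params, pre_flops = utils.num_params_flops(readable_format=True)
logits = tf.layers.dense(tf.reshape(images, [8, -1]), 1000)  # stand-in component
post_params, post_flops = utils.num_params_flops(readable_format=True)
print('component cost = {:.4f}M params, {:.4f}B flops/example'.format(
    post_params - pre_params, (post_flops - pre_flops) / 8))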