Example 1
 def testBatchNormScopeDoesHasIsTrainingWhenItsNotNone(self):
   sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)
   self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
   sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)
   self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
   sc = mobilenet_v1.mobilenet_v1_arg_scope()
   self.assertIn('is_training', sc[slim.arg_scope_func_key(slim.batch_norm)])
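
For orientation, a minimal sketch of how the arg scope checked above is typically consumed when building the network. This assumes the usual slim alias (slim = tf.contrib.slim) and the slim nets/mobilenet_v1.py module used throughout these examples:

import tensorflow as tf
from nets import mobilenet_v1  # assumed import path; adjust to your checkout

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='input')
with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=False)):
    logits, end_points = mobilenet_v1.mobilenet_v1(
        images, num_classes=1001, is_training=False)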
Example 2
    def build(self, cost, model, train):
        if model == "MobilenetV1":
            with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
                self.logits, self.end_points = mobilenet_v1.mobilenet_v1(
                    self.input,
                    num_classes=self.numb_logits,
                    dropout_keep_prob=self.kp,
                    is_training=train)
        elif model == "vgg_16":
            with slim.arg_scope(vgg.vgg_arg_scope()):
                self.logits, self.end_points = vgg.vgg_16(
                    self.input,
                    num_classes=self.numb_logits,
                    dropout_keep_prob=self.kp,
                    is_training=True)

        self.prob = tf.nn.softmax(self.logits, name="prob")
        self.loss = tf.reduce_mean(
            tf.reduce_sum(tf.pow(self.prob - self.target, 2), axis=1))
        tf.summary.scalar('loss', self.loss)
        if cost == "mse":
            self.cost = self.loss
        else:
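            # Shift target and prediction probabilities slightly into (0, 1) to avoid log(0) terms below.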
            self.xtarget = self.target * (1 - 1e-11) + 1e-12
            assert self.xtarget.get_shape().as_list()[1] == self.numb_logits
            self.xprob = self.prob * (1 - 1e-11) + 1e-12
            assert self.xprob.get_shape().as_list()[1] == self.numb_logits
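            # KL divergence between the smoothed targets and the predicted distribution, averaged over the batch.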
            self.cost = tf.reduce_mean(
                tf.reduce_sum(self.xtarget * tf.log(self.xtarget / self.prob),
                              axis=1))
            tf.summary.scalar('cost_kl', self.cost)
Example 3
def main(_):
    if not FLAGS.output_file:
        raise ValueError(
            'You must supply the path to save to with --output_file')
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default() as graph:
        image_size = FLAGS.image_size
        input_shape = [FLAGS.batch_size, image_size, image_size, 3]
        placeholder = tf.placeholder(name='input',
                                     dtype=tf.float32,
                                     shape=input_shape)
        scope = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False,
                                                    weight_decay=0.0)
        with slim.arg_scope(scope):
            mobilenet_v1_l2norm.mobilenet_v1_l2norm(
                placeholder, is_training=False, num_classes=FLAGS.num_classes)

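        # Optionally rewrite the graph with fake-quantization ops for evaluation.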
        if FLAGS.quantize:
            tf.contrib.quantize.create_eval_graph()

        graph_def = graph.as_graph_def()
        if FLAGS.write_text_graphdef:
            tf.io.write_graph(graph_def,
                              os.path.dirname(FLAGS.output_file),
                              os.path.basename(FLAGS.output_file),
                              as_text=True)
        else:
            with tf.gfile.GFile(FLAGS.output_file, 'wb') as f:
                f.write(graph_def.SerializeToString())
Example 4
    def head_net(self, blocks, is_training, trainable=True):

        normal_initializer = tf.truncated_normal_initializer(0, 0.01)
        msra_initializer = tf.contrib.layers.variance_scaling_initializer()
        xavier_initializer = tf.contrib.layers.xavier_initializer()

        with slim.arg_scope(mobilenet_v1_arg_scope(is_training=is_training)):
            out = slim.conv2d_transpose(blocks, 256, [4, 4], stride=2,
                                        trainable=trainable, weights_initializer=normal_initializer,
                                        padding='SAME', activation_fn=tf.nn.relu,
                                        scope='up1')
            out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                                        trainable=trainable, weights_initializer=normal_initializer,
                                        padding='SAME', activation_fn=tf.nn.relu,
                                        scope='up2')
            out = slim.conv2d_transpose(out, 256, [4, 4], stride=2,
                                        trainable=trainable, weights_initializer=normal_initializer,
                                        padding='SAME', activation_fn=tf.nn.relu,
                                        scope='up3')

            out = slim.conv2d(out, cfg.nr_skeleton, [1, 1],
                              trainable=trainable, weights_initializer=msra_initializer,
                              padding='SAME', normalizer_fn=None, activation_fn=None,
                              scope='out')

        return out
Example 5
    def _extract_box_classifier_features(self, proposal_feature_maps, scope):
        """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
        net = proposal_feature_maps

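        # Keep channel counts at a minimum of 16 (the depth multiplier is fixed at 1.0 here).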
        depth = lambda d: max(int(d * 1.0), 16)
        with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=self._train_batch_norm,
                        weight_decay=self._weight_decay)):
                with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                                    padding='SAME'):
                    net = slim.separable_conv2d(net,
                                                depth(1024), [3, 3],
                                                depth_multiplier=1,
                                                stride=2,
                                                scope='Conv2d_12_pointwise')
                    return slim.separable_conv2d(net,
                                                 depth(1024), [3, 3],
                                                 depth_multiplier=1,
                                                 stride=1,
                                                 scope='Conv2d_13_pointwise')
Example 6
def generator(images, n_filter=4, train=True, reuse=False):
    """Define the generator model.
    Args:
        images: input images for the generator
        n_filter: number of filters to learn from each image
        train: boolean value to specify if the network is in training mode or inference mode
        reuse: whether to reuse network variables or not
    return:
        output_2: enhanced version of the input
    """
    with tf.variable_scope("generator",reuse=reuse):
        # first generator network
        with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
            logits, end_points = mobilenet_v1.mobilenet_v1(images, activation_fn=leakyRely, dropout_keep_prob=1, is_training=train)
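        # Take the deepest MobileNet feature map as input to the filter-prediction layers below.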
        net = end_points['Conv2d_13_pointwise']

        filters_1 = slim.conv2d(net,256, [3, 3],stride=2, activation_fn=tf.nn.relu, padding='VALID',
                             normalizer_fn=None, scope='filters_1')
        filters_2_1 = slim.conv2d(filters_1,n_filter, [1, 1],stride=1, activation_fn=None, padding='SAME',
                             normalizer_fn=None, scope='filters_2_1')

        filters_2_2 = tf.expand_dims(filters_2_1,axis=4, name='filters_2_2')

        output_1 = adapt_filter(images[:,:,:,1:2], filters_2_2[:,:,:,0:4,:], name="sat_adapt", train=train)
        print("output_1", output_1.get_shape().as_list())
        output_2 = tf.concat([images[:,:,:,0:1], output_1,images[:,:,:,2:3] ], axis=3)
        print("output_2", output_2.get_shape().as_list())
    return output_2
Example 7
 def create(self, images, num_classes, is_training):
     """See baseclass."""
     with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
         _, endpoints = mobilenet_v1.mobilenet_v1(inputs=images,
                                                  num_classes=num_classes,
                                                  is_training=is_training)
         return endpoints
Example 8
def build_model():
  """Build the mobilenet_v1 model for evaluation.

  Returns:
    g: graph with rewrites after insertion of quantization ops and batch norm
    folding.
    eval_ops: eval ops for inference.
    variables_to_restore: List of variables to restore from checkpoint.
  """
  g = tf.Graph()
  with g.as_default():
    inputs, labels = imagenet_input(is_training=False)

    scope = mobilenet_v1.mobilenet_v1_arg_scope(
        is_training=False, weight_decay=0.0)
    with slim.arg_scope(scope):
      logits, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=False,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=FLAGS.num_classes)

    if FLAGS.quantize:
      tf.contrib.quantize.create_eval_graph()

    eval_ops = metrics(logits, labels)

  return g, eval_ops
Example 9
def build_model():
    """Build the mobilenet_v1 model for evaluation.

  Returns:
    g: graph with rewrites after insertion of quantization ops and batch norm
    folding.
    eval_ops: eval ops for inference.
    variables_to_restore: List of variables to restore from checkpoint.
  """
    g = tf.Graph()
    with g.as_default():
        inputs, labels = imagenet_input(is_training=False)

        scope = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False,
                                                    weight_decay=0.0)
        with slim.arg_scope(scope):
            logits, _ = mobilenet_v1.mobilenet_v1(
                inputs,
                is_training=False,
                depth_multiplier=FLAGS.depth_multiplier,
                num_classes=FLAGS.num_classes,
                final_endpoint=FLAGS.final_endpoint)

        if FLAGS.quantize:
            contrib_quantize.create_eval_graph()

        eval_ops = metrics(logits, labels)

    return g, eval_ops
Example 10
 def embedding_fn(images, reuse=False):
     with tf.variable_scope('MobilenetV1', reuse=reuse) as scope:
         with slim.arg_scope(
                 mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)):
             return mobilenet_v1.mobilenet_v1_base(
                 images,
                 final_endpoint=mobilenent_config['final_endpoint'],
                 conv_defs=mobilenet.CONV_DEFS,
                 depth_multiplier=mobilenent_config['depth_multiplier'],
                 scope=scope)
Example 11
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Extracts features using the first half of the MobileNet v1 network.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])
        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
        }
        depth_multiplier = 1.0
        min_depth = 16
        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=self._is_training,
                        weight_decay=self._weight_decay)):
                # Forces is_training to False to disable batch norm update.
                with slim.arg_scope([slim.batch_norm], is_training=False):
                    with tf.variable_scope('MobileNetV1',
                                           reuse=self._reuse_weights) as scope:
                        _, image_features = mobilenet_v1.mobilenet_v1_base(
                            preprocessed_inputs, scope=scope)
                        #feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        #  feature_map_layout=feature_map_layout,
                        #  depth_multiplier=depth_multiplier,
                        #  min_depth=min_depth,
                        #  insert_1x1_conv=True,
                        #  image_features=image_features)
                        #_, end_points = vgg.vgg_16(preprocessed_inputs, scope=scope,
                        #    is_training=self._is_training)
                        rpn_feature_map = image_features['Conv2d_13_pointwise']
        return rpn_feature_map
Example 12
def inference(hypes, images, train=True, reuse=None,
              num_classes=1000,
              num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
              preprocess=True,
              bottleneck=True):
    # if preprocess is True, input should be RGB [0,1], otherwise BGR with mean
    # subtracted

    if preprocess:
        x = _inception_preprocess(images)
    else:
        x = images

    is_train = tf.convert_to_tensor(train,
                                    dtype='bool',
                                    name='is_training')

    if reuse is None:
        with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_train)):
            net, end_points = mobilenet_v1.mobilenet_v1_base(x)
    else:
        with tf.variable_scope("MobilenetV1", [x], reuse=reuse) as scope:
            with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_train)):
                net, end_points = mobilenet_v1.mobilenet_v1_base(x, scope=scope)

    logits = {}

    logits['images'] = images
    logits['fcn_in'] = end_points['Conv2d_13_pointwise']
    logits['feed2'] = end_points['Conv2d_11_pointwise']
    logits['feed4'] = end_points['Conv2d_5_pointwise']

    logits['early_feat'] = logits['feed2']
    logits['deep_feat'] = logits['fcn_in']

    if train:
        restore = tf.global_variables()
        hypes['init_function'] = _initalize_variables
        hypes['restore'] = restore

    return logits
Example 13
def build_model():
  """Builds graph for model to train with rewrites for quantization.

  Returns:
    g: Graph with fake quantization ops and batch norm folding suitable for
    training quantized weights.
    train_tensor: Train op for execution during training.
  """
  g = tf.Graph()
  with g.as_default(), tf.device(
      tf.train.replica_device_setter(FLAGS.ps_tasks)):

    #Reads in data and/or performs pre-processing on the images.
    inputs, labels, _ = tf_input.input(is_random=True, is_training=True)
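    # Shift labels down by one so they are zero-based before one-hot encoding.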
    labels = labels - 1
    labels = slim.one_hot_encoding(labels, FLAGS.num_classes)

    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)):
      logits, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=True,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=FLAGS.num_classes)

    tf.losses.softmax_cross_entropy(labels, logits)

    # Call rewriter to produce graph with fake quant ops and folded batch norms
    # quant_delay delays start of quantization till quant_delay steps, allowing
    # for better model accuracy.
    if FLAGS.quantize:
      tf.contrib.quantize.create_training_graph(quant_delay=get_quant_delay())

    total_loss = tf.losses.get_total_loss(name='total_loss')
    # Configure the learning rate using an exponential decay.
    num_epochs_per_decay = 2.5
    data_size = tf_input.TRAINING_SET_SIZE
    decay_steps = int(data_size / FLAGS.batch_size * num_epochs_per_decay)

    learning_rate = tf.train.exponential_decay(
        get_learning_rate(),
        tf.train.get_or_create_global_step(),
        decay_steps,
        _LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    opt = tf.train.GradientDescentOptimizer(learning_rate)

    train_tensor = slim.learning.create_train_op(
        total_loss,
        optimizer=opt)

  slim.summaries.add_scalar_summary(total_loss, 'total_loss', 'losses')
  slim.summaries.add_scalar_summary(learning_rate, 'learning_rate', 'training')
  return g, train_tensor
Example 14
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    # TODO(skligys): Enable fused batch norm once quantization supports it.
                    with slim.arg_scope([slim.batch_norm], fused=False):
                        _, image_features = mobilenet_v1.mobilenet_v1_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            use_explicit_padding=self._use_explicit_padding,
                            scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                # TODO(skligys): Enable fused batch norm once quantization supports it.
                with slim.arg_scope([slim.batch_norm], fused=False):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
Example 15
def fcn_mobv1(images, num_classes, is_training=True):

    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
        _, end_points = mobilenet_v1.mobilenet_v1(images,
                                                  num_classes,
                                                  is_training=False,
                                                  spatial_squeeze=False)

        #        for v,k in end_points.items():
        #                print('{v}:{k}'.format(v = v, k = k))

        #        pool4=end_points['resnet_v1_101/pool4']
        #
        #        dconv1_out=pool4.get_shape().as_list()
        #
        #
        #        deconv1=slim.conv2d_transpose(net,dconv1_out[3],[4,4], stride=2,scope='deconv1')
        #
        #        fu1=tf.add(deconv1,pool4)
        #
        #
        #        pool3=end_points['resnet_v1_101/pool3']
        #        dconv2_out=pool3.get_shape().as_list()
        #        deconv2=slim.conv2d_transpose(fu1,dconv2_out[3],[4,4], stride=2,scope='deconv2')
        #
        #        fu2=tf.add(deconv2,pool3)
        net_7 = end_points['Conv2d_13_pointwise']
        net_14 = end_points['Conv2d_11_pointwise']
        net_28 = end_points['Conv2d_5_pointwise']

        up1 = slim.conv2d_transpose(net_7,
                                    512, [4, 4],
                                    stride=2,
                                    scope='deconv32')
        fu1 = tf.add(up1, net_14, name='fu1')

        up2 = slim.conv2d_transpose(fu1,
                                    256, [4, 4],
                                    stride=2,
                                    scope='deconv16')
        fu2 = tf.add(up2, net_28, name='fu2')

        logit = slim.conv2d_transpose(fu2,
                                      num_classes, [16, 16],
                                      stride=8,
                                      scope='deconv8')

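        # Per-pixel class prediction taken over the channel dimension of the upsampled logits.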
        prediction = tf.argmax(logit, dimension=3)  #, name="prediction")

        print('logit', logit)

        return logit, tf.expand_dims(prediction, axis=3)
Example 16
def MOBILENET(image_batch_tensor, is_training):
    '''
    Returns the MobileNet model definition for use within the MobileSeg model.

    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, channels] Tensor
        Tensor containing a batch of input images.

    is_training : bool
        True if network is being trained, False otherwise. This controls whether
        dropout layers should be enabled, and the behaviour of the batchnorm
        layers.

    Returns
    -------
    conv13_features:
        Features with a stride length of 32. The layer is referred to as
        'MobilenetV1/Conv2d_13_pointwise/Conv2D' in the MobileNet Tensorflow
        implementation. These features feed into the average pooling layer in
        the original network; however the pooling layer and subsequent fc and
        softmax layers have been removed in this implementation.

    conv11_features:
        Features with a stride length of 16. (Output of the
        'MobilenetV1/Conv2d_11_pointwise/Conv2D' layer.)

    conv5_features:
        Features with a stride length of 8. (Output of the
        'MobilenetV1/Conv2d_5_pointwise/Conv2D' layer.)
    '''
    # Convert image to float32 before subtracting the mean pixel values
    image_batch_float = tf.to_float(image_batch_tensor)

    # Subtract the mean pixel value from each pixel
    mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

    with slim.arg_scope(
            mobilenet.mobilenet_v1_arg_scope(is_training=is_training)):
        conv13_features, end_points = mobilenet.mobilenet_v1_base(
            image_batch_tensor,
            final_endpoint='Conv2d_13_pointwise',
            min_depth=8,
            depth_multiplier=1.0,
            conv_defs=None,
            output_stride=None,
            scope=None)

    return conv13_features, end_points['Conv2d_11_pointwise'], end_points[
        'Conv2d_5_pointwise']
Example 17
def _build_mobilenet_model(is_training, images, params):
    with slim.arg_scope(
            mobilenet_v1.mobilenet_v1_arg_scope(is_training=is_training)):
        out, _ = mobilenet_v1.mobilenet_v1(
            images,
            is_training=is_training,
            depth_multiplier=params.depth_multiplier,
            num_classes=None)
        tf.logging.info("mobilenet preembedding shape{}".format(
            out.get_shape().as_list()))
        out = tf.reshape(out, [-1, 256])
        out = tf.layers.dense(out, params.embedding_size, name="embeddings")
    return out
Example 18
def perceptual_params(images, reuse=False):
    """Get semantic parameters of images.
    Args:
        images: input images for the generator
        reuse: whether to reuse network variables or not
    return:
        semantic parameters of the images
    """
    with tf.variable_scope("semantic",reuse=reuse):
        # first generator network
        with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
            logits, end_points = mobilenet_v1.mobilenet_v1(images, num_classes=1001, dropout_keep_prob=1, is_training=False)
    return tf.squeeze(end_points['AvgPool_1a'],[1,2])
Example 19
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '',
                       '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=True, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
          with slim.arg_scope([slim.batch_norm], fused=False):
            _, image_features = mobilenet_v1.mobilenet_v1_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='Conv2d_13_pointwise',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                use_explicit_padding=self._use_explicit_padding,
                scope=scope)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        # TODO(skligys): Enable fused batch norm once quantization supports it.
        with slim.arg_scope([slim.batch_norm], fused=False):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example 20
def build_model():
  """Builds graph for model to train with rewrites for quantization.

  Returns:
    g: Graph with fake quantization ops and batch norm folding suitable for
    training quantized weights.
    train_tensor: Train op for execution during training.
  """
  g = tf.Graph()
  with g.as_default(), tf.device(
      tf.train.replica_device_setter(FLAGS.ps_tasks)):
    inputs, labels = imagenet_input(is_training=True)
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(is_training=True)):
      logits, _ = mobilenet_v1.mobilenet_v1(
          inputs,
          is_training=True,
          depth_multiplier=FLAGS.depth_multiplier,
          num_classes=FLAGS.num_classes)

    tf.losses.softmax_cross_entropy(labels, logits)

    # Call rewriter to produce graph with fake quant ops and folded batch norms
    # quant_delay delays start of quantization till quant_delay steps, allowing
    # for better model accuracy.
    if FLAGS.quantize:
      tf.contrib.quantize.create_training_graph(quant_delay=get_quant_delay())

    total_loss = tf.losses.get_total_loss(name='total_loss')
    # Configure the learning rate using an exponential decay.
    num_epochs_per_decay = 2.5
    imagenet_size = 1271167
    decay_steps = int(imagenet_size / FLAGS.batch_size * num_epochs_per_decay)

    learning_rate = tf.train.exponential_decay(
        get_learning_rate(),
        tf.train.get_or_create_global_step(),
        decay_steps,
        _LEARNING_RATE_DECAY_FACTOR,
        staircase=True)
    opt = tf.train.GradientDescentOptimizer(learning_rate)

    train_tensor = slim.learning.create_train_op(
        total_loss,
        optimizer=opt)

  slim.summaries.add_scalar_summary(total_loss, 'total_loss', 'losses')
  slim.summaries.add_scalar_summary(learning_rate, 'learning_rate', 'training')
  return g, train_tensor
Example 21
    def create_model(self, inputs, num_classes, is_training):
        with slim.arg_scope(
                mobilenet_v1_arg_scope(
                    is_training,
                    FLAGS.weight_decay,
                    regularize_depthwise=FLAGS.regularize_depthwise)):
            self.logits, self.end_points = mobilenet_v1(
                inputs,
                num_classes,
                FLAGS.dropout_keep_prob,
                is_training,
                depth_multiplier=FLAGS.depth_multiplier)

        for var in tf.model_variables():
            if 'weights' in var.op.name:
                tf.add_to_collection(tf.GraphKeys.WEIGHTS, var)
Example 22
def mobilenet_v1_050(inputs, is_training, opts):
    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=is_training,
            weight_decay=opts.weight_decay,
            stddev=0.09,
            regularize_depthwise=False,
            batch_norm_decay=opts.batch_norm_decay,
            batch_norm_epsilon=opts.batch_norm_epsilon)):
        return mobilenet_v1.mobilenet_v1_050(
            inputs,
            num_classes=opts.num_classes,
            dropout_keep_prob=opts.dropout_keep_prob,
            is_training=is_training,
            min_depth=8,
            global_pool=opts.global_pool,
            spatial_squeeze=opts.spatial_squeeze,
            reuse=None)
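
A hedged usage sketch for the wrapper above. The opts object here is an assumption that simply bundles the attributes the wrapper reads; the numeric values are typical MobileNet defaults, not values taken from this example:

from types import SimpleNamespace
import tensorflow as tf

opts = SimpleNamespace(weight_decay=0.00004, batch_norm_decay=0.9997,
                       batch_norm_epsilon=0.001, num_classes=1001,
                       dropout_keep_prob=0.999, global_pool=False,
                       spatial_squeeze=True)
images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3])
logits, end_points = mobilenet_v1_050(images, is_training=False, opts=opts)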
Example 23
def freeze_mobilenet(meta_file, img_size=224, factor=1.0, num_classes=1001):

    tf.reset_default_graph()

    inp = tf.placeholder(tf.float32,
                         shape=(None, img_size, img_size, 3),
                         name="input")

    is_training = False
    weight_decay = 0.0
    arg_scope = mobilenet_v1.mobilenet_v1_arg_scope(weight_decay=weight_decay)
    with slim.arg_scope(arg_scope):
        logits, _ = mobilenet_v1.mobilenet_v1(inp,
                                              num_classes=num_classes,
                                              is_training=is_training,
                                              depth_multiplier=factor)

    predictions = tf.contrib.layers.softmax(logits)
    output = tf.identity(predictions, name='output')

    ckpt_file = meta_file.replace('.meta', '')
    output_graph_fn = ckpt_file.replace('.ckpt', '.pb')
    output_node_names = "output"

    rest_var = slim.get_variables_to_restore()

    with tf.Session() as sess:
        graph = tf.get_default_graph()
        input_graph_def = graph.as_graph_def()

        saver = tf.train.Saver(rest_var)
        saver.restore(sess, ckpt_file)

        # We use a built-in TF helper to export variables to constant
        output_graph_def = graph_util.convert_variables_to_constants(
            sess,  # The session is used to retrieve the weights
            input_graph_def,  # The graph_def is used to retrieve the nodes
            output_node_names.split(
                ","
            )  # The output node names are used to select the useful nodes
        )

        # Finally we serialize and dump the output graph to the filesystem
        with tf.gfile.GFile(output_graph_fn, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        print("{} ops in the final graph.".format(len(output_graph_def.node)))
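
As a usage note, the helper above can be pointed at a checkpoint's .meta file and it writes the frozen .pb alongside it; the path below is only a placeholder:

freeze_mobilenet('/path/to/mobilenet_v1_1.0_224.ckpt.meta', img_size=224,
                 factor=1.0, num_classes=1001)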
Example 24
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=False, weight_decay=self._weight_decay)):
                with tf.variable_scope('MobilenetV1',
                                       reuse=self._reuse_weights) as scope:
                    params = {}
                    if self._skip_last_stride:
                        params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
                            conv_depth_ratio_in_percentage=self._conv_depth_ratio_in_percentage)
                    _, activations = mobilenet_v1.mobilenet_v1_base(
                        preprocessed_inputs,
                        final_endpoint='Conv2d_11_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope,
                        **params)
            return activations['Conv2d_11_pointwise']
Example 25
def freeze_mobilenet(meta_file, img_size=224, factor=1.0, num_classes=1001):

  tf.reset_default_graph()

  inp = tf.placeholder(tf.float32,
                      shape=(None, img_size, img_size, 3),
                      name="input")

  is_training=False
  weight_decay = 0.0
  arg_scope = mobilenet_v1.mobilenet_v1_arg_scope(weight_decay=weight_decay)
  with slim.arg_scope(arg_scope):
    logits, _ = mobilenet_v1.mobilenet_v1(inp,
                                          num_classes=num_classes,
                                          is_training=is_training,
                                          depth_multiplier=factor)

  predictions = tf.contrib.layers.softmax(logits)
  output = tf.identity(predictions, name='output')

  ckpt_file = meta_file.replace('.meta', '')
  output_graph_fn = ckpt_file.replace('.ckpt', '.pb')
  output_node_names = "output"

  rest_var = slim.get_variables_to_restore()

  with tf.Session() as sess:
    graph = tf.get_default_graph()
    input_graph_def = graph.as_graph_def()

    saver = tf.train.Saver(rest_var)
    saver.restore(sess, ckpt_file)

    # We use a built-in TF helper to export variables to constant
    output_graph_def = graph_util.convert_variables_to_constants(
        sess, # The session is used to retrieve the weights
        input_graph_def, # The graph_def is used to retrieve the nodes
        # The output node names are used to select the useful nodes
        output_node_names.split(",")
    )

    # Finally we serialize and dump the output graph to the filesystem
    with tf.gfile.GFile(output_graph_fn, "wb") as f:
        f.write(output_graph_def.SerializeToString())
    print("{} ops in the final graph.".format(len(output_graph_def.node)))
Example 26
def discriminator(images, kp, n_output=10, reuse=False, train=True):
    """Define the discriminator model.
    Args:
        images: input images for the discriminator
        kp: keep probability for the discriminator's dropout layer
        n_output: discriminator output size
        reuse: whether to reuse network variables or not
        train: training mode or inference mode
    return:
        preds: discriminator output containing the image aesthetic rating and other variables
    """
    with tf.variable_scope("discriminator", reuse=reuse):
        with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
            logits, end_points = mobilenet_v1.mobilenet_v1(images, num_classes=n_output,
                                                       dropout_keep_prob=kp, is_training=train)
    preds = tf.nn.softmax(logits)
    print("preds: ",preds.get_shape().as_list())
    return preds
Example 27
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        preprocessed_inputs = shape_utils.check_min_image_dim(
            min_dim=33, image_tensor=preprocessed_inputs)

        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._train_batch_norm,
                    weight_decay=self._weight_decay)):
            with tf.variable_scope('MobilenetV1',
                                   reuse=self._reuse_weights) as scope:
                params = {}
                if self._skip_last_stride:
                    params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
                        conv_depth_ratio_in_percentage=self._conv_depth_ratio_in_percentage)
                _, activations = mobilenet_v1.mobilenet_v1_base(
                    preprocessed_inputs,
                    final_endpoint='Conv2d_11_pointwise',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope,
                    **params)
        return activations['Conv2d_11_pointwise'], activations
Example 28
def export_eval_pbtxt():
    """Export eval.pbtxt."""
    g = tf.Graph()
    with g.as_default():
        inputs = tf.placeholder(dtype=tf.float32,
                                shape=[None, IMAGE_SIZE, IMAGE_SIZE, CHANNEL])
        scope = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False,
                                                    weight_decay=0.0)
        with slim.arg_scope(scope):
            _, _ = mobilenet_v1.mobilenet_v1(
                inputs,
                is_training=False,
                depth_multiplier=FLAGS.depth_multiplier,
                num_classes=FLAGS.num_classes)
        if FLAGS.quantize:
            tf.contrib.quantize.create_eval_graph()
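        # Write the (optionally quantization-rewritten) graph definition as a text pbtxt file.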
        with tf.Session() as sess:
            with open(FLAGS.eval_graph_file, 'w') as f:
                f.write(str(g.as_graph_def()))
Example 29
  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

    preprocessed_inputs.get_shape().assert_has_rank(4)
    preprocessed_inputs = shape_utils.check_min_image_dim(
        min_dim=33, image_tensor=preprocessed_inputs)

    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(
            is_training=self._train_batch_norm,
            weight_decay=self._weight_decay)):
      with tf.variable_scope('MobilenetV1',
                             reuse=self._reuse_weights) as scope:
        params = {}
        if self._skip_last_stride:
          params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
              conv_depth_ratio_in_percentage=self._conv_depth_ratio_in_percentage)
        _, activations = mobilenet_v1.mobilenet_v1_base(
            preprocessed_inputs,
            final_endpoint='Conv2d_11_pointwise',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope,
            **params)
    return activations['Conv2d_11_pointwise'], activations
Example 30
  def __call__(self, x_input, batch_size=None, is_training=False):
    """Constructs model and return probabilities for given input."""
    reuse = True if self.built else None

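    # Apply Inception-style preprocessing to each image at the model's default input size.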
    preproc = tf.map_fn(
      lambda img: inception_preprocess(img,
                                       mobilenet_v1.mobilenet_v1.default_image_size,
                                       mobilenet_v1.mobilenet_v1.default_image_size), x_input)

    with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
      with tf.variable_scope(self.ckpt):
        logits, end_points = mobilenet_v1.mobilenet_v1(
            preproc, num_classes=self.num_classes, is_training=is_training,
            reuse=reuse)

      preds = tf.argmax(logits, axis=1)
    self.built = True
    self.logits = logits
    self.preds = preds
    return logits
Example 31
  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    net = proposal_feature_maps

    conv_depth = 1024
    if self._skip_last_stride:
      conv_depth_ratio = float(self._conv_depth_ratio_in_percentage) / 100.0
      conv_depth = int(float(conv_depth) * conv_depth_ratio)

    depth = lambda d: max(int(d * 1.0), 16)
    with tf.variable_scope('MobilenetV1', reuse=self._reuse_weights):
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=self._train_batch_norm,
              weight_decay=self._weight_decay)):
        with slim.arg_scope(
            [slim.conv2d, slim.separable_conv2d], padding='SAME'):
          net = slim.separable_conv2d(
              net,
              depth(conv_depth), [3, 3],
              depth_multiplier=1,
              stride=2,
              scope='Conv2d_12_pointwise')
          return slim.separable_conv2d(
              net,
              depth(conv_depth), [3, 3],
              depth_multiplier=1,
              stride=1,
              scope='Conv2d_13_pointwise')
Example 32
    def extract_features(self, preprocessed_inputs):

        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '',
             ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
Example 33
def build_model():
    """Build the mobilenet_v1 model for evaluation.

  Returns:
    g: graph with rewrites after insertion of quantization ops and batch norm
    folding.
    eval_ops: eval ops for inference.
    variables_to_restore: List of variables to restore from checkpoint.
  """
    g = tf.Graph()
    with g.as_default():
        inputs, labels = dataset_input(is_training=False)
        # inputs, labels = merge_dataset(is_training=False)

        scope = mobilenet_v1.mobilenet_v1_arg_scope(is_training=False,
                                                    weight_decay=0.0)
        with slim.arg_scope(scope):
            dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                                  FLAGS.dataset_split_name,
                                                  FLAGS.dataset_dir)
            logits, _ = mobilenet_v1.mobilenet_v1(
                inputs,
                is_training=False,
                depth_multiplier=FLAGS.depth_multiplier,
                num_classes=dataset.num_classes,
                model_scope=FLAGS.model_scope,
                logits_scope=FLAGS.logits_scope,
                conv2d_0_scope=FLAGS.conv2d_0_scope,
                depthwise_scope=FLAGS.depthwise_scope,
                pointwise_scope=FLAGS.pointwise_scope,
                pointwise_merged_mask=FLAGS.pointwise_merged_mask)

        if FLAGS.quantize:
            tf.contrib.quantize.create_eval_graph()

        eval_ops = metrics(logits, labels)

    tf.logging.info('Evaluating %s' % FLAGS.checkpoint_path)
    return g, eval_ops
Example 34
  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=self._train_batch_norm,
              weight_decay=self._weight_decay)):
        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
          _, activations = mobilenet_v1.mobilenet_v1_base(
              preprocessed_inputs,
              final_endpoint='Conv2d_11_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
    return activations['Conv2d_11_pointwise'], activations
Example 35
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
                    base_feature_map_depth=0,
                    num_layers=6,
                    image_features={
                        'image_features': image_features['Conv2d_11_pointwise']
                    })
        return feature_maps.values()
Example 36
 def create(self, images, num_classes, is_training):
   """See baseclass."""
   with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
     _, endpoints = mobilenet_v1.mobilenet_v1(
         inputs=images, num_classes=num_classes, is_training=is_training)
     return endpoints
Example 37
    def extract_features(self,
                         preprocessed_inputs,
                         state_saver=None,
                         state_name='lstm_state',
                         unroll_length=5,
                         scope=None):
        """Extracts features from preprocessed inputs.

    The features include the base network features, lstm features and SSD
    features, organized in the following name scope:

    <parent scope>/MobilenetV1/...
    <parent scope>/LSTM/...
    <parent scope>/FeatureMaps/...

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: A python string for the name to use with the state_saver.
      unroll_length: The number of steps to unroll the lstm.
      scope: The scope for the base network of the feature extractor.

    Returns:
      A list of tensors where the ith tensor has shape [batch, height_i,
      width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)
        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._is_training)):
            with (slim.arg_scope(self._conv_hyperparams_fn())
                  if self._override_base_feature_extractor_hyperparams else
                  context_manager.IdentityContextManager()):
                with slim.arg_scope([slim.batch_norm], fused=False):
                    # Base network.
                    with tf.variable_scope(scope,
                                           self._base_network_scope,
                                           reuse=self._reuse_weights) as scope:
                        net, image_features = mobilenet_v1.mobilenet_v1_base(
                            ops.pad_to_multiple(preprocessed_inputs,
                                                self._pad_to_multiple),
                            final_endpoint='Conv2d_13_pointwise',
                            min_depth=self._min_depth,
                            depth_multiplier=self._depth_multiplier,
                            scope=scope)

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with slim.arg_scope([slim.batch_norm],
                                fused=False,
                                is_training=self._is_training):
                # ConvLSTM layers.
                with tf.variable_scope(
                        'LSTM', reuse=self._reuse_weights) as lstm_scope:
                    lstm_cell = lstm_cells.BottleneckConvLSTMCell(
                        filter_size=(3, 3),
                        output_size=(net.shape[1].value, net.shape[2].value),
                        num_units=max(self._min_depth, self._lstm_state_depth),
                        activation=tf.nn.relu6,
                        visualize_gates=True)

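                    # Split the stacked frames into a list of per-step tensors for LSTM unrolling.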
                    net_seq = list(tf.split(net, unroll_length))
                    if state_saver is None:
                        init_state = lstm_cell.init_state(
                            state_name, net.shape[0].value / unroll_length,
                            tf.float32)
                    else:
                        c = state_saver.state('%s_c' % state_name)
                        h = state_saver.state('%s_h' % state_name)
                        init_state = (c, h)

                    # Identities added for inputting state tensors externally.
                    c_ident = tf.identity(init_state[0],
                                          name='lstm_state_in_c')
                    h_ident = tf.identity(init_state[1],
                                          name='lstm_state_in_h')
                    init_state = (c_ident, h_ident)

                    net_seq, states_out = rnn_decoder.rnn_decoder(
                        net_seq, init_state, lstm_cell, scope=lstm_scope)
                    batcher_ops = None
                    self._states_out = states_out
                    if state_saver is not None:
                        self._step = state_saver.state('%s_step' % state_name)
                        batcher_ops = [
                            state_saver.save_state('%s_c' % state_name,
                                                   states_out[-1][0]),
                            state_saver.save_state('%s_h' % state_name,
                                                   states_out[-1][1]),
                            state_saver.save_state('%s_step' % state_name,
                                                   self._step - 1)
                        ]
                    with tf_ops.control_dependencies(batcher_ops):
                        image_features['Conv2d_13_pointwise_lstm'] = tf.concat(
                            net_seq, 0)

                    # Identities added for reading output states, to be reused externally.
                    tf.identity(states_out[-1][0], name='lstm_state_out_c')
                    tf.identity(states_out[-1][1], name='lstm_state_out_h')

                # SSD layers.
                with tf.variable_scope('FeatureMaps',
                                       reuse=self._reuse_weights):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=self._feature_map_layout,
                        depth_multiplier=(self._depth_multiplier),
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return feature_maps.values()
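The tf.split / init_state bookkeeping above relies on the batch axis stacking unroll_length consecutive frames per clip. A minimal sketch of that shape arithmetic, with illustrative values that are assumptions rather than part of this example:

import tensorflow as tf

unroll_length = 5        # number of LSTM steps to unroll
clips_per_batch = 4      # assumed number of video clips per batch
# Base-network output for all frames of all clips, stacked along the batch axis.
net = tf.zeros([clips_per_batch * unroll_length, 8, 8, 256])

net_seq = tf.split(net, unroll_length)                 # 5 tensors, each [4, 8, 8, 256]
per_step_batch = net.shape[0].value // unroll_length   # 4, the batch size passed to init_state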
Example No. 38
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
        image_shape = preprocessed_inputs.get_shape()
        image_shape.assert_has_rank(4)
        image_height = image_shape[1].value
        image_width = image_shape[2].value

        if image_height is None or image_width is None:
            shape_assert = tf.Assert(
                tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                               tf.equal(tf.shape(preprocessed_inputs)[2],
                                        256)),
                ['image size must be 256 in both height and width.'])
            with tf.control_dependencies([shape_assert]):
                preprocessed_inputs = tf.identity(preprocessed_inputs)
        elif image_height != 256 or image_width != 256:
            raise ValueError(
                'image size must be 256 in both height and width;'
                ' image dim = %d,%d' % (image_height, image_width))

        feature_map_layout = {
            'from_layer': [
                'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
            ],
            'layer_depth': [-1, -1, 512, 256, 256],
            'conv_kernel_size': [-1, -1, 3, 3, 2],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
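A minimal usage sketch for this method. Here extractor is assumed to be an already-constructed instance of the feature-extractor class the method belongs to; the constructor and its arguments are not part of this example:

import tensorflow as tf

# 'extractor' is an assumed, already-built instance of the SSD feature extractor above.
preprocessed_inputs = tf.placeholder(
    tf.float32, shape=[1, 256, 256, 3], name='preprocessed_inputs')
feature_maps = extractor.extract_features(preprocessed_inputs)
for feature_map in feature_maps:
  print(feature_map.shape)  # spatial resolution shrinks from one map to the next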
  def extract_features(self,
                       preprocessed_inputs,
                       state_saver=None,
                       state_name='lstm_state',
                       unroll_length=5,
                       scope=None):
    """Extracts features from preprocessed inputs.

    The features include the base network features, LSTM features and SSD
    features, organized in the following name scopes:

    <parent scope>/MobilenetV1/...
    <parent scope>/LSTM/...
    <parent scope>/FeatureMaps/...

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float tensor
        representing a batch of consecutive frames from video clips.
      state_saver: A state saver object with methods `state` and `save_state`.
      state_name: A python string for the name to use with the state_saver.
      unroll_length: The number of steps to unroll the lstm.
      scope: The scope for the base network of the feature extractor.

    Returns:
      A list of tensors where the ith tensor has shape [batch, height_i,
      width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)
    with slim.arg_scope(
        mobilenet_v1.mobilenet_v1_arg_scope(is_training=self._is_training)):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        with slim.arg_scope([slim.batch_norm], fused=False):
          # Base network.
          with tf.variable_scope(
              scope, self._base_network_scope,
              reuse=self._reuse_weights) as scope:
            net, image_features = mobilenet_v1.mobilenet_v1_base(
                ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
                final_endpoint='Conv2d_13_pointwise',
                min_depth=self._min_depth,
                depth_multiplier=self._depth_multiplier,
                scope=scope)

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with slim.arg_scope(
          [slim.batch_norm], fused=False, is_training=self._is_training):
        # ConvLSTM layers.
        with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
          lstm_cell = lstm_cells.BottleneckConvLSTMCell(
              filter_size=(3, 3),
              output_size=(net.shape[1].value, net.shape[2].value),
              num_units=max(self._min_depth, self._lstm_state_depth),
              activation=tf.nn.relu6,
              visualize_gates=True)

          net_seq = list(tf.split(net, unroll_length))
          if state_saver is None:
            init_state = lstm_cell.init_state(
                state_name, net.shape[0].value // unroll_length, tf.float32)
          else:
            c = state_saver.state('%s_c' % state_name)
            h = state_saver.state('%s_h' % state_name)
            init_state = (c, h)

          # Identities added for inputting state tensors externally.
          c_ident = tf.identity(init_state[0], name='lstm_state_in_c')
          h_ident = tf.identity(init_state[1], name='lstm_state_in_h')
          init_state = (c_ident, h_ident)

          net_seq, states_out = rnn_decoder.rnn_decoder(
              net_seq, init_state, lstm_cell, scope=lstm_scope)
          batcher_ops = None
          self._states_out = states_out
          if state_saver is not None:
            self._step = state_saver.state('%s_step' % state_name)
            batcher_ops = [
                state_saver.save_state('%s_c' % state_name, states_out[-1][0]),
                state_saver.save_state('%s_h' % state_name, states_out[-1][1]),
                state_saver.save_state('%s_step' % state_name, self._step - 1)
            ]
          with tf_ops.control_dependencies(batcher_ops):
            image_features['Conv2d_13_pointwise_lstm'] = tf.concat(net_seq, 0)

          # Identities added for reading output states, to be reused externally.
          tf.identity(states_out[-1][0], name='lstm_state_out_c')
          tf.identity(states_out[-1][1], name='lstm_state_out_h')

        # SSD layers.
        with tf.variable_scope('FeatureMaps', reuse=self._reuse_weights):
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=self._feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
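A minimal usage sketch for the LSTM variant. lstm_extractor is assumed to be an already-constructed instance of this feature extractor (constructor arguments are not shown here). With state_saver=None the LSTM state is created internally and can be fed or fetched through the identity ops named lstm_state_in_c/h and lstm_state_out_c/h added above:

import tensorflow as tf

unroll_length = 5
# One clip of unroll_length consecutive frames, stacked along the batch axis.
frames = tf.placeholder(tf.float32, [unroll_length, 256, 256, 3], name='frames')
feature_maps = lstm_extractor.extract_features(
    frames, state_saver=None, state_name='lstm_state',
    unroll_length=unroll_length)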
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
    image_shape = preprocessed_inputs.get_shape()
    image_shape.assert_has_rank(4)
    image_height = image_shape[1].value
    image_width = image_shape[2].value

    if image_height is None or image_width is None:
      shape_assert = tf.Assert(
          tf.logical_and(tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                         tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
          ['image size must be 256 in both height and width.'])
      with tf.control_dependencies([shape_assert]):
        preprocessed_inputs = tf.identity(preprocessed_inputs)
    elif image_height != 256 or image_width != 256:
      raise ValueError('image size must be 256 in both height and width;'
                       ' image dim = %d,%d' % (image_height, image_width))

    feature_map_layout = {
        'from_layer': [
            'Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''
        ],
        'layer_depth': [-1, -1, 512, 256, 256],
        'conv_kernel_size': [-1, -1, 3, 3, 2],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

    return feature_maps.values()
Example No. 41
0
 def testBatchNormScopeDoesNotHaveIsTrainingWhenItsSetToNone(self):
   sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)
   self.assertNotIn('is_training', sc[slim.arg_scope_func_key(
       slim.batch_norm)])
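A minimal companion sketch, not taken from this example, showing why the extractors above wrap the base network in mobilenet_v1_arg_scope(is_training=None): with no 'is_training' entry on slim.batch_norm, the training flag can be injected later by an outer arg scope such as self._conv_hyperparams_fn(). The import paths below are assumptions; adjust them to your checkout:

import tensorflow.contrib.slim as slim
from nets import mobilenet_v1

sc = mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)
batch_norm_args = sc[slim.arg_scope_func_key(slim.batch_norm)]
print('is_training' in batch_norm_args)  # False: the caller decides the mode later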
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    with tf.variable_scope('MobilenetV1',
                           reuse=self._reuse_weights) as scope:
      with slim.arg_scope(
          mobilenet_v1.mobilenet_v1_arg_scope(
              is_training=None, regularize_depthwise=True)):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams
              else context_manager.IdentityContextManager()):
          _, image_features = mobilenet_v1.mobilenet_v1_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Conv2d_13_pointwise',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              use_explicit_padding=self._use_explicit_padding,
              scope=scope)

      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope('fpn', reuse=self._reuse_weights):
          feature_blocks = [
              'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise',
              'Conv2d_13_pointwise'
          ]
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append(feature_blocks[level - 2])
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(256))
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(fpn_features['top_down_{}'.format(
                feature_blocks[level - 2])])
          last_feature_map = fpn_features['top_down_{}'.format(
              feature_blocks[base_fpn_max_level - 2])]
          # Construct coarse features
          for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=depth_fn(256),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
            feature_maps.append(last_feature_map)
    return feature_maps
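A small worked sketch of the FPN level bookkeeping above; the min/max level values are illustrative assumptions, not taken from this example:

fpn_min_level, fpn_max_level = 3, 7
feature_blocks = [
    'Conv2d_3_pointwise', 'Conv2d_5_pointwise', 'Conv2d_11_pointwise',
    'Conv2d_13_pointwise'
]

base_fpn_max_level = min(fpn_max_level, 5)  # 5: deepest level backed by a MobileNet endpoint
top_down_inputs = [feature_blocks[level - 2]
                   for level in range(fpn_min_level, base_fpn_max_level + 1)]
# ['Conv2d_5_pointwise', 'Conv2d_11_pointwise', 'Conv2d_13_pointwise']

extra_convs = ['bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13)
               for i in range(base_fpn_max_level + 1, fpn_max_level + 1)]
# ['bottom_up_Conv2d_14', 'bottom_up_Conv2d_15']: stride-2 convs for levels 6 and 7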