Exemplo n.º 1
0
  def testForwardpassDeepLabv3plus(self):
    """Runs a single-scale forward pass and sanity-checks the logits.

    The model is configured with image-level features, ASPP batch norm and a
    1x1 logits kernel on a MobileNetV2 backbone. The logits are evaluated in a
    session for a random 33x33 input; each output type must yield exactly one
    logits entry for which `logits.any()` is truthy.
    """
    crop_size = [33, 33]
    outputs_to_num_classes = {'semantic': 3}

    model_options = common.ModelOptions(
        outputs_to_num_classes,
        crop_size,
        output_stride=16
    )._replace(
        add_image_level_feature=True,
        aspp_with_batch_norm=True,
        logits_kernel_size=1,
        model_variant='mobilenet_v2')  # Employ MobileNetv2 for fast test.

    g = tf.Graph()
    with g.as_default():
      with self.test_session(graph=g) as sess:
        inputs = tf.random_uniform(
            (1, crop_size[0], crop_size[1], 3))
        outputs_to_scales_to_logits = model.multi_scale_logits(
            inputs,
            model_options,
            image_pyramid=[1.0])

        sess.run(tf.global_variables_initializer())
        # Replace the graph tensors with their evaluated values.
        outputs_to_scales_to_logits = sess.run(outputs_to_scales_to_logits)

        # Check computed results for each output type.
        for output in outputs_to_num_classes:
          scales_to_logits = outputs_to_scales_to_logits[output]
          # Expect only one output. `assertEqual` is used because the
          # `assertEquals` alias is deprecated and was removed in Python 3.12.
          self.assertEqual(len(scales_to_logits), 1)
          for logits in scales_to_logits.values():
            self.assertTrue(logits.any())
Exemplo n.º 2
0
 def testBuildDeepLabWithDensePredictionCell(self):
   """Checks the endpoints built when a dense prediction cell is configured.

   The model is built for a single image scale (1.0) on a MobileNetV2
   backbone; every output type is expected to expose exactly one endpoint,
   'merged_logits'.
   """
   num_images = 1
   height, width = 33, 33
   outputs_to_num_classes = {'semantic': 2}
   expected_endpoints = ['merged_logits']
   cell_config = [
       dict(kernel=3, rate=[1, 6], op='conv', input=-1),
       dict(kernel=3, rate=[18, 15], op='conv', input=0),
   ]
   base_options = common.ModelOptions(
       outputs_to_num_classes, [height, width], output_stride=16)
   model_options = base_options._replace(
       aspp_with_batch_norm=True,
       model_variant='mobilenet_v2',
       dense_prediction_cell_config=cell_config)

   graph = tf.Graph()
   with graph.as_default(), self.test_session(graph=graph):
     inputs = tf.random_uniform((num_images, height, width, 3))
     results_by_output = model.multi_scale_logits(
         inputs, model_options, image_pyramid=[1.0])
     for output in outputs_to_num_classes:
       endpoints = results_by_output[output]
       # Names first, then the count, for every output type.
       self.assertListEqual(list(endpoints), expected_endpoints)
       self.assertEqual(len(endpoints), 1)
Exemplo n.º 3
0
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
  """Creates one DeepLab clone and registers its training losses.

  Args:
    inputs_queue: Prefetch queue; `dequeue()` must yield a mapping holding the
      image under `common.IMAGE` and the label under `common.LABEL`.
    outputs_to_num_classes: Mapping from output type to number of classes,
      e.g. outputs_to_num_classes['semantic'] = 21 for semantic segmentation
      with 21 classes.
    ignore_label: Ignore label, forwarded to the loss helper.

  Returns:
    The nested mapping returned by `model.multi_scale_logits`: output type ->
    logits name -> logits. If the scales equal [1.0, 1.5], the inner keys
    would include 'merged_logits', 'logits_1.00' and 'logits_1.50'.
  """
  batch = inputs_queue.dequeue()

  # Route image and label through named identity ops so that summaries can
  # refer to them by name.
  for key in (common.IMAGE, common.LABEL):
    batch[key] = tf.identity(batch[key], name=key)

  options = common.ModelOptions(
      outputs_to_num_classes=outputs_to_num_classes,
      crop_size=FLAGS.train_crop_size,
      atrous_rates=FLAGS.atrous_rates,
      output_stride=FLAGS.output_stride)
  logits_by_output = model.multi_scale_logits(
      batch[common.IMAGE],
      model_options=options,
      image_pyramid=FLAGS.image_pyramid,
      weight_decay=FLAGS.weight_decay,
      is_training=True,
      fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

  # Name the merged-logits node as well, again for the summaries.
  primary_logits = logits_by_output[common.OUTPUT_TYPE]
  merged = primary_logits[model.get_merged_logits_scope()]
  primary_logits[model.get_merged_logits_scope()] = tf.identity(
      merged, name=common.OUTPUT_TYPE)

  # Hand every output type's logits to the softmax cross-entropy loss helper.
  for output_type, class_count in six.iteritems(outputs_to_num_classes):
    train_utils.add_softmax_cross_entropy_loss_for_each_scale(
        logits_by_output[output_type],
        batch[common.LABEL],
        class_count,
        ignore_label,
        loss_weight=1.0,
        upsample_logits=FLAGS.upsample_logits,
        scope=output_type)

  return logits_by_output
Exemplo n.º 4
0
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
  """Builds a single DeepLab clone plus its softmax cross-entropy losses.

  Args:
    inputs_queue: A prefetch queue whose `dequeue()` result is indexed by
      `common.IMAGE` and `common.LABEL`.
    outputs_to_num_classes: Map from output type to the number of classes
      (for 21-class semantic segmentation,
      outputs_to_num_classes['semantic'] = 21).
    ignore_label: Ignore label, passed through to the loss helper.

  Returns:
    What `model.multi_scale_logits` returned: a map from output type to a
    dictionary of logits names to logits. For scales [1.0, 1.5] the
    dictionary keys would include 'merged_logits', 'logits_1.00' and
    'logits_1.50'.
  """
  samples = inputs_queue.dequeue()
  image_key, label_key = common.IMAGE, common.LABEL

  # Named identity nodes make the input and label addressable from summaries.
  samples[image_key] = tf.identity(samples[image_key], name=image_key)
  samples[label_key] = tf.identity(samples[label_key], name=label_key)

  all_logits = model.multi_scale_logits(
      samples[image_key],
      model_options=common.ModelOptions(
          outputs_to_num_classes=outputs_to_num_classes,
          crop_size=FLAGS.train_crop_size,
          atrous_rates=FLAGS.atrous_rates,
          output_stride=FLAGS.output_stride),
      image_pyramid=FLAGS.image_pyramid,
      weight_decay=FLAGS.weight_decay,
      is_training=True,
      fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

  # Same treatment for the merged logits of the main output type.
  merged_key = model.MERGED_LOGITS_SCOPE
  main_output_logits = all_logits[common.OUTPUT_TYPE]
  main_output_logits[merged_key] = tf.identity(
      main_output_logits[merged_key], name=common.OUTPUT_TYPE)

  for output_name, class_count in six.iteritems(outputs_to_num_classes):
    train_utils.add_softmax_cross_entropy_loss_for_each_scale(
        all_logits[output_name],
        samples[label_key],
        class_count,
        ignore_label,
        loss_weight=1.0,
        upsample_logits=FLAGS.upsample_logits,
        scope=output_name)

  return all_logits
Exemplo n.º 5
0
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
    """Builds a DeepLab clone and adds its losses; returns nothing.

    Args:
        iterator: A tf.data.Iterator whose `get_next()` result is indexed by
            `common.IMAGE` and `common.LABEL`.
        outputs_to_num_classes: Map from output type to the number of classes,
            e.g. outputs_to_num_classes['semantic'] = 21 for semantic
            segmentation with 21 classes.
        ignore_label: Ignore label, passed through to the loss helper.
    """
    batch = iterator.get_next()

    # Named identity nodes let summaries refer to the input and the label.
    for key in (common.IMAGE, common.LABEL):
        batch[key] = tf.identity(batch[key], name=key)

    # The crop size flag entries are converted to ints before use.
    options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=[int(dim) for dim in FLAGS.train_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    logits_by_output = model.multi_scale_logits(
        batch[common.IMAGE],
        model_options=options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
        nas_training_hyper_parameters=dict(
            drop_path_keep_prob=FLAGS.drop_path_keep_prob,
            total_training_steps=FLAGS.training_number_of_steps))

    # Name the merged-logits node so it can be added to summaries too.
    merged_key = model.MERGED_LOGITS_SCOPE
    primary_logits = logits_by_output[common.OUTPUT_TYPE]
    primary_logits[merged_key] = tf.identity(
        primary_logits[merged_key], name=common.OUTPUT_TYPE)

    for output_type, class_count in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            logits_by_output[output_type],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=options.label_weights,
            upsample_logits=FLAGS.upsample_logits,
            hard_example_mining_step=FLAGS.hard_example_mining_step,
            top_k_percent_pixels=FLAGS.top_k_percent_pixels,
            scope=output_type,
            # Extra arguments introduced by this fork of the loss helper;
            # the batch size passed is the per-clone share.
            use_hybrid_loss=FLAGS.use_hybrid_loss,
            batch_size=FLAGS.train_batch_size // FLAGS.num_clones)
Exemplo n.º 6
0
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
  """Builds a DeepLab clone, adds its losses and logs summaries.

  Nothing is returned; the function only adds nodes to the graph.

  Args:
    iterator: A tf.data.Iterator whose `get_next()` result is indexed by
      `common.IMAGE` and `common.LABEL`.
    outputs_to_num_classes: Map from output type to the number of classes,
      e.g. outputs_to_num_classes['semantic'] = 21 for semantic segmentation
      with 21 classes.
    ignore_label: Ignore label, passed through to the loss helper.
  """
  batch = iterator.get_next()

  # Named identity nodes let summaries refer to the input and the label.
  for key in (common.IMAGE, common.LABEL):
    batch[key] = tf.identity(batch[key], name=key)

  options = common.ModelOptions(
      outputs_to_num_classes=outputs_to_num_classes,
      crop_size=FLAGS.train_crop_size,
      atrous_rates=FLAGS.atrous_rates,
      output_stride=FLAGS.output_stride)

  logits_by_output = model.multi_scale_logits(
      batch[common.IMAGE],
      model_options=options,
      image_pyramid=FLAGS.image_pyramid,
      weight_decay=FLAGS.weight_decay,
      is_training=True,
      fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
      nas_training_hyper_parameters=dict(
          drop_path_keep_prob=FLAGS.drop_path_keep_prob,
          total_training_steps=FLAGS.training_number_of_steps))

  # Name the merged-logits node so it can be added to summaries too.
  merged_key = model.MERGED_LOGITS_SCOPE
  primary_logits = logits_by_output[common.OUTPUT_TYPE]
  primary_logits[merged_key] = tf.identity(
      primary_logits[merged_key], name=common.OUTPUT_TYPE)

  for output_type, class_count in six.iteritems(outputs_to_num_classes):
    train_utils.add_softmax_cross_entropy_loss_for_each_scale(
        logits_by_output[output_type],
        batch[common.LABEL],
        class_count,
        ignore_label,
        loss_weight=1.0,
        upsample_logits=FLAGS.upsample_logits,
        hard_example_mining_step=FLAGS.hard_example_mining_step,
        top_k_percent_pixels=FLAGS.top_k_percent_pixels,
        scope=output_type)

    # Summaries are logged once per output type, always from the merged
    # logits of `common.OUTPUT_TYPE`.
    _log_summaries(
        batch[common.IMAGE],
        batch[common.LABEL],
        class_count,
        primary_logits[model.MERGED_LOGITS_SCOPE])
Exemplo n.º 7
0
def _val_loss(dataset, image, label, num_of_classes, ignore_label):
    """Builds the validation-loss graph, reusing existing model variables.

    The logits are rebuilt under `reuse=True`. The softmax cross-entropy
    losses are added inside the 'val_loss' name scope, and scalar summaries
    are created for each collected loss, the regularization loss and their
    total.

    NOTE(review): `num_of_classes` is never read; the class count comes from
    `dataset.num_of_classes`. The logits are also built with
    `is_training=True`. Confirm both are intended for validation.

    Args:
        dataset: Object exposing `num_of_classes`.
        image: Image tensor fed to `model.multi_scale_logits`.
        label: Label tensor passed to the loss helper.
        num_of_classes: Unused (see the note above).
        ignore_label: Ignore label, passed through to the loss helper.

    Returns:
        A tuple `(total_loss, val_summary_op)`. The summary op merges only the
        'total_validation_loss' scalar.
    """
    class_counts = {common.OUTPUT_TYPE: dataset.num_of_classes}
    collected_summaries = []

    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        options = common.ModelOptions(
            outputs_to_num_classes=class_counts,
            crop_size=FLAGS.train_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        logits_by_output = model.multi_scale_logits(
            image,
            model_options=options,
            image_pyramid=FLAGS.image_pyramid,
            weight_decay=FLAGS.weight_decay,
            is_training=True,
            fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
            nas_training_hyper_parameters=dict(
                drop_path_keep_prob=FLAGS.drop_path_keep_prob,
                total_training_steps=FLAGS.training_number_of_steps))

        with tf.name_scope('val_loss') as scope:
            for output_type, class_count in six.iteritems(class_counts):
                train_utils.add_softmax_cross_entropy_loss_for_each_scale(
                    logits_by_output[output_type],
                    label,
                    class_count,
                    ignore_label,
                    loss_weight=1.0,
                    upsample_logits=FLAGS.upsample_logits,
                    hard_example_mining_step=FLAGS.hard_example_mining_step,
                    top_k_percent_pixels=FLAGS.top_k_percent_pixels,
                    scope=output_type)

            # Only losses registered under this name scope are collected.
            scoped_losses = tf.losses.get_losses(scope=scope)
            for scoped_loss in scoped_losses:
                tf.summary.scalar(
                    'Val_losses/%s' % scoped_loss.op.name, scoped_loss)

            reg_loss = tf.losses.get_regularization_loss(scope=scope)
            tf.summary.scalar('Val_losses/%s' % reg_loss.op.name, reg_loss)

            data_loss = tf.add_n(scoped_losses)
            total_loss = tf.add_n([data_loss, reg_loss])
            total_summary = tf.summary.scalar(
                'total_validation_loss', total_loss)
            collected_summaries.append(total_summary)

    val_summary_op = tf.summary.merge(collected_summaries)
    return total_loss, val_summary_op
Exemplo n.º 8
0
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
  """Builds a DeepLab clone and adds its losses; returns nothing.

  The "original author's note" comments below record what the author wrote
  about the flag values they used; they are not verified here.

  Args:
    iterator: A tf.data.Iterator whose `get_next()` result is indexed by
      `common.IMAGE` and `common.LABEL`.
    outputs_to_num_classes: Map from output type to the number of classes,
      e.g. outputs_to_num_classes['semantic'] = 21 for semantic segmentation
      with 21 classes.
    ignore_label: Ignore label, passed through to the loss helper.
  """
  batch = iterator.get_next()

  # Named identity nodes let summaries refer to the input and the label.
  for key in (common.IMAGE, common.LABEL):
    batch[key] = tf.identity(batch[key], name=key)

  # Original author's note: output_stride is set to 8 to ensure tensor sizes
  # match for the concat op in model.py.
  options = common.ModelOptions(
      outputs_to_num_classes=outputs_to_num_classes,
      crop_size=[int(dim) for dim in FLAGS.train_crop_size],
      atrous_rates=FLAGS.atrous_rates,
      output_stride=FLAGS.output_stride)

  # Original author's notes on the arguments below:
  #   image_pyramid: not used; only for multi-scale, and single-scale is used.
  #   weight_decay: the default in nas_network is used.
  #   fine_tune_batch_norm: set to False because the batch size is 8.
  #   drop_path_keep_prob: set to 1.0 earlier; the right value is uncertain.
  logits_by_output = model.multi_scale_logits(
      batch[common.IMAGE],
      model_options=options,
      image_pyramid=FLAGS.image_pyramid,
      weight_decay=FLAGS.weight_decay,
      is_training=True,
      fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
      nas_training_hyper_parameters=dict(
          drop_path_keep_prob=FLAGS.drop_path_keep_prob,
          total_training_steps=FLAGS.training_number_of_steps))

  # Name the merged-logits node so it can be added to summaries too.
  merged_key = model.MERGED_LOGITS_SCOPE
  primary_logits = logits_by_output[common.OUTPUT_TYPE]
  primary_logits[merged_key] = tf.identity(
      primary_logits[merged_key], name=common.OUTPUT_TYPE)

  # Original author's notes: upsample_logits was set to True earlier,
  # hard_example_mining_step to 0 and top_k_percent_pixels to 1.
  for output_type, class_count in six.iteritems(outputs_to_num_classes):
    train_utils.add_softmax_cross_entropy_loss_for_each_scale(
        logits_by_output[output_type],
        batch[common.LABEL],
        class_count,
        ignore_label,
        loss_weight=options.label_weights,
        upsample_logits=FLAGS.upsample_logits,
        hard_example_mining_step=FLAGS.hard_example_mining_step,
        top_k_percent_pixels=FLAGS.top_k_percent_pixels,
        scope=output_type)
Exemplo n.º 9
0
    def _construct_and_fill_model(self):
        """Creates the input placeholders and the train/validation logits.

        Reads the GPU devices, input size, atrous rates, output stride and
        weight decay from `self.config` and stores on the instance:
        `device_ids`, `input_size`, `inputs`, `labels`,
        `outputs_to_scales_to_logits` (built with `is_training=True`) and
        `outputs_to_scales_to_logits_val` (built with `is_training=False`
        under a variable scope with `reuse=True`).
        """
        # TODO: Factor out progress in base class
        sly.Progress('Building model:', 1).iter_done_report()

        self.device_ids = sly.env.remap_gpu_devices(self.config['gpu_devices'])
        size_cfg = self.config['input_size']
        self.input_size = (size_cfg['height'], size_cfg['width'])

        model_options = ModelOptions(
            outputs_to_num_classes={'semantic': self.model_out_dims},
            crop_size=self.input_size,
            atrous_rates=self.config['atrous_rates'],
            output_stride=self.config['output_stride'])

        # The leading dimension is left unspecified; images get 3 channels
        # and labels get 1.
        image_shape = [None] + list(self.input_size) + [3]
        self.inputs = tf.placeholder(tf.float32, image_shape)
        label_shape = [None] + list(self.input_size) + [1]
        self.labels = tf.placeholder(tf.int32, label_shape)

        def build_logits(is_training):
            # Both graphs share every argument except `is_training`.
            return model.multi_scale_logits(
                images=self.inputs,
                model_options=model_options,
                image_pyramid=None,
                weight_decay=self.config['weight_decay'],
                is_training=is_training,
                fine_tune_batch_norm=False)

        self.outputs_to_scales_to_logits = build_logits(is_training=True)

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            self.outputs_to_scales_to_logits_val = build_logits(
                is_training=False)
Exemplo n.º 10
0
    def testBuildDeepLabv2(self):
        """Checks logits endpoints for two backbones and two image pyramids.

        For every model variant and image pyramid, each output type must
        expose the expected endpoint names and the expected number of logits.
        """
        batch_size = 2
        height, width = 41, 41

        # Two output types and two model variants are exercised.
        outputs_to_num_classes = {'semantic': 3, 'direction': 2}
        model_variants = ['xception_65', 'mobilenet_v2']

        # The two image pyramids with the endpoints and logits count expected
        # for each of them.
        image_pyramids = [[1], [0.5, 1]]
        expected_endpoints = [['merged_logits'],
                              ['merged_logits', 'logits_0.50', 'logits_1.00']]
        expected_num_logits = [1, 3]
        cases = list(
            zip(image_pyramids, expected_endpoints, expected_num_logits))

        for model_variant in model_variants:
            base_options = common.ModelOptions(outputs_to_num_classes)
            model_options = base_options._replace(
                add_image_level_feature=False,
                aspp_with_batch_norm=False,
                aspp_with_separable_conv=False,
                model_variant=model_variant)

            for image_pyramid, endpoints, num_logits in cases:
                graph = tf.Graph()
                with graph.as_default(), self.test_session(graph=graph):
                    inputs = tf.random_uniform((batch_size, height, width, 3))
                    logits_by_output = model.multi_scale_logits(
                        inputs, model_options, image_pyramid=image_pyramid)

                    # Check computed results for each output type.
                    for output in outputs_to_num_classes:
                        scales_to_logits = logits_by_output[output]
                        self.assertListEqual(
                            sorted(scales_to_logits.keys()),
                            sorted(endpoints))

                        # Expected number of logits = len(image_pyramid) + 1,
                        # since the last logits is merged from all the scales.
                        self.assertEqual(len(scales_to_logits), num_logits)
Exemplo n.º 11
0
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab.

    Args:
        inputs_queue: A prefetch queue for images and labels.
        outputs_to_num_classes: A map from output type to the number of
            classes. For example, for the task of semantic segmentation with
            21 semantic classes, we would have
            outputs_to_num_classes['semantic'] = 21.
        ignore_label: Ignore label. Not used by this variant, which adds no
            losses; kept so the signature matches its callers.

    Returns:
        A map of maps from output_type (e.g., semantic prediction) to a
        dictionary of multi-scale logits names to logits. For each
        output_type, the dictionary has keys which correspond to the scales
        and values which correspond to the logits. For example, if `scales`
        equals [1.0, 1.5], then the keys would include 'merged_logits',
        'logits_1.00' and 'logits_1.50'.
    """
    samples = inputs_queue.dequeue()

    # Name the input and label nodes so they can be added to summaries.
    samples[common.IMAGE] = tf.identity(samples[common.IMAGE], 'input_image')
    samples[common.LABEL] = tf.identity(samples[common.LABEL], 'input_label')

    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)
    outputs_to_scales_to_logits = model.multi_scale_logits(
        samples[common.IMAGE],
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # Name the merged-logits node so it can be added to summaries. This has to
    # run after the logits are built: doing it earlier read
    # `outputs_to_scales_to_logits` before assignment (UnboundLocalError).
    output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
    output_type_dict[model._MERGED_LOGITS_SCOPE] = tf.identity(
        output_type_dict[model._MERGED_LOGITS_SCOPE],
        name='semantic_merged_logits')

    return outputs_to_scales_to_logits
Exemplo n.º 12
0
  def testBuildDeepLabv2(self):
    """Verifies the logits endpoints across backbones and image pyramids.

    Each (model variant, image pyramid) combination is built in its own
    graph; every output type must then expose the expected endpoint names and
    the expected number of logits.
    """
    batch_size = 2
    crop_height, crop_width = 41, 41

    # Two output types, two model variants.
    outputs_to_num_classes = {'semantic': 3, 'direction': 2}
    model_variants = ['xception_65', 'mobilenet_v2']

    # (image pyramid, expected endpoint names, expected number of logits).
    scenarios = [
        ([1], ['merged_logits'], 1),
        ([0.5, 1], ['merged_logits', 'logits_0.50', 'logits_1.00'], 3),
    ]

    for model_variant in model_variants:
      base_options = common.ModelOptions(outputs_to_num_classes)
      model_options = base_options._replace(
          add_image_level_feature=False,
          aspp_with_batch_norm=False,
          aspp_with_separable_conv=False,
          model_variant=model_variant)

      for image_pyramid, endpoints, num_logits in scenarios:
        graph = tf.Graph()
        with graph.as_default(), self.test_session(graph=graph):
          inputs = tf.random_uniform(
              (batch_size, crop_height, crop_width, 3))
          logits_by_output = model.multi_scale_logits(
              inputs, model_options, image_pyramid=image_pyramid)

          # Check computed results for each output type.
          for output in outputs_to_num_classes:
            scales_to_logits = logits_by_output[output]
            self.assertListEqual(
                sorted(scales_to_logits.keys()), sorted(endpoints))

            # Expected number of logits = len(image_pyramid) + 1, since the
            # last logits is merged from all the scales.
            self.assertEqual(len(scales_to_logits), num_logits)
def _build_deeplab_inputs(model_inputs, outputs_to_num_classes,
                          weight_decay=0.01, fine_tune_batch_norm=True):
    """Builds the DeepLab logits graph for the given inputs.

    Modified from `_build_deeplab` in train.py: this function only builds the
    model. It dequeues no samples and adds no losses.

    Args:
        model_inputs: Image tensor passed to `model.multi_scale_logits`.
        outputs_to_num_classes: A map from output type to the number of
            classes.
        weight_decay: Weight decay passed to the model. Defaults to 0.01, the
            value that was previously hard-coded here.
        fine_tune_batch_norm: Whether to fine-tune batch norm parameters.
            Defaults to True, the value that was previously hard-coded here.

    Returns:
        The value returned by `model.multi_scale_logits` for `model_inputs`.
    """
    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)
    outputs_to_scales_to_logits = model.multi_scale_logits(
        model_inputs,
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=weight_decay,
        is_training=True,
        fine_tune_batch_norm=fine_tune_batch_norm)

    return outputs_to_scales_to_logits
Exemplo n.º 14
0
 def testBuildDeepLabWithDensePredictionCell(self):
     """Builds the model with a dense prediction cell and checks endpoints.

     With a single image scale, each output type must expose exactly the
     'merged_logits' endpoint.
     """
     batch_size = 1
     crop_height, crop_width = 33, 33
     outputs_to_num_classes = {'semantic': 2}
     expected_endpoints = ['merged_logits']

     # Two conv branches of the dense prediction cell.
     first_branch = {'kernel': 3, 'rate': [1, 6], 'op': 'conv', 'input': -1}
     second_branch = {'kernel': 3, 'rate': [18, 15], 'op': 'conv', 'input': 0}
     dense_prediction_cell_config = [first_branch, second_branch]

     base_options = common.ModelOptions(
         outputs_to_num_classes, [crop_height, crop_width], output_stride=16)
     model_options = base_options._replace(
         aspp_with_batch_norm=True,
         model_variant='mobilenet_v2',
         dense_prediction_cell_config=dense_prediction_cell_config)

     graph = tf.Graph()
     with graph.as_default(), self.test_session(graph=graph):
         inputs = tf.random_uniform(
             (batch_size, crop_height, crop_width, 3))
         results_by_output = model.multi_scale_logits(
             inputs, model_options, image_pyramid=[1.0])
         for output in outputs_to_num_classes:
             endpoints = results_by_output[output]
             # The keys are materialized as a list before comparing.
             self.assertListEqual(list(endpoints.keys()), expected_endpoints)
             self.assertEqual(len(endpoints), 1)
Exemplo n.º 15
0
  def testForwardpassDeepLabv3plus(self):
    """Runs a single-scale forward pass and sanity-checks the logits.

    The model uses the 'xception_65' variant with image-level features,
    separable-convolution ASPP and a decoder at output stride 4. The logits
    are evaluated in a session for a random 33x33 input; each output type
    must yield exactly one logits entry for which `logits.any()` is truthy.
    """
    crop_size = [33, 33]
    outputs_to_num_classes = {'semantic': 3}

    model_options = common.ModelOptions(
        outputs_to_num_classes,
        crop_size,
        atrous_rates=[6],
        output_stride=16
    )._replace(
        add_image_level_feature=True,
        aspp_with_batch_norm=True,
        aspp_with_separable_conv=True,
        decoder_output_stride=4,
        decoder_use_separable_conv=True,
        logits_kernel_size=1,
        model_variant='xception_65')

    g = tf.Graph()
    with g.as_default():
      with self.test_session(graph=g) as sess:
        inputs = tf.random_uniform(
            (1, crop_size[0], crop_size[1], 3))
        outputs_to_scales_to_logits = model.multi_scale_logits(
            inputs,
            model_options,
            image_pyramid=[1.0])

        sess.run(tf.global_variables_initializer())
        # Replace the graph tensors with their evaluated values.
        outputs_to_scales_to_logits = sess.run(outputs_to_scales_to_logits)

        # Check computed results for each output type.
        for output in outputs_to_num_classes:
          scales_to_logits = outputs_to_scales_to_logits[output]
          # Expect only one output. `assertEqual` is used because the
          # `assertEquals` alias is deprecated and was removed in Python 3.12.
          self.assertEqual(len(scales_to_logits), 1)
          for logits in scales_to_logits.values():
            self.assertTrue(logits.any())
Exemplo n.º 16
0
    def train(self):
        """Builds the training graph and runs it with `slim.learning.train`.

        Steps, in order: fetch a training batch via `get_dataset`, build the
        multi-scale logits, record metrics through `self.get_metric`, add the
        softmax cross-entropy losses, sum the regularization and model loss
        collections, create a momentum-optimizer update, prepare checkpoint
        restoration, and start the slim training loop. All settings are read
        from `self.flags`.
        """
        FLAGS = self.flags
        image_batch, annotation_batch = get_dataset(
            FLAGS, mode=tf.estimator.ModeKeys.TRAIN)

        outputs_to_num_classes = {common.OUTPUT_TYPE: self.num_classes}
        model_options = common.ModelOptions(
            outputs_to_num_classes=outputs_to_num_classes,
            crop_size=FLAGS.train_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        # outputs_to_scales_to_logits[key_1][key_2]=logits
        # key_1 in outputs_to_num_classes.keys()
        # key_2 in ['logits_%.2f' % image_scale for image_scale in image_pyramid]+[MERGED_LOGITS_SCOPE]
        outputs_to_scales_to_logits = model.multi_scale_logits(
            image_batch,
            model_options=model_options,
            image_pyramid=FLAGS.image_pyramid,
            weight_decay=FLAGS.weight_decay,
            is_training=True,
            fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

        # Add name to graph node so we can add to summary.
        output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
        logits = output_type_dict[model.MERGED_LOGITS_SCOPE] = tf.identity(
            output_type_dict[model.MERGED_LOGITS_SCOPE],
            name=common.OUTPUT_TYPE)
        labels = annotation_batch

        # Bring logits and labels to the same spatial size before computing
        # the training metrics.
        if FLAGS.upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(logits,
                                              tf.shape(labels)[1:3],
                                              align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                annotation_batch, tf.shape(logits)[1:3], align_corners=True)

        self.get_metric(scaled_labels, logits, 'train')

        # Accumulate the value returned by the loss helper for each output.
        # NOTE(review): assumes this project's helper returns the loss tensor
        # it adds -- TODO confirm against train_utils.
        softmax_loss = 0
        # outputs_to_scales_to_logits[output]={}
        for output, num_classes in outputs_to_num_classes.items():
            softmax_loss += train_utils.add_softmax_cross_entropy_loss_for_each_scale(
                outputs_to_scales_to_logits[output],
                annotation_batch,
                num_classes,
                self.ignore_label,
                loss_weight=1.0,
                upsample_logits=FLAGS.upsample_logits,
                scope=output)

        # Sum the graph's regularization-loss and loss collections and expose
        # both sums as scalar summaries.
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        reg_loss = tf.add_n(regularization_losses)
        tf.summary.scalar('losses/reg_loss', reg_loss)
        model_losses = tf.get_collection(tf.GraphKeys.LOSSES)
        model_loss = tf.add_n(model_losses)
        tf.summary.scalar('losses/model_loss', model_loss)

        learning_rate = train_utils.get_model_learning_rate(
            FLAGS.learning_policy, FLAGS.base_learning_rate,
            FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
            FLAGS.training_number_of_steps, FLAGS.learning_power,
            FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)

        optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
        tf.summary.scalar('learning_rate', learning_rate)

        # Guard: the loss accumulated from the helper's return values must
        # equal the sum of the LOSSES collection before the total is formed.
        with tf.control_dependencies(
            [tf.assert_equal(softmax_loss, model_loss)]):
            total_loss = model_loss + reg_loss
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            tf.summary.scalar('losses/total_loss', total_loss)

        global_step = tf.train.get_or_create_global_step()

        # Group the ops in the UPDATE_OPS collection with the gradient update
        # so that a single op triggers all of them.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        grads_and_vars = optimizer.compute_gradients(total_loss)
        # Create gradient update op.
        grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)

        #        train_tensor=optimizer.minimize(total_loss,global_step)
        #        train_tensor=slim.learning.create_train_op(total_loss=total_loss,
        #            optimizer=optimizer,
        #            global_step=global_step)

        #BUG update the weight twice???
        # The train tensor is the total loss, gated on `update_op` through a
        # control dependency.
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        summary_op = tf.summary.merge_all()

        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = True

        # Restore from the initial checkpoint, always excluding 'global_step'
        # and, unless FLAGS.initialize_last_layer is set, the last layers.
        last_layers = model.get_extra_layer_scopes(
            FLAGS.last_layers_contain_logits_only)
        exclude_list = ['global_step']
        if not FLAGS.initialize_last_layer:
            exclude_list.extend(last_layers)
        variables_to_restore = slim.get_variables_to_restore(
            exclude=exclude_list)
        init_fn = slim.assign_from_checkpoint_fn(
            model_path=FLAGS.tf_initial_checkpoint,
            var_list=variables_to_restore,
            ignore_missing_vars=True)

        #use the train_tensor with slim.learning.train, not session
        #        saver = tf.train.Saver()
        #        train_writer = tf.summary.FileWriter(FLAGS.train_logdir)
        #        sess=tf.Session(config=session_config)
        #        init_fn(sess)
        #        sess.run(tf.global_variables_initializer())
        #        sess.run(tf.local_variables_initializer())
        #        sess.run(tf.tables_initializer())
        #        tf.train.start_queue_runners(sess)
        #
        #        for i in trange(FLAGS.training_number_of_steps):
        #            loss,summary,n_step=sess.run([train_tensor,summary_op,global_step])
        #            train_writer.add_summary(summary,i)
        #            if i%100==1:
        #                print('%d/%d global_step=%0.2f, loss='%(i,FLAGS.training_number_of_steps,n_step),loss)
        #
        #        saver.save(sess,os.path.join(FLAGS.train_logdir,'model'),global_step=FLAGS.training_number_of_steps)
        #        train_writer.close()

        #        Start the training.
        slim.learning.train(train_tensor,
                            logdir=FLAGS.train_logdir,
                            log_every_n_steps=FLAGS.log_steps,
                            master=FLAGS.master,
                            is_chief=(FLAGS.task == 0),
                            number_of_steps=FLAGS.training_number_of_steps,
                            session_config=session_config,
                            startup_delay_steps=0,
                            init_fn=init_fn,
                            summary_op=summary_op,
                            save_summaries_secs=FLAGS.save_summaries_secs,
                            save_interval_secs=FLAGS.save_interval_secs)
Exemplo n.º 17
0
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab and registers its per-scale losses.

    One batch is dequeued from `inputs_queue`, the image and label tensors
    are routed through named identity ops, the multi-scale logits are
    computed, and a softmax cross-entropy loss is added for every entry of
    `outputs_to_num_classes`.

    Args:
      inputs_queue: A prefetch queue for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes. For example, for the task of semantic segmentation with 21
        semantic classes, we would have
        outputs_to_num_classes['semantic'] = 21.
      ignore_label: Ignore label.

    Returns:
      A map of maps from output_type (e.g., semantic prediction) to a
        dictionary of multi-scale logits names to logits. For each
        output_type, the dictionary has keys which correspond to the scales
        and values which correspond to the logits. For example, if `scales`
        equals [1.0, 1.5], then the keys would include 'merged_logits',
        'logits_1.00' and 'logits_1.50'.
    """
    batch = inputs_queue.dequeue()

    # Give the input and label nodes explicit names so they can be added to
    # the summary. The named tensors are written back into the batch dict.
    images = tf.identity(batch[common.IMAGE], name=common.IMAGE)
    batch[common.IMAGE] = images
    labels = tf.identity(batch[common.LABEL], name=common.LABEL)
    batch[common.LABEL] = labels

    options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # Logits for the multi-scale inputs. Original author's note: the returned
    # logits are all downsampled for both training and evaluation.
    outputs_to_scales_to_logits = model.multi_scale_logits(
        images,
        model_options=options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # Name the merged-logits node after the output type so it can be added
    # to the summary as well.
    merged_key = model._MERGED_LOGITS_SCOPE
    scales_to_logits = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
    scales_to_logits[merged_key] = tf.identity(
        scales_to_logits[merged_key], name=common.OUTPUT_TYPE)

    # Iterate over the (output type, class count) pairs of the dict and add
    # one softmax cross-entropy loss per scale for each of them.
    for output_type, class_count in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            outputs_to_scales_to_logits[output_type],
            labels,
            class_count,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            scope=output_type)

    return outputs_to_scales_to_logits
Exemplo n.º 18
0
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label,
                   loss_weight):
    """Builds a clone of DeepLab, optionally fed with hint channels.

    One batch is dequeued from `inputs_queue`. When `FLAGS.input_hints` is
    set, a hint tensor is concatenated to the image along the last axis
    before it is handed to the network; otherwise the image alone is used.
    A softmax cross-entropy loss is then added for every output type.

    Args:
      inputs_queue: A prefetch queue for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes. For example, for the task of semantic segmentation with 21
        semantic classes, we would have
        outputs_to_num_classes['semantic'] = 21.
      ignore_label: Ignore label.
      loss_weight: float or list of floats of length num_classes. Loss
        weight for each class. It is forwarded unchanged to the loss helper;
        this function itself defines no default for it.

    Returns:
      A map of maps from output_type (e.g., semantic prediction) to a
        dictionary of multi-scale logits names to logits. For each
        output_type, the dictionary has keys which correspond to the scales
        and values which correspond to the logits. For example, if `scales`
        equals [1.0, 1.5], then the keys would include 'merged_logits',
        'logits_1.00' and 'logits_1.50'.
    """
    batch = inputs_queue.dequeue()

    # Add name to input and label nodes so we can add to summary.
    batch[common.IMAGE] = tf.identity(batch[common.IMAGE], name=common.IMAGE)
    batch[common.LABEL] = tf.identity(batch[common.LABEL], name=common.LABEL)

    if not FLAGS.input_hints:
        # Just the image is input into the network.
        model_inputs = batch[common.IMAGE]
    else:
        if 'dynamic_block_hint' in FLAGS.hint_types:
            assert len(FLAGS.hint_types) == 1, (
                'When using dynamic block hints, do not use other hint types!')
            grid_size = FLAGS.dynamic_block_hint_B
            draw_prob = FLAGS.dynamic_block_hint_p
            print("----")
            print("train.py: Block hints with grid {}x{}.".format(
                grid_size, grid_size))
            print("train.py: Drawing blocks with p {}.".format(draw_prob))

            # The hints are generated from the labels by a Python helper
            # wrapped in a py_func; only its first output is used here.
            hint_fn = train_utils.generate_class_partial_boundaries_helper(
                B=grid_size, p=draw_prob)
            class_hints, _ = tf.py_func(
                func=hint_fn,
                inp=[batch[common.LABEL]],
                Tout=[tf.uint8, tf.bool])
            batch[common.HINT] = class_hints
            # The py_func output is given the static shape of the labels.
            batch[common.HINT].set_shape(
                batch[common.LABEL].get_shape().as_list())
            # NOTE(review): this overwrites the global flag so that the
            # class-hint branch below runs; any later call of this function
            # will no longer see 'dynamic_block_hint' -- confirm this is
            # intended when more than one clone is built.
            FLAGS.hint_types = ['class_hint']

        if 'class_hint' in FLAGS.hint_types:
            assert len(FLAGS.hint_types) == 1, (
                'When using class hints, do not use other hint types!')
            semantic_classes = outputs_to_num_classes['semantic']
            print('train.py: num semantic classes is {}'.format(
                semantic_classes))

            # One channel per class id: 255 where the hint equals that id,
            # 0 elsewhere. Original author's note: the factor 255 brings the
            # channel into the same range as image pixels so that the feature
            # extractor's mean subtraction reduces it back to a 0/1 range.
            per_class_channels = [
                tf.to_float(tf.equal(batch[common.HINT], class_id)) * 255
                for class_id in range(semantic_classes)
            ]
            batch[common.HINT] = tf.concat(per_class_channels, axis=-1)

        # Name the hint node, then concatenate it to the image as the input
        # into the network.
        batch[common.HINT] = tf.identity(batch[common.HINT], name=common.HINT)
        model_inputs = tf.concat(
            [batch[common.IMAGE], tf.to_float(batch[common.HINT])],
            axis=-1)

    options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    print('train.py: FORCE_DROPOUT IS {}'.format(FLAGS.force_dropout))
    if FLAGS.force_dropout:
        print('train.py: FORCE_DROPOUT keep prob {}'.format(FLAGS.keep_prob))
        print('train.py: FORCE_DROPOUT_ONLY_BRANCH IS {}'.format(
            FLAGS.force_dropout_only_branch))

    outputs_to_scales_to_logits = model.multi_scale_logits(
        model_inputs,
        model_options=options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
        force_dropout=FLAGS.force_dropout,
        force_dropout_only_branch=FLAGS.force_dropout_only_branch,
        keep_prob=FLAGS.keep_prob)

    # Add name to graph node so we can add to summary.
    scales_to_logits = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
    scales_to_logits[model.get_merged_logits_scope()] = tf.identity(
        scales_to_logits[model.get_merged_logits_scope()],
        name=common.OUTPUT_TYPE)

    # One softmax cross-entropy loss per scale for every output type.
    for output_type, class_count in six.iteritems(outputs_to_num_classes):
        print('OUTPUTS: {}'.format(output_type))
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            outputs_to_scales_to_logits[output_type],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=loss_weight,
            upsample_logits=FLAGS.upsample_logits,
            scope=output_type,
        )

    return outputs_to_scales_to_logits
Exemplo n.º 19
0
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab, optionally behind a sampling stage.

    When `FLAGS.nus_type` is set, sampling locations are computed and the
    batch is resampled via `_nus_sample` before it reaches the network. In
    the non-uniform case with `FLAGS.nus_train` set, only a mean-squared-error
    loss between the predicted and target locations plus image summaries are
    added, and the function returns early without building DeepLab.

    Nothing is returned; losses and summaries are registered as side effects.

    Args:
      iterator: An iterator of type tf.data.Iterator for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes. For example, for the task of semantic segmentation with 21
        semantic classes, we would have
        outputs_to_num_classes['semantic'] = 21.
      ignore_label: Ignore label.
    """
    batch = iterator.get_next()

    # Crop size handed to the model; replaced by a square of the sampling
    # size whenever preprocessing or sampling is configured.
    crop_size = [int(dim) for dim in FLAGS.train_crop_size]
    if FLAGS.nus_preprocess is not None:
        crop_size = [FLAGS.nus_sampling_size] * 2

    if FLAGS.nus_type is not None:
        # Sampling requested.
        crop_size = [FLAGS.nus_sampling_size] * 2
        if FLAGS.nus_type != 'uniform':
            image_shape = list(batch[common.IMAGE].get_shape())
            if not isinstance(image_shape[0], int):
                # Pin the batch dimension to the per-clone batch size when
                # it is not a plain int.
                image_shape[0] = FLAGS.train_batch_size // FLAGS.num_clones
                batch[common.IMAGE].set_shape(image_shape)
            locations = _nus_locations(batch[common.IMAGE])
            if FLAGS.nus_train:
                targets = batch[TARGET_SAMPLING]

                # The loss is created before the target's static shape is
                # set; this order is kept as in the original code.
                tf.losses.mean_squared_error(locations, targets)
                targets.set_shape(locations.get_shape())

                tf.summary.image("InputImages", batch[common.IMAGE])
                # NOTE(review): the divisor 19 presumably scales label ids
                # for display -- confirm against the dataset in use.
                tf.summary.image("InputLabel",
                                 tf.to_float(batch[common.LABEL]) / 19)
                tf.summary.image("ResViz", viz(locations))
                tf.summary.image("TargetViz", viz(targets))

                # Only the sampling locations are trained in this mode.
                return
            locations = _resize_locations(locations)
        else:
            locations = _nus_uniform_locations()

        with tf.name_scope("NUS-Sampling", values=[batch, locations]):
            batch = _nus_sample(batch, locations)

    # Add name to input and label nodes so we can add to summary.
    batch[common.IMAGE] = tf.identity(batch[common.IMAGE], name=common.IMAGE)
    batch[common.LABEL] = tf.identity(batch[common.LABEL], name=common.LABEL)

    if FLAGS.nus_preprocess:
        # Render the sampling tensor of the batch into an image summary.
        sampling_image = tf.py_func(
            viz_sampling,
            [batch[SAMPLING]],
            tf.uint8,
        )
        tf.summary.image("Sampling", sampling_image)

    options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    nas_hyper_parameters = {
        'drop_path_keep_prob': FLAGS.drop_path_keep_prob,
        'total_training_steps': FLAGS.training_number_of_steps,
    }
    outputs_to_scales_to_logits = model.multi_scale_logits(
        batch[common.IMAGE],
        model_options=options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
        nas_training_hyper_parameters=nas_hyper_parameters)

    # Add name to graph node so we can add to summary.
    merged_scope = model.MERGED_LOGITS_SCOPE
    scales_to_logits = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
    scales_to_logits[merged_scope] = tf.identity(
        scales_to_logits[merged_scope], name=common.OUTPUT_TYPE)

    for output_type, class_count in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            outputs_to_scales_to_logits[output_type],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            hard_example_mining_step=FLAGS.hard_example_mining_step,
            top_k_percent_pixels=FLAGS.top_k_percent_pixels,
            scope=output_type)

        # Log the summary for this output type.
        _log_summaries(batch[common.IMAGE], batch[common.LABEL],
                       class_count,
                       scales_to_logits[merged_scope])