def testForwardpassDeepLabv3plus(self):
    """Runs one forward pass of the model and sanity-checks the logits.

    The model is built with the 'mobilenet_v2' variant, fed a single random
    image of size `crop_size` with a single-scale image pyramid, and
    evaluated in a session. For every output type the test expects exactly
    one entry in the scales-to-logits mapping, and expects `logits.any()` to
    be true for each evaluated logits value.
    """
    crop_size = [33, 33]
    outputs_to_num_classes = {'semantic': 3}

    model_options = common.ModelOptions(
        outputs_to_num_classes,
        crop_size,
        output_stride=16
    )._replace(
        add_image_level_feature=True,
        aspp_with_batch_norm=True,
        logits_kernel_size=1,
        model_variant='mobilenet_v2')  # Employ MobileNetv2 for fast test.

    g = tf.Graph()
    with g.as_default():
        with self.test_session(graph=g) as sess:
            inputs = tf.random_uniform((1, crop_size[0], crop_size[1], 3))
            outputs_to_scales_to_logits = model.multi_scale_logits(
                inputs, model_options, image_pyramid=[1.0])

            sess.run(tf.global_variables_initializer())
            # Replace the graph tensors with their evaluated values.
            outputs_to_scales_to_logits = sess.run(outputs_to_scales_to_logits)

            # Check computed results for each output type.
            for output in outputs_to_num_classes:
                scales_to_logits = outputs_to_scales_to_logits[output]
                # Expect only one output. `assertEqual` is used because the
                # `assertEquals` alias is deprecated and removed in Python 3.12.
                self.assertEqual(len(scales_to_logits), 1)
                for logits in scales_to_logits.values():
                    self.assertTrue(logits.any())
def testBuildDeepLabWithDensePredictionCell(self):
    """Builds the model with a dense prediction cell and checks its endpoints.

    With a single-scale image pyramid, every output type is expected to
    expose exactly one endpoint, named 'merged_logits'.
    """
    batch_size = 1
    crop_size = [33, 33]
    outputs_to_num_classes = {'semantic': 2}
    expected_endpoints = ['merged_logits']

    # Two-branch cell configuration handed to the model options.
    cell_config = [
        dict(kernel=3, rate=[1, 6], op='conv', input=-1),
        dict(kernel=3, rate=[18, 15], op='conv', input=0),
    ]

    base_options = common.ModelOptions(
        outputs_to_num_classes, crop_size, output_stride=16)
    model_options = base_options._replace(
        aspp_with_batch_norm=True,
        model_variant='mobilenet_v2',
        dense_prediction_cell_config=cell_config)

    graph = tf.Graph()
    with graph.as_default(), self.test_session(graph=graph):
        image_shape = (batch_size, crop_size[0], crop_size[1], 3)
        images = tf.random_uniform(image_shape)
        results_by_output = model.multi_scale_logits(
            images, model_options, image_pyramid=[1.0])

        for output_type in outputs_to_num_classes:
            per_scale_results = results_by_output[output_type]
            self.assertListEqual(list(per_scale_results), expected_endpoints)
            self.assertEqual(len(per_scale_results), 1)
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds one DeepLab clone and registers its training losses.

    Args:
      inputs_queue: A prefetch queue for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes, e.g. outputs_to_num_classes['semantic'] = 21 for semantic
        segmentation with 21 classes.
      ignore_label: Ignore label.

    Returns:
      A map from output type (e.g. semantic prediction) to a dictionary of
      multi-scale logits names to logits. For example, if `scales` equals
      [1.0, 1.5], the inner keys would include 'merged_logits',
      'logits_1.00' and 'logits_1.50'.
    """
    batch = inputs_queue.dequeue()

    # Give the image and label tensors fixed names so they can be added to
    # the summary.
    for key in (common.IMAGE, common.LABEL):
        batch[key] = tf.identity(batch[key], name=key)

    options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    logits_by_output = model.multi_scale_logits(
        batch[common.IMAGE],
        model_options=options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # Name the merged-logits node so it can be added to the summary.
    primary_logits = logits_by_output[common.OUTPUT_TYPE]
    named_merged = tf.identity(
        primary_logits[model.get_merged_logits_scope()],
        name=common.OUTPUT_TYPE)
    primary_logits[model.get_merged_logits_scope()] = named_merged

    for output_type, class_count in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            logits_by_output[output_type],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            scope=output_type)

    return logits_by_output
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds one DeepLab clone and registers its training losses.

    Args:
      inputs_queue: A prefetch queue for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes, e.g. outputs_to_num_classes['semantic'] = 21 for semantic
        segmentation with 21 classes.
      ignore_label: Ignore label.

    Returns:
      A map from output type (e.g. semantic prediction) to a dictionary of
      multi-scale logits names to logits. For example, if `scales` equals
      [1.0, 1.5], the inner keys would include 'merged_logits',
      'logits_1.00' and 'logits_1.50'.
    """
    samples = inputs_queue.dequeue()
    image_key, label_key = common.IMAGE, common.LABEL

    # Re-wrap image and label in named identity ops so they can be added to
    # the summary.
    samples[image_key] = tf.identity(samples[image_key], name=image_key)
    samples[label_key] = tf.identity(samples[label_key], name=label_key)

    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    all_logits = model.multi_scale_logits(
        samples[image_key],
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # Name the merged-logits node so it can be added to the summary.
    merged_key = model.MERGED_LOGITS_SCOPE
    primary_logits = all_logits[common.OUTPUT_TYPE]
    primary_logits[merged_key] = tf.identity(
        primary_logits[merged_key], name=common.OUTPUT_TYPE)

    # One softmax cross-entropy loss group per output type.
    for output, num_classes in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            all_logits[output],
            samples[label_key],
            num_classes,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            scope=output)

    return all_logits
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
    """Builds one DeepLab clone and registers its training losses.

    Nothing is returned; the function only adds nodes and losses to the
    graph.

    Args:
      iterator: An iterator of type tf.data.Iterator for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes, e.g. outputs_to_num_classes['semantic'] = 21 for semantic
        segmentation with 21 classes.
      ignore_label: Ignore label.
    """
    batch = iterator.get_next()

    # Give the image and label tensors fixed names so they can be added to
    # the summary.
    for key in (common.IMAGE, common.LABEL):
        batch[key] = tf.identity(batch[key], name=key)

    # The crop-size flag values are converted to ints before use.
    crop_size = list(map(int, FLAGS.train_crop_size))
    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    nas_hyper_parameters = {
        'drop_path_keep_prob': FLAGS.drop_path_keep_prob,
        'total_training_steps': FLAGS.training_number_of_steps,
    }
    logits_by_output = model.multi_scale_logits(
        batch[common.IMAGE],
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
        nas_training_hyper_parameters=nas_hyper_parameters)

    # Name the merged-logits node so it can be added to the summary.
    merged_key = model.MERGED_LOGITS_SCOPE
    primary_logits = logits_by_output[common.OUTPUT_TYPE]
    primary_logits[merged_key] = tf.identity(
        primary_logits[merged_key], name=common.OUTPUT_TYPE)

    for output_type, class_count in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            logits_by_output[output_type],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=model_options.label_weights,
            upsample_logits=FLAGS.upsample_logits,
            hard_example_mining_step=FLAGS.hard_example_mining_step,
            top_k_percent_pixels=FLAGS.top_k_percent_pixels,
            scope=output_type,
            # The two arguments below were marked by the author as their
            # own addition to this call.
            use_hybrid_loss=FLAGS.use_hybrid_loss,
            batch_size=FLAGS.train_batch_size // FLAGS.num_clones)
# Builds one DeepLab clone from `iterator.get_next()`: names the image, label
# and merged-logits tensors, adds a softmax cross-entropy loss per output type,
# and calls `_log_summaries` on the image, label, class count and merged
# logits. Nothing is returned.
#
# NOTE(review): this definition has been flattened onto one line, so the
# original indentation is lost. It is not visible here whether the
# `_log_summaries(...)` call belongs inside the `for` loop or after it; it
# reads the loop variable `num_classes` either way. Confirm against the
# original layout before reformatting.
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label): """Builds a clone of DeepLab. Args: iterator: An iterator of type tf.data.Iterator for images and labels. outputs_to_num_classes: A map from output type to the number of classes. For example, for the task of semantic segmentation with 21 semantic classes, we would have outputs_to_num_classes['semantic'] = 21. ignore_label: Ignore label. """ samples = iterator.get_next() # Add name to input and label nodes so we can add to summary. samples[common.IMAGE] = tf.identity(samples[common.IMAGE], name=common.IMAGE) samples[common.LABEL] = tf.identity(samples[common.LABEL], name=common.LABEL) model_options = common.ModelOptions( outputs_to_num_classes=outputs_to_num_classes, crop_size=FLAGS.train_crop_size, atrous_rates=FLAGS.atrous_rates, output_stride=FLAGS.output_stride) outputs_to_scales_to_logits = model.multi_scale_logits( samples[common.IMAGE], model_options=model_options, image_pyramid=FLAGS.image_pyramid, weight_decay=FLAGS.weight_decay, is_training=True, fine_tune_batch_norm=FLAGS.fine_tune_batch_norm, nas_training_hyper_parameters={ 'drop_path_keep_prob': FLAGS.drop_path_keep_prob, 'total_training_steps': FLAGS.training_number_of_steps, }) # Add name to graph node so we can add to summary. output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE] output_type_dict[model.MERGED_LOGITS_SCOPE] = tf.identity( output_type_dict[model.MERGED_LOGITS_SCOPE], name=common.OUTPUT_TYPE) for output, num_classes in six.iteritems(outputs_to_num_classes): train_utils.add_softmax_cross_entropy_loss_for_each_scale( outputs_to_scales_to_logits[output], samples[common.LABEL], num_classes, ignore_label, loss_weight=1.0, upsample_logits=FLAGS.upsample_logits, hard_example_mining_step=FLAGS.hard_example_mining_step, top_k_percent_pixels=FLAGS.top_k_percent_pixels, scope=output) # Log the summary _log_summaries(samples[common.IMAGE], samples[common.LABEL], num_classes, output_type_dict[model.MERGED_LOGITS_SCOPE])
# Builds a second copy of the DeepLab graph on `image` with variable reuse
# enabled, adds softmax cross-entropy losses against `label`, and returns
# `(total_loss, val_summary_op)`, where `total_loss` sums the collected losses
# and the regularization loss, and `val_summary_op` merges only the
# 'total_validation_loss' scalar summary.
#
# NOTE(review): the `num_of_classes` parameter is never read; the class count
# comes from `dataset.num_of_classes` instead.
# NOTE(review): the model is built with `is_training=True` although the
# function name suggests validation - confirm this is intended.
# NOTE(review): this definition has been flattened onto one line, so the
# extent of the `with tf.variable_scope(...)` and `with tf.name_scope(...)`
# bodies is not visible here. Confirm against the original layout before
# reformatting, since the name scope affects op and summary names.
def _val_loss(dataset, image, label, num_of_classes, ignore_label): outputs_to_num_classes = {common.OUTPUT_TYPE: dataset.num_of_classes} val_summaries = [] with tf.variable_scope(tf.get_variable_scope(), reuse=True): model_options = common.ModelOptions( outputs_to_num_classes=outputs_to_num_classes, crop_size=FLAGS.train_crop_size, atrous_rates=FLAGS.atrous_rates, output_stride=FLAGS.output_stride) outputs_to_scales_to_logits = model.multi_scale_logits( image, model_options=model_options, image_pyramid=FLAGS.image_pyramid, weight_decay=FLAGS.weight_decay, is_training=True, fine_tune_batch_norm=FLAGS.fine_tune_batch_norm, nas_training_hyper_parameters={ 'drop_path_keep_prob': FLAGS.drop_path_keep_prob, 'total_training_steps': FLAGS.training_number_of_steps, }) with tf.name_scope('val_loss') as scope: for output, num_classes in six.iteritems(outputs_to_num_classes): train_utils.add_softmax_cross_entropy_loss_for_each_scale( outputs_to_scales_to_logits[output], label, num_classes, ignore_label, loss_weight=1.0, upsample_logits=FLAGS.upsample_logits, hard_example_mining_step=FLAGS.hard_example_mining_step, top_k_percent_pixels=FLAGS.top_k_percent_pixels, scope=output) losses = tf.losses.get_losses(scope=scope) for loss in losses: tf.summary.scalar('Val_losses/%s' % loss.op.name, loss) regularization_loss = tf.losses.get_regularization_loss( scope=scope) tf.summary.scalar('Val_losses/%s' % regularization_loss.op.name, regularization_loss) total_loss = tf.add_n([tf.add_n(losses), regularization_loss]) val_summaries.append( tf.summary.scalar('total_validation_loss', total_loss)) val_summary_op = tf.summary.merge(val_summaries) return total_loss, val_summary_op
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
    """Builds one DeepLab clone and registers its training losses.

    Nothing is returned; the function only adds nodes and losses to the
    graph. The inline notes about flag values are carried over from the
    original author's comments.

    Args:
      iterator: An iterator of type tf.data.Iterator for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes, e.g. outputs_to_num_classes['semantic'] = 21 for semantic
        segmentation with 21 classes.
      ignore_label: Ignore label.
    """
    batch = iterator.get_next()

    # Give the image and label tensors fixed names so they can be added to
    # the summary.
    for key in (common.IMAGE, common.LABEL):
        batch[key] = tf.identity(batch[key], name=key)

    crop_size = list(map(int, FLAGS.train_crop_size))
    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=crop_size,
        atrous_rates=FLAGS.atrous_rates,
        # Author's note: set to 8 to ensure tensor sizes match for the
        # concat op in model.py.
        output_stride=FLAGS.output_stride)

    nas_hyper_parameters = {
        # Author's note: set to 1.0 earlier, but not sure what it should be.
        'drop_path_keep_prob': FLAGS.drop_path_keep_prob,
        'total_training_steps': FLAGS.training_number_of_steps,
    }
    logits_by_output = model.multi_scale_logits(
        batch[common.IMAGE],
        model_options=model_options,
        # Author's note: not used; only for multi-scale, and single-scale
        # is used here.
        image_pyramid=FLAGS.image_pyramid,
        # Author's note: use the default in nas_network.
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        # Author's note: defined to False because the batch size is 8.
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
        nas_training_hyper_parameters=nas_hyper_parameters)

    # Name the merged-logits node so it can be added to the summary.
    merged_key = model.MERGED_LOGITS_SCOPE
    primary_logits = logits_by_output[common.OUTPUT_TYPE]
    primary_logits[merged_key] = tf.identity(
        primary_logits[merged_key], name=common.OUTPUT_TYPE)

    for output_type, class_count in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            logits_by_output[output_type],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=model_options.label_weights,
            # Author's note: set to True earlier.
            upsample_logits=FLAGS.upsample_logits,
            # Author's note: set to 0 earlier.
            hard_example_mining_step=FLAGS.hard_example_mining_step,
            # Author's note: set to 1 earlier.
            top_k_percent_pixels=FLAGS.top_k_percent_pixels,
            scope=output_type)
def _construct_and_fill_model(self):
    """Creates the input placeholders and the train/validation logits graphs.

    Reads 'gpu_devices', 'input_size', 'atrous_rates', 'output_stride' and
    'weight_decay' from `self.config`, and `self.model_out_dims` for the
    number of 'semantic' classes. Sets `self.device_ids`, `self.input_size`,
    `self.inputs`, `self.labels`, `self.outputs_to_scales_to_logits` and
    `self.outputs_to_scales_to_logits_val`.
    """
    # TODO: Factor out progress in base class
    sly.Progress('Building model:', 1).iter_done_report()

    self.device_ids = sly.env.remap_gpu_devices(self.config['gpu_devices'])

    size_cfg = self.config['input_size']
    self.input_size = (size_cfg['height'], size_cfg['width'])

    model_options = ModelOptions(
        outputs_to_num_classes={'semantic': self.model_out_dims},
        crop_size=self.input_size,
        atrous_rates=self.config['atrous_rates'],
        output_stride=self.config['output_stride'])

    # Placeholders with an unspecified batch dimension: 3-channel float
    # inputs and 1-channel integer labels.
    spatial_dims = list(self.input_size)
    self.inputs = tf.placeholder(tf.float32, [None] + spatial_dims + [3])
    self.labels = tf.placeholder(tf.int32, [None] + spatial_dims + [1])

    # Arguments shared by the training and validation graphs.
    shared_kwargs = dict(
        images=self.inputs,
        model_options=model_options,
        image_pyramid=None,
        weight_decay=self.config['weight_decay'],
        fine_tune_batch_norm=False)

    self.outputs_to_scales_to_logits = model.multi_scale_logits(
        is_training=True, **shared_kwargs)

    # The validation graph is built with variable reuse enabled and
    # is_training=False.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.outputs_to_scales_to_logits_val = model.multi_scale_logits(
            is_training=False, **shared_kwargs)
def testBuildDeepLabv2(self):
    """Checks the logits endpoints for two variants and two image pyramids.

    For each model variant and image pyramid, every output type must expose
    the expected endpoint names. The expected count is
    len(image_pyramid) + 1 for the multi-scale pyramid, because the last
    logits is merged from all the scales; the single-scale pyramid exposes
    only the merged logits.
    """
    batch_size = 2
    crop_size = [41, 41]
    # Test with two output types.
    outputs_to_num_classes = {'semantic': 3, 'direction': 2}

    # (image pyramid, expected endpoint names, expected endpoint count)
    pyramid_cases = [
        ([1], ['merged_logits'], 1),
        ([0.5, 1], ['merged_logits', 'logits_0.50', 'logits_1.00'], 3),
    ]

    # Test two model variants.
    for model_variant in ('xception_65', 'mobilenet_v2'):
        model_options = common.ModelOptions(outputs_to_num_classes)._replace(
            add_image_level_feature=False,
            aspp_with_batch_norm=False,
            aspp_with_separable_conv=False,
            model_variant=model_variant)

        for image_pyramid, endpoint_names, endpoint_count in pyramid_cases:
            graph = tf.Graph()
            with graph.as_default(), self.test_session(graph=graph):
                images = tf.random_uniform(
                    (batch_size, crop_size[0], crop_size[1], 3))
                logits_by_output = model.multi_scale_logits(
                    images, model_options, image_pyramid=image_pyramid)

                # Check computed results for each output type.
                for output_type in outputs_to_num_classes:
                    per_scale_logits = logits_by_output[output_type]
                    self.assertListEqual(
                        sorted(per_scale_logits.keys()),
                        sorted(endpoint_names))
                    self.assertEqual(len(per_scale_logits), endpoint_count)
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab.

    Args:
      inputs_queue: A prefetch queue for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes. For example, for the task of semantic segmentation with 21
        semantic classes, we would have
        outputs_to_num_classes['semantic'] = 21.
      ignore_label: Ignore label. Not referenced by this function; kept so
        the signature stays unchanged for callers.

    Returns:
      A map of maps from output_type (e.g., semantic prediction) to a
      dictionary of multi-scale logits names to logits. For each
      output_type, the dictionary has keys which correspond to the scales
      and values which correspond to the logits. For example, if `scales`
      equals [1.0, 1.5], then the keys would include 'merged_logits',
      'logits_1.00' and 'logits_1.50'.
    """
    samples = inputs_queue.dequeue()

    # Add names to the input and label nodes so we can add them to summary.
    samples[common.IMAGE] = tf.identity(samples[common.IMAGE], 'input_image')
    samples[common.LABEL] = tf.identity(samples[common.LABEL], 'input_label')

    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    outputs_to_scales_to_logits = model.multi_scale_logits(
        samples[common.IMAGE],
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # Add a name to the merged-logits node so we can add it to summary.
    # This must run after the logits are built: previously it indexed
    # `outputs_to_scales_to_logits` before that name was assigned, which
    # raises UnboundLocalError.
    output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
    output_type_dict[model._MERGED_LOGITS_SCOPE] = tf.identity(
        output_type_dict[model._MERGED_LOGITS_SCOPE],
        name='semantic_merged_logits')

    return outputs_to_scales_to_logits
def testBuildDeepLabv2(self):
    """Checks the logits endpoints for two variants and two image pyramids.

    Every output type must expose the endpoint names expected for the image
    pyramid in use. For the multi-scale pyramid the expected count is
    len(image_pyramid) + 1, because the last logits is merged from all the
    scales; the single-scale pyramid exposes only the merged logits.
    """
    batch_size = 2
    crop_size = [41, 41]
    input_shape = (batch_size, crop_size[0], crop_size[1], 3)

    # Test with two image_pyramids, paired with their expected results.
    image_pyramids = [[1], [0.5, 1]]
    expected_endpoints = [['merged_logits'],
                          ['merged_logits', 'logits_0.50', 'logits_1.00']]
    expected_num_logits = [1, 3]

    # Test two model variants.
    model_variants = ['xception_65', 'mobilenet_v2']
    # Test with two output_types.
    outputs_to_num_classes = {'semantic': 3, 'direction': 2}

    for variant in model_variants:
        default_options = common.ModelOptions(outputs_to_num_classes)
        model_options = default_options._replace(
            add_image_level_feature=False,
            aspp_with_batch_norm=False,
            aspp_with_separable_conv=False,
            model_variant=variant)

        cases = zip(image_pyramids, expected_endpoints, expected_num_logits)
        for pyramid, wanted_names, wanted_count in cases:
            graph = tf.Graph()
            with graph.as_default():
                with self.test_session(graph=graph):
                    images = tf.random_uniform(input_shape)
                    all_logits = model.multi_scale_logits(
                        images, model_options, image_pyramid=pyramid)

                    # Check computed results for each output type.
                    for output_type in outputs_to_num_classes:
                        logits_by_scale = all_logits[output_type]
                        self.assertListEqual(sorted(logits_by_scale.keys()),
                                             sorted(wanted_names))
                        self.assertEqual(len(logits_by_scale), wanted_count)
def _build_deeplab_inputs(model_inputs, outputs_to_num_classes):
    """Builds a clone of DeepLab and returns its multi-scale logits.

    MODIFIED FROM train.py-->_build_deeplab. The purpose of this function
    is just to build the model; no losses are added here.

    Args:
      model_inputs: Input passed as the first argument to
        `model.multi_scale_logits`.
      outputs_to_num_classes: Map from output type to number of classes,
        forwarded to `common.ModelOptions`.

    Returns:
      Whatever `model.multi_scale_logits` returns for these inputs.
    """
    options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # weight_decay and fine_tune_batch_norm are hard-coded here;
    # FLAGS.weight_decay and FLAGS.fine_tune_batch_norm are not consulted.
    return model.multi_scale_logits(
        model_inputs,
        model_options=options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=0.01,
        is_training=True,
        fine_tune_batch_norm=True)
def testBuildDeepLabWithDensePredictionCell(self):
    """Builds the model with a dense prediction cell and checks its endpoints.

    With a single-scale image pyramid, every output type is expected to
    expose exactly one endpoint, named 'merged_logits'.
    """
    batch_size = 1
    crop_size = [33, 33]
    outputs_to_num_classes = {'semantic': 2}
    expected_endpoints = ['merged_logits']

    # Two-branch cell configuration handed to the model options.
    first_branch = {'kernel': 3, 'rate': [1, 6], 'op': 'conv', 'input': -1}
    second_branch = {'kernel': 3, 'rate': [18, 15], 'op': 'conv', 'input': 0}
    dense_prediction_cell_config = [first_branch, second_branch]

    model_options = common.ModelOptions(
        outputs_to_num_classes, crop_size, output_stride=16
    )._replace(
        aspp_with_batch_norm=True,
        model_variant='mobilenet_v2',
        dense_prediction_cell_config=dense_prediction_cell_config)

    graph = tf.Graph()
    with graph.as_default(), self.test_session(graph=graph):
        images = tf.random_uniform(
            (batch_size, crop_size[0], crop_size[1], 3))
        results_by_output = model.multi_scale_logits(
            images, model_options, image_pyramid=[1.0])

        for output_type in outputs_to_num_classes:
            per_scale_results = results_by_output[output_type]
            # keys() is wrapped in list() so the comparison also works when
            # keys() returns a view rather than a list.
            endpoint_names = list(per_scale_results.keys())
            self.assertListEqual(endpoint_names, expected_endpoints)
            self.assertEqual(len(per_scale_results), 1)
def testForwardpassDeepLabv3plus(self):
    """Runs one forward pass of the model and sanity-checks the logits.

    The model is built with the 'xception_65' variant, a single atrous rate,
    separable convolutions in ASPP and the decoder, and a decoder output
    stride of 4. It is fed a single random image of size `crop_size` with a
    single-scale image pyramid and evaluated in a session. For every output
    type the test expects exactly one entry in the scales-to-logits mapping,
    and expects `logits.any()` to be true for each evaluated logits value.
    """
    crop_size = [33, 33]
    outputs_to_num_classes = {'semantic': 3}

    model_options = common.ModelOptions(
        outputs_to_num_classes,
        crop_size,
        atrous_rates=[6],
        output_stride=16
    )._replace(
        add_image_level_feature=True,
        aspp_with_batch_norm=True,
        aspp_with_separable_conv=True,
        decoder_output_stride=4,
        decoder_use_separable_conv=True,
        logits_kernel_size=1,
        model_variant='xception_65')

    g = tf.Graph()
    with g.as_default():
        with self.test_session(graph=g) as sess:
            inputs = tf.random_uniform((1, crop_size[0], crop_size[1], 3))
            outputs_to_scales_to_logits = model.multi_scale_logits(
                inputs, model_options, image_pyramid=[1.0])

            sess.run(tf.global_variables_initializer())
            # Replace the graph tensors with their evaluated values.
            outputs_to_scales_to_logits = sess.run(outputs_to_scales_to_logits)

            # Check computed results for each output type.
            for output in outputs_to_num_classes:
                scales_to_logits = outputs_to_scales_to_logits[output]
                # Expect only one output. `assertEqual` is used because the
                # `assertEquals` alias is deprecated and removed in Python 3.12.
                self.assertEqual(len(scales_to_logits), 1)
                for logits in scales_to_logits.values():
                    self.assertTrue(logits.any())
# Builds the DeepLab training graph and runs it with `slim.learning.train`.
#
# Steps, in the order the code performs them:
#   1. Fetch a training batch with `get_dataset(FLAGS, mode=TRAIN)`.
#   2. Build multi-scale logits and wrap the merged logits in a named
#      identity op.
#   3. Depending on `FLAGS.upsample_logits`, either resize the logits to the
#      label size (bilinear) or resize the labels to the logits size
#      (nearest neighbour), then pass both to `self.get_metric(..., 'train')`.
#   4. Add softmax cross-entropy losses per output type, and sum the
#      REGULARIZATION_LOSSES and LOSSES collections.
#   5. Create a MomentumOptimizer with the learning rate from
#      `train_utils.get_model_learning_rate`, compute and apply gradients, and
#      group the gradient update with the UPDATE_OPS collection.
#   6. Restore variables from `FLAGS.tf_initial_checkpoint` (excluding
#      'global_step', and the last layers unless `FLAGS.initialize_last_layer`)
#      and start training.
#
# NOTE(review): `softmax_loss` accumulates the return value of
# `add_softmax_cross_entropy_loss_for_each_scale`; this presumes that helper
# returns a loss tensor - confirm against train_utils.
# NOTE(review): this definition has been flattened onto three lines, so the
# extent of the `if`/`else` and `with tf.control_dependencies(...)` bodies is
# not visible here. Confirm against the original layout before reformatting.
def train(self): FLAGS = self.flags image_batch, annotation_batch = get_dataset( FLAGS, mode=tf.estimator.ModeKeys.TRAIN) outputs_to_num_classes = {common.OUTPUT_TYPE: self.num_classes} model_options = common.ModelOptions( outputs_to_num_classes=outputs_to_num_classes, crop_size=FLAGS.train_crop_size, atrous_rates=FLAGS.atrous_rates, output_stride=FLAGS.output_stride) # outputs_to_scales_to_logits[key_1][key_2]=logits # key_1 in outputs_to_num_classes.keys() # key_2 in ['logits_%.2f' % image_scale for image_scale in image_pyramid]+[MERGED_LOGITS_SCOPE] outputs_to_scales_to_logits = model.multi_scale_logits( image_batch, model_options=model_options, image_pyramid=FLAGS.image_pyramid, weight_decay=FLAGS.weight_decay, is_training=True, fine_tune_batch_norm=FLAGS.fine_tune_batch_norm) # Add name to graph node so we can add to summary. output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE] logits = output_type_dict[model.MERGED_LOGITS_SCOPE] = tf.identity( output_type_dict[model.MERGED_LOGITS_SCOPE], name=common.OUTPUT_TYPE) labels = annotation_batch if FLAGS.upsample_logits: # Label is not downsampled, and instead we upsample logits. logits = tf.image.resize_bilinear(logits, tf.shape(labels)[1:3], align_corners=True) scaled_labels = labels else: # Label is downsampled to the same size as logits. 
scaled_labels = tf.image.resize_nearest_neighbor( annotation_batch, tf.shape(logits)[1:3], align_corners=True) self.get_metric(scaled_labels, logits, 'train') softmax_loss = 0 # outputs_to_scales_to_logits[output]={} for output, num_classes in outputs_to_num_classes.items(): softmax_loss += train_utils.add_softmax_cross_entropy_loss_for_each_scale( outputs_to_scales_to_logits[output], annotation_batch, num_classes, self.ignore_label, loss_weight=1.0, upsample_logits=FLAGS.upsample_logits, scope=output) regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) reg_loss = tf.add_n(regularization_losses) tf.summary.scalar('losses/reg_loss', reg_loss) model_losses = tf.get_collection(tf.GraphKeys.LOSSES) model_loss = tf.add_n(model_losses) tf.summary.scalar('losses/model_loss', model_loss) learning_rate = train_utils.get_model_learning_rate( FLAGS.learning_policy, FLAGS.base_learning_rate, FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor, FLAGS.training_number_of_steps, FLAGS.learning_power, FLAGS.slow_start_step, FLAGS.slow_start_learning_rate) optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum) tf.summary.scalar('learning_rate', learning_rate) with tf.control_dependencies( [tf.assert_equal(softmax_loss, model_loss)]): total_loss = model_loss + reg_loss total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.') tf.summary.scalar('losses/total_loss', total_loss) global_step = tf.train.get_or_create_global_step() update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) grads_and_vars = optimizer.compute_gradients(total_loss) # Create gradient update op. grad_updates = optimizer.apply_gradients(grads_and_vars, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops) # train_tensor=optimizer.minimize(total_loss,global_step) # train_tensor=slim.learning.create_train_op(total_loss=total_loss, # optimizer=optimizer, # global_step=global_step) #BUG update the weight twice??? 
with tf.control_dependencies([update_op]): train_tensor = tf.identity(total_loss, name='train_op') summary_op = tf.summary.merge_all() session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) session_config.gpu_options.allow_growth = True last_layers = model.get_extra_layer_scopes( FLAGS.last_layers_contain_logits_only) exclude_list = ['global_step'] if not FLAGS.initialize_last_layer: exclude_list.extend(last_layers) variables_to_restore = slim.get_variables_to_restore( exclude=exclude_list) init_fn = slim.assign_from_checkpoint_fn( model_path=FLAGS.tf_initial_checkpoint, var_list=variables_to_restore, ignore_missing_vars=True) #use the train_tensor with slim.learning.train, not session # saver = tf.train.Saver() # train_writer = tf.summary.FileWriter(FLAGS.train_logdir) # sess=tf.Session(config=session_config) # init_fn(sess) # sess.run(tf.global_variables_initializer()) # sess.run(tf.local_variables_initializer()) # sess.run(tf.tables_initializer()) # tf.train.start_queue_runners(sess) # # for i in trange(FLAGS.training_number_of_steps): # loss,summary,n_step=sess.run([train_tensor,summary_op,global_step]) # train_writer.add_summary(summary,i) # if i%100==1: # print('%d/%d global_step=%0.2f, loss='%(i,FLAGS.training_number_of_steps,n_step),loss) # # saver.save(sess,os.path.join(FLAGS.train_logdir,'model'),global_step=FLAGS.training_number_of_steps) # train_writer.close() # Start the training. slim.learning.train(train_tensor, logdir=FLAGS.train_logdir, log_every_n_steps=FLAGS.log_steps, master=FLAGS.master, is_chief=(FLAGS.task == 0), number_of_steps=FLAGS.training_number_of_steps, session_config=session_config, startup_delay_steps=0, init_fn=init_fn, summary_op=summary_op, save_summaries_secs=FLAGS.save_summaries_secs, save_interval_secs=FLAGS.save_interval_secs)
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds one DeepLab clone and registers its training losses.

    Args:
      inputs_queue: A prefetch queue for images and labels.
      outputs_to_num_classes: A map from output type to the number of
        classes, e.g. outputs_to_num_classes['semantic'] = 21 for semantic
        segmentation with 21 classes.
      ignore_label: Ignore label.

    Returns:
      A map from output type (e.g. semantic prediction) to a dictionary of
      multi-scale logits names to logits. For example, if `scales` equals
      [1.0, 1.5], the inner keys would include 'merged_logits',
      'logits_1.00' and 'logits_1.50'.
    """
    batch = inputs_queue.dequeue()

    # Wrap image and label in named identity ops so they can be added to the
    # summary. Per the original author's notes, common.IMAGE is 'image' and
    # common.LABEL is 'label'.
    for key in (common.IMAGE, common.LABEL):
        batch[key] = tf.identity(batch[key], name=key)

    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # Per the original author's notes: multi_scale_logits gets the logits for
    # multi-scale inputs; the returned logits are all downsampled (due to
    # max-pooling layers) for both training and evaluation. Its arguments
    # are the image tensor of size [batch, height, width, channels], a
    # ModelOptions instance, and the input image scales used for multi-scale
    # feature extraction.
    logits_by_output = model.multi_scale_logits(
        batch[common.IMAGE],
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # Name the merged-logits node so it can be added to the summary. Per the
    # original author's notes, common.OUTPUT_TYPE is 'semantic' and
    # model._MERGED_LOGITS_SCOPE is 'merged_logits'.
    merged_key = model._MERGED_LOGITS_SCOPE
    primary_logits = logits_by_output[common.OUTPUT_TYPE]
    primary_logits[merged_key] = tf.identity(
        primary_logits[merged_key], name=common.OUTPUT_TYPE)

    # Iterate over the (output type, class count) pairs of the dict and add
    # a softmax cross-entropy loss for the logits of each scale. Per the
    # original author's notes, the logits have shape
    # [batch, logits_height, logits_width, num_classes] and the groundtruth
    # labels have shape [batch, image_height, image_width, 1].
    for output_type, class_count in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            logits_by_output[output_type],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            scope=output_type)

    return logits_by_output
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label,
                   loss_weight=1.0):
  """Builds a clone of DeepLab, optionally feeding hint channels to the model.

  When FLAGS.input_hints is set, a hint tensor is concatenated to the image
  along the last axis and the result is used as the network input; otherwise
  the image alone is used.

  Args:
    inputs_queue: A prefetch queue for images and labels.
    outputs_to_num_classes: A map from output type to the number of classes.
      For example, for the task of semantic segmentation with 21 semantic
      classes, we would have outputs_to_num_classes['semantic'] = 21.
    ignore_label: Ignore label.
    loss_weight: float or list of floats of length num_classes. Loss weight
      for each class. Default is 1.0.

  Returns:
    A map of maps from output_type (e.g., semantic prediction) to a
      dictionary of multi-scale logits names to logits. For each output_type,
      the dictionary has keys which correspond to the scales and values which
      correspond to the logits. For example, if `scales` equals [1.0, 1.5],
      then the keys would include 'merged_logits', 'logits_1.00' and
      'logits_1.50'.

  Raises:
    AssertionError: If 'dynamic_block_hint' or 'class_hint' is requested
      together with any other hint type.
  """
  samples = inputs_queue.dequeue()

  # Add name to input and label nodes so we can add to summary.
  samples[common.IMAGE] = tf.identity(samples[common.IMAGE], name=common.IMAGE)
  samples[common.LABEL] = tf.identity(samples[common.LABEL], name=common.LABEL)

  if FLAGS.input_hints:
    if 'dynamic_block_hint' in FLAGS.hint_types:
      assert len(FLAGS.hint_types) == 1, (
          'When using dynamic block hints, do not use other hint types!')
      print("----")
      print("train.py: Block hints with grid {}x{}.".format(
          FLAGS.dynamic_block_hint_B, FLAGS.dynamic_block_hint_B))
      print("train.py: Drawing blocks with p {}.".format(
          FLAGS.dynamic_block_hint_p))

      # Hints are generated from the labels by a Python helper; the second
      # (boolean) output of the helper is not used here.
      class_hints, _ = tf.py_func(
          func=train_utils.generate_class_partial_boundaries_helper(
              B=FLAGS.dynamic_block_hint_B, p=FLAGS.dynamic_block_hint_p),
          inp=[samples[common.LABEL]],
          Tout=[tf.uint8, tf.bool])
      samples[common.HINT] = class_hints
      # tf.py_func outputs carry no static shape; reuse the label's shape.
      samples[common.HINT].set_shape(
          samples[common.LABEL].get_shape().as_list())

      # Fall through to the class-hint encoding below.
      # NOTE(review): this mutates the global FLAGS, so a second call to this
      # function (e.g. for another clone) would skip this branch and read
      # samples[common.HINT] from the queue instead -- confirm this is
      # intended before relying on multiple clones.
      FLAGS.hint_types = ['class_hint']

    if 'class_hint' in FLAGS.hint_types:
      assert len(FLAGS.hint_types) == 1, (
          'When using class hints, do not use other hint types!')
      num_classes = outputs_to_num_classes['semantic']
      print('train.py: num semantic classes is {}'.format(num_classes))

      # One channel per class: 255 where the hint equals that class, else 0.
      # Per the original author's note, the factor 255 brings the channel into
      # the same range as image pixels so that the feature extractor's mean
      # subtraction maps it back to roughly the 0-1 range.
      class_hint_channels_list = [
          tf.to_float(tf.equal(samples[common.HINT], label)) * 255
          for label in range(num_classes)
      ]
      samples[common.HINT] = tf.concat(class_hint_channels_list, axis=-1)

    # Get hints and concat to image as input into network.
    samples[common.HINT] = tf.identity(samples[common.HINT], name=common.HINT)
    model_inputs = tf.concat(
        [samples[common.IMAGE], tf.to_float(samples[common.HINT])], axis=-1)
  else:
    # Just image is input into network.
    model_inputs = samples[common.IMAGE]

  model_options = common.ModelOptions(
      outputs_to_num_classes=outputs_to_num_classes,
      crop_size=FLAGS.train_crop_size,
      atrous_rates=FLAGS.atrous_rates,
      output_stride=FLAGS.output_stride)

  print('train.py: FORCE_DROPOUT IS {}'.format(FLAGS.force_dropout))
  if FLAGS.force_dropout:
    print('train.py: FORCE_DROPOUT keep prob {}'.format(FLAGS.keep_prob))
  print('train.py: FORCE_DROPOUT_ONLY_BRANCH IS {}'.format(
      FLAGS.force_dropout_only_branch))

  outputs_to_scales_to_logits = model.multi_scale_logits(
      model_inputs,
      model_options=model_options,
      image_pyramid=FLAGS.image_pyramid,
      weight_decay=FLAGS.weight_decay,
      is_training=True,
      fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
      force_dropout=FLAGS.force_dropout,
      force_dropout_only_branch=FLAGS.force_dropout_only_branch,
      keep_prob=FLAGS.keep_prob)

  # Add name to graph node so we can add to summary.
  output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
  output_type_dict[model.get_merged_logits_scope()] = tf.identity(
      output_type_dict[model.get_merged_logits_scope()],
      name=common.OUTPUT_TYPE)

  for output, num_classes in six.iteritems(outputs_to_num_classes):
    print('OUTPUTS: {}'.format(output))
    train_utils.add_softmax_cross_entropy_loss_for_each_scale(
        outputs_to_scales_to_logits[output],
        samples[common.LABEL],
        num_classes,
        ignore_label,
        loss_weight=loss_weight,
        upsample_logits=FLAGS.upsample_logits,
        scope=output,
    )

  return outputs_to_scales_to_logits
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
  """Builds a clone of DeepLab, optionally preceded by a NUS sampling stage.

  Depending on FLAGS.nus_type, the samples are first resampled at locations
  produced by `_nus_uniform_locations` or `_nus_locations`. When
  FLAGS.nus_train is set, only a mean-squared-error loss between the produced
  and the target sampling locations is registered (plus image summaries) and
  the function returns before the segmentation model is built.

  Args:
    iterator: An iterator of type tf.data.Iterator for images and labels.
    outputs_to_num_classes: A map from output type to the number of classes.
      For example, for the task of semantic segmentation with 21 semantic
      classes, we would have outputs_to_num_classes['semantic'] = 21.
    ignore_label: Ignore label.

  Returns:
    None. Losses and summaries are registered as side effects on the graph.
  """
  samples = iterator.get_next()

  # Crop size handed to the model; replaced by a square of side
  # FLAGS.nus_sampling_size whenever either NUS flag is set.
  train_size = [int(sz) for sz in FLAGS.train_crop_size]
  if FLAGS.nus_preprocess is not None:
    train_size = [FLAGS.nus_sampling_size] * 2

  if FLAGS.nus_type is not None:
    train_size = [FLAGS.nus_sampling_size] * 2
    # sampling requested
    if FLAGS.nus_type == 'uniform':
      sampling_location = _nus_uniform_locations()
    else:
      # Make the batch dimension static (per-clone batch size) when it is not
      # already a plain int, before handing the image to _nus_locations.
      shape = list(samples[common.IMAGE].get_shape())
      if not isinstance(shape[0], int):
        shape[0] = FLAGS.train_batch_size // FLAGS.num_clones
      samples[common.IMAGE].set_shape(shape)
      sampling_location = _nus_locations(samples[common.IMAGE])
    if FLAGS.nus_train:
      # Train only the sampling locations: regress them onto the targets.
      target_locations = samples[TARGET_SAMPLING]
      tf.losses.mean_squared_error(sampling_location, target_locations)
      target_locations.set_shape(sampling_location.get_shape())
      tf.summary.image("InputImages", samples[common.IMAGE])
      # NOTE(review): the divisor 19 presumably scales label ids for display
      # (19 classes?) -- confirm against the dataset in use.
      tf.summary.image("InputLabel", tf.to_float(samples[common.LABEL]) / 19)
      tf.summary.image("ResViz", viz(sampling_location))
      tf.summary.image("TargetViz", viz(target_locations))
      # Early exit: the segmentation network below is not built in this mode.
      return
    sampling_location = _resize_locations(sampling_location)
    with tf.name_scope("NUS-Sampling", values=[samples, sampling_location]):
      samples = _nus_sample(samples, sampling_location)

  # Add name to input and label nodes so we can add to summary.
  samples[common.IMAGE] = tf.identity(samples[common.IMAGE], name=common.IMAGE)
  samples[common.LABEL] = tf.identity(samples[common.LABEL], name=common.LABEL)

  if FLAGS.nus_preprocess:
    # Visualize the sampling that came with the samples via a Python helper.
    sampling = samples[SAMPLING]
    sampling_viz = tf.py_func(
        viz_sampling,
        [sampling],
        tf.uint8,
    )
    tf.summary.image("Sampling", sampling_viz)

  model_options = common.ModelOptions(
      outputs_to_num_classes=outputs_to_num_classes,
      crop_size=train_size,
      atrous_rates=FLAGS.atrous_rates,
      output_stride=FLAGS.output_stride)
  outputs_to_scales_to_logits = model.multi_scale_logits(
      samples[common.IMAGE],
      model_options=model_options,
      image_pyramid=FLAGS.image_pyramid,
      weight_decay=FLAGS.weight_decay,
      is_training=True,
      fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
      nas_training_hyper_parameters={
          'drop_path_keep_prob': FLAGS.drop_path_keep_prob,
          'total_training_steps': FLAGS.training_number_of_steps,
      })

  # Add name to graph node so we can add to summary.
  output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
  output_type_dict[model.MERGED_LOGITS_SCOPE] = tf.identity(
      output_type_dict[model.MERGED_LOGITS_SCOPE], name=common.OUTPUT_TYPE)

  for output, num_classes in six.iteritems(outputs_to_num_classes):
    train_utils.add_softmax_cross_entropy_loss_for_each_scale(
        outputs_to_scales_to_logits[output],
        samples[common.LABEL],
        num_classes,
        ignore_label,
        loss_weight=1.0,
        upsample_logits=FLAGS.upsample_logits,
        hard_example_mining_step=FLAGS.hard_example_mining_step,
        top_k_percent_pixels=FLAGS.top_k_percent_pixels,
        scope=output)

  # Log the summary.
  # NOTE: `num_classes` here is the loop variable left over from the last
  # iteration above; it is undefined if outputs_to_num_classes is empty.
  _log_summaries(samples[common.IMAGE], samples[common.LABEL], num_classes,
                 output_type_dict[model.MERGED_LOGITS_SCOPE])