def _valid_class_mask(num_classes, ignore_label, per_class_totals):
    """Returns a boolean mask selecting the classes used for the averages.

    A class is selected when its index differs from `ignore_label` and its
    entry in `per_class_totals` is non-zero.
    """
    return tf.logical_and(
        tf.not_equal(tf.range(0, num_classes), ignore_label),
        tf.not_equal(per_class_totals, 0))


def _per_class_metric_summary(name, per_class_values, valid_classes,
                              num_classes):
    """Returns a scalar summary of the valid-class mean that also prints it.

    The returned op is the summary wrapped in two `tf.Print` ops: one for the
    per-class values and one for their mean over `valid_classes`.
    """
    mean_value = tf.reduce_mean(
        tf.boolean_mask(per_class_values, valid_classes))
    op = tf.summary.scalar(name, mean_value)
    op = tf.Print(
        op, [per_class_values], name + '_classwise: ', summarize=num_classes)
    op = tf.Print(op, [mean_value], name + '_mean: ', summarize=1)
    return op


def main(unused_argv):
    """Builds the panoptic evaluation graph and runs the slim evaluation loop.

    The evaluation split named by FLAGS is read through
    `data_generator.Dataset`, labels are predicted with `model.predict_labels`,
    the semantic / center / offset outputs are merged by
    `generate_instance_segmentation`, and the streaming panoptic quality and
    parsing covering metrics are evaluated with
    `slim.evaluation.evaluation_loop` using FLAGS.eval_logdir as its logdir.

    Args:
        unused_argv: Ignored; this function never reads it.

    Raises:
        ValueError: If multi-scale evaluation is requested together with
            quantization, or if an unsupported metric name is encountered.
        NotImplementedError: If multi-scale evaluation is requested (any
            FLAGS.eval_scales other than exactly (1.0,)).
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    crop_size = [int(sz) for sz in FLAGS.eval_crop_size]

    dataset = data_generator.Dataset(
        dataset_name=FLAGS.dataset,
        split_name=FLAGS.eval_split,
        dataset_dir=FLAGS.dataset_dir,
        batch_size=FLAGS.eval_batch_size,
        crop_size=crop_size,
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        model_variant=FLAGS.model_variant,
        num_readers=2,
        is_training=False,
        should_shuffle=False,
        should_repeat=False)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()
        num_classes = dataset.num_of_classes

        model_options = common.ModelOptions(
            outputs_to_num_classes={
                common.OUTPUT_TYPE: num_classes,
                common.INSTANCE: 1,
                common.OFFSET: 2
            },
            crop_size=list(crop_size),
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        # Set shape in order for tf.contrib.tfprof.model_analyzer to work
        # properly.
        samples[common.IMAGE].set_shape(
            [FLAGS.eval_batch_size, crop_size[0], crop_size[1], 3])

        if tuple(FLAGS.eval_scales) == (1.0,):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            if FLAGS.quantize_delay_step >= 0:
                raise ValueError(
                    'Quantize mode is not supported with multi-scale test.')
            # This branch never produced `predictions`, so execution used to
            # die with a NameError a few lines further down. Fail with an
            # explicit message instead.
            # NOTE(review): a multi-scale predictor is presumably meant to be
            # called here -- confirm which model API supports the instance
            # and offset outputs before wiring it in.
            raise NotImplementedError(
                'Multi-scale evaluation is not implemented for panoptic '
                'evaluation; use eval_scales=[1.0].')

        predictions_semantic = predictions[common.OUTPUT_TYPE]
        predictions_center_points = predictions[common.INSTANCE]
        predictions_offset_vectors = predictions[common.OFFSET]

        # Original author's notes on the instance generation step:
        #   tf Non-maxima Suppression
        #   Pooling based NMS for Pooling Instance Centers
        #   Filtering predictions that are less than 0.1
        instance_prediction = generate_instance_segmentation(
            predictions_semantic, predictions_center_points,
            predictions_offset_vectors)

        category_prediction = tf.squeeze(predictions_semantic)

        # Only the first element of the label batch is read here.
        category_label = tf.squeeze(samples[common.LABEL][0])
        instance_label = tf.squeeze(samples[common.LABEL_INSTANCE_IDS][0])

        # Pixels labelled 255 are zeroed out in both the ground-truth
        # categories and the predictions.
        not_ignore_mask = tf.not_equal(category_label, 255)
        category_label = tf.cast(
            category_label * tf.cast(not_ignore_mask, tf.int32), tf.int32)
        prediction_mask = tf.cast(not_ignore_mask, tf.int64)
        category_prediction = category_prediction * prediction_mask
        instance_prediction = instance_prediction * prediction_mask

        # Define the evaluation metrics. The class count comes from the
        # dataset so the metric tensors always line up with the per-class
        # masks built below (a hard-coded 19 broke any other class count).
        metric_map = {
            'panoptic_quality':
                streaming_metrics.streaming_panoptic_quality(
                    category_label,
                    instance_label,
                    category_prediction,
                    instance_prediction,
                    num_classes=num_classes,
                    max_instances_per_category=256,
                    ignored_label=255,
                    offset=256 * 256),
            'parsing_covering':
                streaming_metrics.streaming_parsing_covering(
                    category_label,
                    instance_label,
                    category_prediction,
                    instance_prediction,
                    num_classes=num_classes,
                    max_instances_per_category=256,
                    ignored_label=255,
                    offset=256 * 256,
                    normalize_by_image_size=True),
        }
        metrics_to_values, metrics_to_updates = (
            slim.metrics.aggregate_metric_map(metric_map))

        summary_ops = []
        # `.items()` works on both Python 2 and 3; `.iteritems()` is
        # Python 2 only.
        for metric_name, metric_value in metrics_to_values.items():
            if metric_name == 'panoptic_quality':
                pq, sq, rq, total_tp, total_fn, total_fp = tf.unstack(
                    metric_value, 6, axis=0)
                panoptic_metrics = {
                    # Panoptic quality.
                    'pq': pq,
                    # Segmentation quality.
                    'sq': sq,
                    # Recognition quality.
                    'rq': rq,
                    # Total true positives.
                    'total_tp': total_tp,
                    # Total false negatives.
                    'total_fn': total_fn,
                    # Total false positives.
                    'total_fp': total_fp,
                }
                # Find the valid classes that will be used for evaluation.
                # We ignore the `ignore_label` class and other classes which
                # have (tp + fn + fp) equal to 0.
                valid_classes = _valid_class_mask(
                    num_classes, dataset.ignore_label,
                    total_tp + total_fn + total_fp)
                for target_metric, target_value in panoptic_metrics.items():
                    summary_ops.append(
                        _per_class_metric_summary(
                            '{}_{}'.format(metric_name, target_metric),
                            target_value, valid_classes, num_classes))
            elif metric_name == 'parsing_covering':
                (per_class_covering, total_per_class_weighted_ious,
                 total_per_class_gt_areas) = tf.unstack(
                     metric_value, 3, axis=0)
                # Find the valid classes that will be used for evaluation.
                # We ignore the `ignore_label` class and other classes which
                # have total_per_class_weighted_ious +
                # total_per_class_gt_areas equal to 0.
                valid_classes = _valid_class_mask(
                    num_classes, dataset.ignore_label,
                    total_per_class_weighted_ious + total_per_class_gt_areas)
                summary_ops.append(
                    _per_class_metric_summary(metric_name, per_class_covering,
                                              valid_classes, num_classes))
            else:
                raise ValueError('The metric_name "%s" is not supported.' %
                                 metric_name)

        # A non-positive flag value maps to None, which the slim evaluation
        # loop documents as "evaluate indefinitely".
        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations

        if FLAGS.quantize_delay_step >= 0:
            contrib_quantize.create_eval_graph()

        contrib_tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=contrib_tfprof.model_analyzer.
            TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
        contrib_tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=20,
            # Materialize the dict views: on Python 3 a `dict_values` object
            # is not a list/tuple and is rejected as a session fetch.
            eval_op=list(metrics_to_updates.values()),
            final_op=list(metrics_to_values.values()),
            summary_op=tf.summary.merge(summary_ops),
            # Pass the normalized value computed above rather than the raw
            # flag, so non-positive values do not stop after one evaluation.
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)
def test_streaming_metric_on_multiple_images(self):
    """Streams parsing covering over the bird, cat and team test images."""
    class_count = 7
    label_offset = 256 * 256

    # Ground-truth instance id -> class id, one mapping per test image.
    bird_classes_by_instance = {92: 5, 176: 3, 255: 4}
    cat_classes_by_instance = {0: 0, 255: 6}
    team_classes_by_instance = {
        0: 0,
        47: 1,
        97: 1,
        133: 1,
        150: 1,
        174: 1,
        198: 2,
        215: 1,
        244: 1,
        255: 1,
    }

    Fixture = collections.namedtuple(
        'TestImage',
        ['gt_class_map', 'gt_path', 'pred_inst_path', 'pred_class_path'])
    fixtures = [
        Fixture(bird_classes_by_instance, 'bird_gt.png',
                'bird_pred_instance.png', 'bird_pred_class.png'),
        Fixture(cat_classes_by_instance, 'cat_gt.png',
                'cat_pred_instance.png', 'cat_pred_class.png'),
        Fixture(team_classes_by_instance, 'team_gt_instance.png',
                'team_pred_instance.png', 'team_pred_class.png'),
    ]

    # One (gt_class, gt_instance, pred_class, pred_instance) tuple per image.
    per_image_arrays = []
    for fixture in fixtures:
        gt_instance, gt_class = (
            test_utils.panoptic_segmentation_with_class_map(
                fixture.gt_path, fixture.gt_class_map))
        pred_instance = test_utils.read_test_image(
            fixture.pred_inst_path, mode='L')
        pred_class = test_utils.read_segmentation_with_rgb_color_map(
            fixture.pred_class_path, _CLASS_COLOR_MAP)
        per_image_arrays.append(
            (gt_class, gt_instance, pred_class, pred_instance))

    gt_class_ph = tf.placeholder(tf.uint16)
    gt_instance_ph = tf.placeholder(tf.uint16)
    pred_class_ph = tf.placeholder(tf.uint16)
    pred_instance_ph = tf.placeholder(tf.uint16)
    placeholders = (gt_class_ph, gt_instance_ph, pred_class_ph,
                    pred_instance_ph)

    coverings, update_ops = streaming_metrics.streaming_parsing_covering(
        gt_class_ph,
        gt_instance_ph,
        pred_class_ph,
        pred_instance_ph,
        num_classes=class_count,
        max_instances_per_category=256,
        ignored_label=0,
        offset=label_offset,
        normalize_by_image_size=False)
    covering_op, weighted_iou_op, gt_area_op = tf.unstack(
        coverings, num=3, axis=0)

    with self.session() as sess:
        sess.run(tf.local_variables_initializer())
        for arrays in per_image_arrays:
            sess.run(update_ops, feed_dict=dict(zip(placeholders, arrays)))
        # The final read feeds scalar zeros to every placeholder; only the
        # three result tensors are fetched, not the update ops.
        zero_feed = {placeholder: 0 for placeholder in placeholders}
        actual_coverings, actual_weighted_ious, actual_gt_areas = sess.run(
            [covering_op, weighted_iou_op, gt_area_op], feed_dict=zero_feed)

    expected_coverings = [
        0.0,
        0.7009696912,
        0.5406896552,
        0.7453531599,
        0.8576779026,
        0.9910687881,
        0.7741046032,
    ]
    expected_weighted_ious = [
        0.0,
        39864.14634,
        3136,
        1177.657993,
        2498.41573,
        33366.31289,
        26671,
    ]
    expected_gt_areas = [
        0.0,
        56870,
        5800,
        1580,
        2913,
        33667,
        34454,
    ]
    np.testing.assert_array_almost_equal(
        actual_coverings, expected_coverings, decimal=4)
    np.testing.assert_array_almost_equal(
        actual_weighted_ious, expected_weighted_ious, decimal=4)
    np.testing.assert_array_equal(actual_gt_areas, expected_gt_areas)
def test_streaming_metric_on_multiple_images_normalize_by_size(self):
    """Streams size-normalized parsing covering over three test images."""
    class_count = 7
    label_offset = 256 * 256

    # Ground-truth instance id -> class id, one mapping per test image.
    bird_classes_by_instance = {92: 5, 176: 3, 255: 4}
    cat_classes_by_instance = {0: 0, 255: 6}
    team_classes_by_instance = {
        0: 0,
        47: 1,
        97: 1,
        133: 1,
        150: 1,
        174: 1,
        198: 2,
        215: 1,
        244: 1,
        255: 1,
    }

    Fixture = collections.namedtuple(
        'TestImage',
        ['gt_class_map', 'gt_path', 'pred_inst_path', 'pred_class_path'])
    fixtures = [
        Fixture(bird_classes_by_instance, 'bird_gt.png',
                'bird_pred_instance.png', 'bird_pred_class.png'),
        Fixture(cat_classes_by_instance, 'cat_gt.png',
                'cat_pred_instance.png', 'cat_pred_class.png'),
        Fixture(team_classes_by_instance, 'team_gt_instance.png',
                'team_pred_instance.png', 'team_pred_class.png'),
    ]

    # One (gt_class, gt_instance, pred_class, pred_instance) tuple per image.
    per_image_arrays = []
    for fixture in fixtures:
        gt_instance, gt_class = (
            test_utils.panoptic_segmentation_with_class_map(
                fixture.gt_path, fixture.gt_class_map))
        pred_instance = test_utils.read_test_image(
            fixture.pred_inst_path, mode='L')
        pred_class = test_utils.read_segmentation_with_rgb_color_map(
            fixture.pred_class_path, _CLASS_COLOR_MAP)
        per_image_arrays.append(
            (gt_class, gt_instance, pred_class, pred_instance))

    gt_class_ph = tf.placeholder(tf.uint16)
    gt_instance_ph = tf.placeholder(tf.uint16)
    pred_class_ph = tf.placeholder(tf.uint16)
    pred_instance_ph = tf.placeholder(tf.uint16)
    placeholders = (gt_class_ph, gt_instance_ph, pred_class_ph,
                    pred_instance_ph)

    coverings, update_ops = streaming_metrics.streaming_parsing_covering(
        gt_class_ph,
        gt_instance_ph,
        pred_class_ph,
        pred_instance_ph,
        num_classes=class_count,
        max_instances_per_category=256,
        ignored_label=0,
        offset=label_offset,
        normalize_by_image_size=True)
    covering_op, weighted_iou_op, gt_area_op = tf.unstack(
        coverings, num=3, axis=0)

    with self.session() as sess:
        sess.run(tf.local_variables_initializer())
        for arrays in per_image_arrays:
            sess.run(update_ops, feed_dict=dict(zip(placeholders, arrays)))
        # The final read feeds scalar zeros to every placeholder; only the
        # three result tensors are fetched, not the update ops.
        zero_feed = {placeholder: 0 for placeholder in placeholders}
        actual_coverings, actual_weighted_ious, actual_gt_areas = sess.run(
            [covering_op, weighted_iou_op, gt_area_op], feed_dict=zero_feed)

    expected_coverings = [
        0.0,
        0.7009696912,
        0.5406896552,
        0.7453531599,
        0.8576779026,
        0.9910687881,
        0.7741046032,
    ]
    expected_weighted_ious = [
        0.0,
        0.5002088756,
        0.03935002196,
        0.03086105851,
        0.06547211033,
        0.8743792686,
        0.2549565051,
    ]
    expected_gt_areas = [
        0.0,
        0.7135955832,
        0.07277746408,
        0.04140461216,
        0.07633647799,
        0.8822589099,
        0.3293566581,
    ]
    np.testing.assert_array_almost_equal(
        actual_coverings, expected_coverings, decimal=4)
    np.testing.assert_array_almost_equal(
        actual_weighted_ious, expected_weighted_ious, decimal=4)
    np.testing.assert_array_almost_equal(
        actual_gt_areas, expected_gt_areas, decimal=4)
def test_streaming_metric_on_single_image(self):
    """Checks parsing covering after a single update on the team image."""
    label_offset = 256 * 256

    # Ground-truth instance id -> class id for the team image.
    classes_by_instance = {
        0: 0,
        47: 1,
        97: 1,
        133: 1,
        150: 1,
        174: 1,
        198: 2,
        215: 1,
        244: 1,
        255: 1,
    }

    gt_instance_array, gt_class_array = (
        test_utils.panoptic_segmentation_with_class_map(
            'team_gt_instance.png', classes_by_instance))
    pred_class_array = test_utils.read_segmentation_with_rgb_color_map(
        'team_pred_class.png', _CLASS_COLOR_MAP)
    pred_instance_array = test_utils.read_test_image(
        'team_pred_instance.png', mode='L')

    gt_class_ph = tf.placeholder(tf.uint16)
    gt_instance_ph = tf.placeholder(tf.uint16)
    pred_class_ph = tf.placeholder(tf.uint16)
    pred_instance_ph = tf.placeholder(tf.uint16)

    coverings, update_ops = streaming_metrics.streaming_parsing_covering(
        gt_class_ph,
        gt_instance_ph,
        pred_class_ph,
        pred_instance_ph,
        num_classes=3,
        max_instances_per_category=256,
        ignored_label=0,
        offset=label_offset,
        normalize_by_image_size=False)
    result_ops = tf.unstack(coverings, num=3, axis=0)

    # The same image data is fed for both the update and the final read.
    image_feed = {
        gt_class_ph: gt_class_array,
        gt_instance_ph: gt_instance_array,
        pred_class_ph: pred_class_array,
        pred_instance_ph: pred_instance_array,
    }

    with self.session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(update_ops, feed_dict=image_feed)
        actual_coverings, actual_weighted_ious, actual_gt_areas = sess.run(
            list(result_ops), feed_dict=image_feed)

    np.testing.assert_array_almost_equal(
        actual_coverings, [0.0, 0.7009696912, 0.5406896552], decimal=4)
    np.testing.assert_array_almost_equal(
        actual_weighted_ious, [0.0, 39864.14634, 3136], decimal=4)
    np.testing.assert_array_equal(actual_gt_areas, [0, 56870, 5800])