def _load_object_annotations(object_annotations_file):
    """Read an object annotation JSON file and group its annotations by image.

    Args:
      object_annotations_file: Path of the JSON file, opened through
        `tf.gfile.GFile`.

    Returns:
      A tuple `(img_to_obj_annotation, category_index, num_attributes)`:
        img_to_obj_annotation: `defaultdict(list)` keyed by each annotation's
          'image_id', holding the annotation dicts for that image.
        category_index: result of `label_map_util.create_category_index` on
          the file's 'categories' entry.
        num_attributes: value of `info.num_attributes`, or None when the
          'info' entry has no such key.
    """
    with tf.gfile.GFile(object_annotations_file, 'r') as json_file:
        payload = json.load(json_file)

    num_attributes = payload['info'].get('num_attributes')
    image_records = payload['images']
    category_index = label_map_util.create_category_index(
        payload['categories'])

    logging.info('Building bounding box index.')
    img_to_obj_annotation = collections.defaultdict(list)
    for entry in payload['annotations']:
        img_to_obj_annotation[entry['image_id']].append(entry)

    # A membership test does not insert a key into a defaultdict, so counting
    # leaves the index untouched.
    missing_annotation_count = sum(
        1 for record in image_records
        if record['id'] not in img_to_obj_annotation)
    logging.info('%d images are missing bboxes.', missing_annotation_count)

    return img_to_obj_annotation, category_index, num_attributes
def _load_object_annotations(object_annotations_file):
    """Loads object annotation JSON file.

    Args:
      object_annotations_file: Path of the JSON file, opened through
        `tf.io.gfile.GFile`.

    Returns:
      A tuple `(img_to_obj_annotation, category_index, num_attributes)`:
        img_to_obj_annotation: `defaultdict(list)` keyed by each annotation's
          'image_id', holding the annotation dicts for that image.
        category_index: result of `label_map_util.create_category_index` on
          the file's 'categories' entry.
        num_attributes: `info.num_attributes` when present; otherwise one more
          than the largest 'id' found in the file's 'attributes' list.

    Raises:
      KeyError: if a required top-level entry is missing from the JSON.
      ValueError: if `info.num_attributes` is absent and the 'attributes' list
        is empty.
    """
    with tf.io.gfile.GFile(object_annotations_file, "r") as fid:
        obj_annotations = json.load(fid)

    try:
        num_attributes = obj_annotations["info"]["num_attributes"]
    except KeyError:
        logging.warning(
            "Get `num_attributes` by adding 1 to the maximum attribute id "
            "because COCO JSON doesn't have `info.num_attributes`.")
        # Use the true maximum rather than the last list element, so the
        # result does not depend on the attributes being sorted by id.
        num_attributes = max(
            attribute["id"] for attribute in obj_annotations["attributes"]) + 1

    images = obj_annotations["images"]
    category_index = label_map_util.create_category_index(
        obj_annotations["categories"])

    img_to_obj_annotation = collections.defaultdict(list)
    logging.info("Building bounding box index.")
    for annotation in obj_annotations["annotations"]:
        img_to_obj_annotation[annotation["image_id"]].append(annotation)

    # Membership tests never insert keys into a defaultdict.
    missing_annotation_count = sum(
        1 for image in images if image["id"] not in img_to_obj_annotation)
    logging.info("%d images are missing bboxes.", missing_annotation_count)

    return img_to_obj_annotation, category_index, num_attributes
def __init__(self, imagePath, modelPath):
    """Resolve file paths, load the label map and import the frozen graph.

    Args:
      imagePath: Image file name; joined onto `<cwd>/research`.
      modelPath: Model directory name; joined onto the current working
        directory and expected to hold `frozen_inference_graph.pb`.
    """
    # Adds the parent directory to the module search path.
    sys.path.append("..")

    self.MODEL_NAME = modelPath
    self.IMAGE_NAME = imagePath

    # Every path below is anchored at the current working directory.
    base_dir = os.getcwd()
    self.PATH_TO_CKPT = os.path.join(
        base_dir, self.MODEL_NAME, 'frozen_inference_graph.pb')
    self.PATH_TO_LABELS = os.path.join(
        base_dir, 'research/data', 'labelmap.pbtxt')
    self.PATH_TO_IMAGE = os.path.join(base_dir, 'research', self.IMAGE_NAME)
    print(self.PATH_TO_IMAGE)

    # Passed as `max_num_classes` when converting the label map.
    self.NUM_CLASSES = 11

    # Label map -> category list -> index built by label_map_util.
    self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    self.categories = label_map_util.convert_label_map_to_categories(
        self.label_map,
        max_num_classes=self.NUM_CLASSES,
        use_display_name=True)
    self.category_index = label_map_util.create_category_index(
        self.categories)

    # Class ids start at 1 and follow the order of this tuple.
    class_names = (
        "ear_ring",
        "GirlsTopWear",
        "glass",
        "hat",
        "Jacket",
        "MensShorts",
        "MensTopWear",
        "Pant",
        "shoes",
        "tie",
        "watch",
    )
    self.class_names_mapping = dict(enumerate(class_names, start=1))

    # Read the serialized GraphDef and import it into a dedicated graph.
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        frozen_graph_def = tf.GraphDef()
        with tf.gfile.GFile(self.PATH_TO_CKPT, 'rb') as model_file:
            frozen_graph_def.ParseFromString(model_file.read())
            tf.import_graph_def(frozen_graph_def, name='')

    # Look up the input tensor and the four detection outputs by name.
    lookup = self.detection_graph.get_tensor_by_name
    self.image_tensor = lookup('image_tensor:0')
    self.detection_boxes = lookup('detection_boxes:0')
    self.detection_scores = lookup('detection_scores:0')
    self.detection_classes = lookup('detection_classes:0')
    self.num_detections = lookup('num_detections:0')
def test_create_category_index(self):
    """create_category_index returns the category dicts keyed by their 'id'."""
    categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]
    expected_index = {
        1: {'name': u'1', 'id': 1},
        2: {'name': u'2', 'id': 2},
    }

    actual_index = label_map_util.create_category_index(categories)

    self.assertDictEqual(expected_index, actual_index)
def _create_tf_record_from_coco_annotations(
        annotations_file, image_dir, output_path, include_masks, num_shards):
    """Convert a COCO annotation JSON file into sharded tf.Record files.

    Args:
      annotations_file: JSON file containing bounding box annotations.
      image_dir: Directory containing the image files.
      output_path: Path to output tf.Record file.
      include_masks: Whether to include instance segmentation masks
        (PNG encoded) in the result.
      num_shards: Number of output file shards.
    """
    with contextlib2.ExitStack() as tf_record_close_stack, \
            tf.gfile.GFile(annotations_file, 'r') as fid:
        # The shard writers are registered on the exit stack, which closes
        # them when this `with` block ends.
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)

        groundtruth_data = json.load(fid)
        images = groundtruth_data['images']
        category_index = label_map_util.create_category_index(
            groundtruth_data['categories'])

        # Group the annotations by the image they belong to.
        annotations_index = {}
        if 'annotations' in groundtruth_data:
            tf.logging.info(
                'Found groundtruth annotations. Building annotations index.')
            for annotation in groundtruth_data['annotations']:
                annotations_index.setdefault(
                    annotation['image_id'], []).append(annotation)

        # Give images without annotations an empty list so the lookup in the
        # writing loop below always succeeds.
        missing_annotation_count = 0
        for image in images:
            if image['id'] in annotations_index:
                continue
            missing_annotation_count += 1
            annotations_index[image['id']] = []
        tf.logging.info('%d images are missing annotations.',
                        missing_annotation_count)

        total_num_annotations_skipped = 0
        image_count = len(images)
        for idx, image in enumerate(images):
            if idx % 100 == 0:
                tf.logging.info('On image %d of %d', idx, image_count)
            _, tf_example, skipped = create_tf_example(
                image, annotations_index[image['id']], image_dir,
                category_index, include_masks)
            total_num_annotations_skipped += skipped
            # Spread the examples round-robin over the shards.
            output_tfrecords[idx % num_shards].write(
                tf_example.SerializeToString())
        tf.logging.info('Finished writing, skipped %d annotations.',
                        total_num_annotations_skipped)
def __init__(self, imagename, modelpath):
    """Resolve file paths, load the label map and import the frozen graph.

    Args:
      imagename: Image file name; joined onto `<cwd>/research`.
      modelpath: Model directory name; joined onto the current working
        directory and expected to hold `frozen_inference_graph.pb`.
    """
    self.IMAGE = imagename
    self.MODEL_NAME = modelpath

    # Adds the parent directory to the module search path.
    sys.path.append("..")

    # Every path below is anchored at the current working directory.
    working_dir = os.getcwd()
    self.PATH_TO_CKPT = os.path.join(
        working_dir, self.MODEL_NAME, 'frozen_inference_graph.pb')
    self.PATH_TO_LABELS = os.path.join(
        working_dir, 'research/data', 'labelmap.pbtxt')
    self.PATH_TO_IMAGE = os.path.join(working_dir, 'research', self.IMAGE)

    # Passed as `max_num_classes` when converting the label map.
    self.NUM_OF_CLASSES = 1

    self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    self.categories = label_map_util.convert_label_map_to_categories(
        self.label_map,
        max_num_classes=self.NUM_OF_CLASSES,
        use_display_name=True)
    self.category_index = label_map_util.create_category_index(
        self.categories)

    self.class_names_mapping = {1: "Helmets"}

    # Read the serialized GraphDef and import it into a dedicated graph.
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        frozen_graph_def = tf.GraphDef()
        with tf.gfile.GFile(self.PATH_TO_CKPT, 'rb') as model_file:
            frozen_graph_def.ParseFromString(model_file.read())
            tf.import_graph_def(frozen_graph_def, name='')

    # Look up the input tensor and the four detection outputs by name.
    tensor_named = self.detection_graph.get_tensor_by_name
    self.image_tensor = tensor_named('image_tensor:0')
    self.detection_boxes = tensor_named('detection_boxes:0')
    self.detection_scores = tensor_named('detection_scores:0')
    self.detection_classes = tensor_named('detection_classes:0')
    self.num_detections = tensor_named('num_detections:0')
def evaluate(self):
    """Assemble the metrics dictionary from the underlying evaluation.

    Returns:
      A dict mapping metric names to values. Every key starts with
      `self._metric_prefix`, followed by one of:
        'Precision/mAP@<matching_iou_threshold>IOU': mean average precision.
        'Precision/meanCorLoc@<matching_iou_threshold>IOU': mean CorLoc, only
          when `self._evaluate_corlocs` is set.
        'PerformanceByCategory/AP@<matching_iou_threshold>IOU/<category>':
          per-class average precision, for each class index whose label id
          appears in the category index.
        'PerformanceByCategory/CorLoc@<matching_iou_threshold>IOU/<category>':
          per-class CorLoc, only when `self._evaluate_corlocs` is set.
    """
    results = self._evaluation.evaluate()
    per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = results

    map_key = self._metric_prefix + 'Precision/mAP@{}IOU'.format(
        self._matching_iou_threshold)
    pascal_metrics = {map_key: mean_ap}
    if self._evaluate_corlocs:
        corloc_key = self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format(
            self._matching_iou_threshold)
        pascal_metrics[corloc_key] = mean_corloc

    category_index = label_map_util.create_category_index(self._categories)
    for idx in range(per_class_ap.size):
        label_id = idx + self._label_id_offset
        if label_id not in category_index:
            continue

        # Reduce the name to its ASCII form: decompose, drop non-ASCII
        # characters, then decode the encoded result back when it offers
        # a `decode` method.
        raw_name = category_index[label_id]['name']
        ascii_name = unicodedata.normalize('NFKD', raw_name).encode(
            'ascii', 'ignore')
        category_name = getattr(ascii_name, 'decode', lambda: ascii_name)()

        ap_key = (
            self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
                self._matching_iou_threshold, category_name))
        pascal_metrics[ap_key] = per_class_ap[idx]

        if self._evaluate_corlocs:
            class_corloc_key = (
                self._metric_prefix +
                'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
                    self._matching_iou_threshold, category_name))
            pascal_metrics[class_corloc_key] = per_class_corloc[idx]

    return pascal_metrics
def _load_object_annotations(object_annotations_file):
    """Read an object annotation JSON file and group its annotations by image.

    Args:
      object_annotations_file: Path of the JSON file, opened through
        `tf.io.gfile.GFile`.

    Returns:
      A tuple `(images, img_to_obj_annotation, category_index)`:
        images: the file's 'images' entry, unchanged.
        img_to_obj_annotation: `defaultdict(list)` keyed by each annotation's
          'image_id', holding the annotation dicts for that image.
        category_index: result of `label_map_util.create_category_index` on
          the file's 'categories' entry.
    """
    with tf.io.gfile.GFile(object_annotations_file, 'r') as json_file:
        payload = json.load(json_file)

    images = payload['images']
    category_index = label_map_util.create_category_index(
        payload['categories'])

    tf.compat.v1.logging.info('Building bounding box index.')
    img_to_obj_annotation = collections.defaultdict(list)
    for entry in payload['annotations']:
        img_to_obj_annotation[entry['image_id']].append(entry)

    # A membership test does not insert a key into a defaultdict, so counting
    # leaves the index untouched.
    missing_annotation_count = sum(
        1 for record in images if record['id'] not in img_to_obj_annotation)
    tf.compat.v1.logging.info('%d images are missing bboxes.',
                              missing_annotation_count)

    return images, img_to_obj_annotation, category_index
def __init__(self, imagePath, modelPath):
    """Resolve file paths, load the label map and import the frozen graph.

    Args:
      imagePath: Image file name; joined onto `<cwd>/research`.
      modelPath: Model directory name; joined onto the current working
        directory and expected to hold `frozen_inference_graph.pb`.
    """
    # Adds the parent directory to the module search path.
    sys.path.append("..")

    self.MODEL_NAME = modelPath
    self.IMAGE_NAME = imagePath

    # Every path below is anchored at the current working directory.
    root_dir = os.getcwd()
    self.PATH_TO_CKPT = os.path.join(
        root_dir, self.MODEL_NAME, 'frozen_inference_graph.pb')
    self.PATH_TO_LABELS = os.path.join(
        root_dir, 'research/data', 'labelmap.pbtxt')
    self.PATH_TO_IMAGE = os.path.join(root_dir, 'research', self.IMAGE_NAME)
    print(self.PATH_TO_IMAGE)

    # Passed as `max_num_classes` when converting the label map.
    self.NUM_CLASSES = 1

    # Label map -> category list -> index built by label_map_util.
    self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    self.categories = label_map_util.convert_label_map_to_categories(
        self.label_map,
        max_num_classes=self.NUM_CLASSES,
        use_display_name=True)
    self.category_index = label_map_util.create_category_index(
        self.categories)

    self.class_names_mapping = {1: "Helmets"}

    # Read the serialized GraphDef and import it into a dedicated graph.
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(self.PATH_TO_CKPT, 'rb') as graph_file:
            graph_def.ParseFromString(graph_file.read())
            tf.import_graph_def(graph_def, name='')

    # Look up the input tensor and the four detection outputs by name.
    graph = self.detection_graph
    self.image_tensor = graph.get_tensor_by_name('image_tensor:0')
    self.detection_boxes = graph.get_tensor_by_name('detection_boxes:0')
    self.detection_scores = graph.get_tensor_by_name('detection_scores:0')
    self.detection_classes = graph.get_tensor_by_name('detection_classes:0')
    self.num_detections = graph.get_tensor_by_name('num_detections:0')
if 'frozen_inference_graph.pb' in file_name: tar_file.extract(file, os.getcwd()) """ # Load a (frozen) Tensorflow model into memory. detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') # Loading label map label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) # Helper code def load_image_into_numpy_array(image): (im_width, im_height) = image.size return np.array(image.getdata()).reshape( (im_height, im_width, 3)).astype(np.uint8) # For the sake of simplicity we will use only 2 images: # image1.jpg # image2.jpg # If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS. PATH_TO_TEST_IMAGES_DIR = r'D:\Temp\model\models\research\object_detection\test_images\test_images' TEST_IMAGE_PATHS = glob(PATH_TO_TEST_IMAGES_DIR + '/*jpg')
def visualize_detection_results(result_dict,
                                tag,
                                global_step,
                                categories,
                                summary_dir='',
                                export_dir='',
                                agnostic_mode=False,
                                show_groundtruth=False,
                                groundtruth_box_visualization_color='black',
                                min_score_thresh=.5,
                                max_num_predictions=20,
                                skip_scores=False,
                                skip_labels=False,
                                keep_image_id_for_visualization_export=False):
    """Draws detections on an image and records the result as an image summary.

    The detected boxes (and optionally the groundtruth boxes) are drawn onto
    the image, which is then written to an image summary and, when
    `export_dir` is set, also saved as a PNG file.

    Args:
      result_dict: a dictionary holding groundtruth and detection data for one
        image. Required keys:
          'original_image': a numpy array of shape [1, height, width, 3] or
            [1, height, width, 1]
          'detection_boxes': a numpy array of shape [N, 4]
          'detection_scores': a numpy array of shape [N]
          'detection_classes': a numpy array of shape [N]
        Optional keys:
          'groundtruth_boxes': a numpy array of shape [N, 4]
          'groundtruth_keypoints': a numpy array of shape
            [N, num_keypoints, 2]
        Detections are assumed to be sorted by decreasing score, with scores
        being probabilities between 0 and 1.
      tag: tensorboard tag (string) to associate with the image.
      global_step: global step at which the visualization is generated.
      categories: a list of category dictionaries, each with the keys
        'id' (required integer id), 'name' (required string name) and
        'supercategory' (optional string).
      summary_dir: the output directory to which the image summaries are
        written.
      export_dir: the output directory to which images are written. When
        empty (default), no image is exported.
      agnostic_mode: boolean (default: False) controlling whether to evaluate
        in class-agnostic mode or not.
      show_groundtruth: boolean (default: False) controlling whether
        groundtruth boxes are drawn in addition to detected boxes.
      groundtruth_box_visualization_color: box color for groundtruth boxes.
      min_score_thresh: minimum score threshold for a box to be visualized.
      max_num_predictions: maximum number of detections to visualize.
      skip_scores: whether to skip the score when drawing a detection.
      skip_labels: whether to skip the label when drawing a detection.
      keep_image_id_for_visualization_export: whether to keep the image
        identifier in the filename when exporting to export_dir.

    Raises:
      ValueError: if result_dict lacks one of the required keys, or if
        show_groundtruth is set and 'groundtruth_boxes' is absent.
    """
    detection_fields = fields.DetectionResultFields
    input_fields = fields.InputDataFields

    required_keys = (
        input_fields.original_image,
        detection_fields.detection_boxes,
        detection_fields.detection_scores,
        detection_fields.detection_classes,
    )
    if any(key not in result_dict for key in required_keys):
        raise ValueError('result_dict does not contain all expected keys.')
    if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
        raise ValueError(
            'If show_groundtruth is enabled, result_dict must contain '
            'groundtruth_boxes.')
    tf.logging.info('Creating detection visualizations.')
    category_index = label_map_util.create_category_index(categories)

    # Drop the leading batch axis; a single-channel image is repeated three
    # times along the channel axis so it can be drawn on as RGB.
    image = np.squeeze(result_dict[input_fields.original_image], axis=0)
    if image.shape[2] == 1:
        image = np.tile(image, [1, 1, 3])

    detection_boxes = result_dict[detection_fields.detection_boxes]
    detection_scores = result_dict[detection_fields.detection_scores]
    detection_classes = np.int32(
        result_dict[detection_fields.detection_classes])
    detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
    detection_masks = result_dict.get(detection_fields.detection_masks)
    detection_boundaries = result_dict.get(
        detection_fields.detection_boundaries)

    # Groundtruth is drawn first so the detections end up on top of it.
    if show_groundtruth:
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image=image,
            boxes=result_dict[input_fields.groundtruth_boxes],
            classes=None,
            scores=None,
            category_index=category_index,
            keypoints=result_dict.get(input_fields.groundtruth_keypoints),
            use_normalized_coordinates=False,
            max_boxes_to_draw=None,
            groundtruth_box_visualization_color=(
                groundtruth_box_visualization_color))

    vis_utils.visualize_boxes_and_labels_on_image_array(
        image,
        detection_boxes,
        detection_classes,
        detection_scores,
        category_index,
        instance_masks=detection_masks,
        instance_boundaries=detection_boundaries,
        keypoints=detection_keypoints,
        use_normalized_coordinates=False,
        max_boxes_to_draw=max_num_predictions,
        min_score_thresh=min_score_thresh,
        agnostic_mode=agnostic_mode,
        skip_scores=skip_scores,
        skip_labels=skip_labels)

    if export_dir:
        # The image id only enters the file name when requested and truthy.
        export_name = 'export-{}.png'.format(tag)
        if keep_image_id_for_visualization_export:
            image_id = result_dict[fields.InputDataFields().key]
            if image_id:
                export_name = 'export-{}-{}.png'.format(tag, image_id)
        export_path = os.path.join(export_dir, export_name)
        vis_utils.save_image_array_as_png(image, export_path)

    encoded_png = vis_utils.encode_image_array_as_png_str(image)
    summary_image = tf.Summary.Image(encoded_image_string=encoded_png)
    summary = tf.Summary(
        value=[tf.Summary.Value(tag=tag, image=summary_image)])
    summary_writer = tf.summary.FileWriterCache.get(summary_dir)
    summary_writer.add_summary(summary, global_step)

    tf.logging.info('Detection visualizations written to summary with tag %s.',
                    tag)
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
                                            output_path, include_masks,
                                            num_shards):
    """Loads COCO annotation json files and converts to tf.Record format.

    Examples are built in a `multiprocessing.Pool` and written round-robin to
    `num_shards` files named `<output_path>-NNNNN-of-NNNNN.tfrecord`.

    Args:
      annotations_file: JSON file containing bounding box annotations.
      image_dir: Directory containing the image files.
      output_path: Path prefix of the output tf.Record files.
      include_masks: Whether to include instance segmentation masks
        (PNG encoded) in the result.
      num_shards: Number of output files to create.
    """
    with tf.gfile.GFile(annotations_file, 'r') as fid:
        groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    # Group the annotations by the image they belong to.
    annotations_index = {}
    if 'annotations' in groundtruth_data:
        tf.logging.info(
            'Found groundtruth annotations. Building annotations index.')
        for annotation in groundtruth_data['annotations']:
            annotations_index.setdefault(
                annotation['image_id'], []).append(annotation)

    # Give images without annotations an empty list so the lookup below
    # always succeeds.
    missing_annotation_count = 0
    for image in images:
        image_id = image['id']
        if image_id not in annotations_index:
            missing_annotation_count += 1
            annotations_index[image_id] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    tf.logging.info('writing to output path: %s', output_path)
    example_args = [(image, annotations_index[image['id']], image_dir,
                     category_index, include_masks) for image in images]

    total_num_annotations_skipped = 0
    writers = []
    pool = None
    # The try/finally ensures the shard writers and worker processes are
    # released even when opening a writer or building an example fails.
    try:
        for i in range(num_shards):
            writers.append(
                tf.python_io.TFRecordWriter(
                    output_path + '-%05d-of-%05d.tfrecord' % (i, num_shards)))
        pool = multiprocessing.Pool()
        # `imap` yields results in input order, so `idx` matches `images`.
        for idx, (_, tf_example, num_annotations_skipped) in enumerate(
                pool.imap(_pool_create_tf_example, example_args)):
            if idx % 100 == 0:
                tf.logging.info('On image %d of %d', idx, len(images))
            total_num_annotations_skipped += num_annotations_skipped
            writers[idx % num_shards].write(tf_example.SerializeToString())
        pool.close()
        pool.join()
    finally:
        if pool is not None:
            # Stops any workers still running after an error; has nothing
            # left to stop once close()/join() above has completed.
            pool.terminate()
        for writer in writers:
            writer.close()

    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
def _load_label_map(self):
    """Build the category index from the label map at `self.PATH_TO_LABELS`.

    Returns:
      The result of `label_map_util.create_category_index` applied to the
      categories converted from the label map, limited to
      `self.NUM_CLASSES` classes and using display names.
    """
    parsed_label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    category_list = label_map_util.convert_label_map_to_categories(
        parsed_label_map,
        max_num_classes=self.NUM_CLASSES,
        use_display_name=True)
    return label_map_util.create_category_index(category_list)