def generate_detections_one_image(
        self, image, image_id,
        detection_threshold=DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD):
    """Run the detector on a single image and package the outcome.

    Args:
        image: the PIL Image object to run inference on
        image_id: a path identifying the image; stored in the `file` field
            of the returned dict
        detection_threshold: only detections whose confidence is strictly
            greater than this value are reported

    Returns:
        A dict with the following fields, see
        https://github.com/microsoft/CameraTraps/tree/siyu/inference_refactor/api/batch_processing#batch-processing-api-output-format
        - file (always present)
        - max_detection_conf: highest confidence among the reported
          detections, 0.0 when there are none (absent on failure)
        - detections: list of detection objects containing `category`,
          `conf` and `bbox` (absent on failure)
        - failure: only set when inference raised an exception
    """
    result = {'file': image_id}
    try:
        batch_boxes, batch_scores, batch_classes = \
            self._generate_detections_one_image(image)

        # The batch size is 1, so only the first element of each output is
        # read; this would need a loop to support larger batches.
        kept_detections = []  # stays empty when nothing clears the threshold
        best_conf = 0.0
        for box, score, label in zip(batch_boxes[0], batch_scores[0],
                                     batch_classes[0]):
            if not (score > detection_threshold):
                continue

            # the numerical class label is emitted as a string, not an int
            category = str(int(label))
            # cast to a Python float so the value is json serializable
            conf = truncate_float(float(score),
                                  precision=TFDetector.CONF_DIGITS)
            bbox = TFDetector.__convert_coords(box)
            kept_detections.append({
                'category': category,
                'conf': conf,
                'bbox': bbox
            })

            if score > best_conf:
                best_conf = score

        result['max_detection_conf'] = truncate_float(
            float(best_conf), precision=TFDetector.CONF_DIGITS)
        result['detections'] = kept_detections

    except Exception as e:
        result['failure'] = TFDetector.FAILURE_TF_INFER
        print('TFDetector: image {} failed during inference: {}'.format(
            image_id, str(e)))

    return result
def row_to_classification_list(row: Mapping[str, Any],
                               label_names: Sequence[str],
                               contains_preds: bool,
                               label_pos: Optional[str],
                               threshold: float,
                               relative_conf: bool = False
                               ) -> List[Tuple[str, float]]:
    """Convert one row of classifier output into Batch API classification
    tuples.

    `row` maps each label name to its output probability and may also carry
    the ground-truth label name under the key 'label'. Each entry of the
    returned list is a tuple (str(label_id), prob), where label_id is the
    position of the label in `label_names`.

    When `contains_preds` is true, probabilities below `threshold` are
    dropped, the remaining ones are passed through truncate_float with
    precision 4, and the entries are ordered from highest to lowest
    probability. When `relative_conf` is also true, every probability is
    first reduced by the probability assigned to the ground-truth label and
    clipped at 0.

    If 'label' is in `row` and `label_pos` is not None, the entry
    (str(label_id + 1_000_000), 1.) is added for the ground-truth label: at
    the front of the list when label_pos == 'first', at the end otherwise.
    """
    has_label = 'label' in row
    assert has_label or contains_preds
    if relative_conf:
        assert has_label and contains_preds

    entries = []
    if contains_preds:
        entries = [(str(idx), row[name])
                   for idx, name in enumerate(label_names)]

        if relative_conf:
            true_label_conf = row[row['label']]
            entries = [(key, max(prob - true_label_conf, 0))
                       for key, prob in entries]

        # keep only confidences at or above the threshold, at precision 4
        kept = []
        for key, prob in entries:
            if prob >= threshold:
                kept.append((key, truncate_float(prob, precision=4)))

        # highest probability first
        kept.sort(key=lambda pair: pair[1], reverse=True)
        entries = kept

    if has_label and label_pos is not None:
        true_label_id = label_names.index(row['label'])
        label_entry = (str(true_label_id + 1_000_000), 1.)
        if label_pos == 'first':
            entries.insert(0, label_entry)
        else:
            entries.append(label_entry)

    return entries
def round_and_make_float(d, precision=4):
    """Cast `d` to a Python float and pass it through truncate_float.

    Args:
        d: any value accepted by float()
        precision: forwarded to truncate_float as its `precision` argument

    Returns:
        Whatever truncate_float returns for the converted value.
    """
    as_float = float(d)
    return truncate_float(as_float, precision=precision)
# NOTE(review): this excerpt starts in the middle of a larger routine; the
# header of the loop that binds `im_key` / `im_dict` is not visible here.
# Presumably everything up to `js['images'].append(im_dict)` runs once per
# image and the json write runs once afterwards - confirm against the full
# file before relying on the flat layout below.

# Collects the detection entries kept for the current image
det_list = list()
det_boxes_old_format = detections[im_key]['detection_boxes']
det_classes = detections[im_key]['detection_classes']
det_conf = detections[im_key]['detection_scores']

# Convert boxes from [ymin, xmin, ymax, xmax] format to
# [x_min, y_min, width_of_box, height_of_box]
# (transposing first lets each coordinate be addressed as one row;
# assumes det_boxes_old_format is a numpy array of shape (N, 4) - TODO confirm)
tmp = det_boxes_old_format.T
det_boxes = np.array([tmp[1], tmp[0], tmp[3] - tmp[1], tmp[2] - tmp[0]]).T
del tmp

# Keep only detections whose confidence is strictly above the threshold
for det_id in range(len(det_boxes)):
    if det_conf[det_id] > DETECTION_CONF_THRESHOLD:
        # .item() / .tolist() hand plain Python values to the ct_utils
        # truncation helpers before the result is serialized
        det_list.append(
            dict(category=str(det_classes[det_id]),
                 conf=ct_utils.truncate_float(det_conf[det_id].item()),
                 bbox=ct_utils.truncate_float_array(
                     det_boxes[det_id].tolist())))
im_dict['detections'] = det_list

# max_detection_conf is the maximum over all scores of the image when at
# least one detection was kept, and 0 otherwise
if len(im_dict['detections']) > 0:
    im_dict['max_detection_conf'] = ct_utils.truncate_float(
        max(det_conf).item())
else:
    im_dict['max_detection_conf'] = 0.
js['images'].append(im_dict)

# Write output json
with open(args.output_json, 'wt') as fi:
    json.dump(js, fi, indent=1)
def make_cct_embedded(image_db=None, bbox_db=None):
    """
    Takes in path to the COCO Camera Trap format jsons for images (species labels) and/or
    bboxes (animal/human/vehicle) labels and embed the class names and annotations into the
    image entries.

    Since IndexedJsonDb() can take either a path or a loaded json object as a dict, both
    arguments can be paths or loaded json objects.

    For every image the result carries an 'annotations' dict that may hold:
    - 'species': list of category names from the image DB, plus any extra annotation
      fields that are not ids or image-level fields
    - 'bbox': list of {'category': name, 'bbox_rel': [x, y, w, h]} items from the bbox DB;
      'bbox_rel' is only present when the image entry has 'width' and 'height', which are
      used to normalize the box and then removed from the image entry

    Note that the image objects held by the IndexedJsonDb instances are modified in place.

    Args:
        image_db: path to, or loaded json of, the image (species) DB; optional
        bbox_db: path to, or loaded json of, the bbox DB; optional

    Returns:
        an embedded version of the COCO Camera Trap format json database, as a list of
        image objects

    Raises:
        AssertionError: if no image entries are found in either DB
    """
    # annotation fields that are ids, or that should already be present on the image object
    fields_not_copied = (
        'category_id', 'id', 'image_id', 'datetime', 'location',
        'sequence_level_annotation', 'seq_id', 'seq_num_frames', 'frame_num'
    )

    # at first a dict of image_id: image_obj with annotations embedded, then it becomes
    # an array of image objects
    docs = {}

    def percent_of_docs(count):
        # an empty DB must reach the assertion at the end instead of dividing by zero here
        return round(100 * count / len(docs), 2) if docs else 0.0

    # %% integrate the image DB
    if image_db:
        print('Loading image DB...')
        cct_json_db = IndexedJsonDb(image_db)
        docs = cct_json_db.image_id_to_image  # each image entry is first assigned the image object

        # takes in image entries and species and other annotations in the image DB
        num_images_with_more_than_1_species = 0
        for image_id, annotations in cct_json_db.image_id_to_annotations.items():
            embedded = {'species': []}
            docs[image_id]['annotations'] = embedded
            if len(annotations) > 1:
                num_images_with_more_than_1_species += 1
            for anno in annotations:
                # convert the species category to explicit string name
                embedded['species'].append(cct_json_db.cat_id_to_name[anno['category_id']])

                # there may be other fields in the annotation object
                for anno_field_name, anno_field_val in anno.items():
                    if anno_field_name not in fields_not_copied:
                        embedded[anno_field_name] = anno_field_val

        print('Number of items from the image DB:', len(docs))
        print('Number of images with more than 1 species: {} ({}% of image DB)'.format(
            num_images_with_more_than_1_species,
            percent_of_docs(num_images_with_more_than_1_species)))

    #%% integrate the bbox DB
    if bbox_db:
        print('Loading bbox DB...')
        cct_bbox_json_db = IndexedJsonDb(bbox_db)

        # add any images that are not in the image DB
        # also add any fields in the image object that are not present already
        num_added = 0
        num_amended = 0
        for image_id, image_obj in cct_bbox_json_db.image_id_to_image.items():
            if image_id not in docs:
                # a newly added entry is the bbox DB object itself, nothing to amend
                docs[image_id] = image_obj
                num_added += 1
                continue

            amended = False
            for field_name, val in image_obj.items():
                if field_name not in docs[image_id]:
                    docs[image_id][field_name] = val
                    amended = True
            if amended:
                num_amended += 1

        print('Number of images added from bbox DB entries: ', num_added)
        print('Number of images amended: ', num_amended)
        print('Number of items in total: ', len(docs))

        # add bbox to the annotations field
        num_more_than_1_bbox = 0
        for image_id, bbox_annotations in cct_bbox_json_db.image_id_to_annotations.items():
            doc = docs[image_id]

            # newly added images do not have an annotations field yet
            embedded = doc.setdefault('annotations', {})
            embedded['bbox'] = []

            if len(bbox_annotations) > 1:
                num_more_than_1_bbox += 1

            # relative coordinates can only be computed when the image size is known
            has_size = 'width' in doc and 'height' in doc

            for bbox_anno in bbox_annotations:
                item_bbox = {
                    'category': cct_bbox_json_db.cat_id_to_name[bbox_anno['category_id']],
                }

                if has_size:
                    image_w = doc['width']
                    image_h = doc['height']
                    x, y, w, h = bbox_anno['bbox']
                    item_bbox['bbox_rel'] = [
                        truncate_float(x / image_w),
                        truncate_float(y / image_h),
                        truncate_float(w / image_w),
                        truncate_float(h / image_h)
                    ]

                embedded['bbox'].append(item_bbox)

            # not keeping height and width; removed only after every box of the image was
            # normalized, and pop() tolerates images that never had these fields
            doc.pop('width', None)
            doc.pop('height', None)

        # the percentage is rounded here; passing 2 as an extra format() argument
        # would be silently ignored
        print('Number of images with more than one bounding box: {} ({}% of all entries)'.format(
            num_more_than_1_bbox, percent_of_docs(num_more_than_1_bbox)))
    else:
        print('No bbox DB provided.')

    assert len(docs) > 0, 'No image entries found in the image or bbox DB jsons provided.'

    return list(docs.values())
def generate_detections_one_image(self, img_original, image_id, detection_threshold):
    """Run the detector on a single image and package the outcome.

    Args:
        img_original: the PIL Image object with EXIF rotation taken into account
        image_id: a path to identify the image; will be in the "file" field of the output object
        detection_threshold: confidence threshold handed to non-max suppression as `conf_thres`

    Returns:
        A dict with the following fields, see the 'images' key in
        https://github.com/microsoft/CameraTraps/tree/master/api/batch_processing#batch-processing-api-output-format
        - 'file' (always present)
        - 'max_detection_conf' (always present; highest confidence among the collected detections,
          0.0 when there are none)
        - 'detections' (always present), a list of detection objects containing keys 'category',
          'conf' and 'bbox'; holds whatever was collected before a failure, if one occurred
        - 'failure' (only when an exception was raised during inference)
    """
    result = {'file': image_id}
    detections = []
    max_conf = 0.0

    try:
        original_array = np.asarray(img_original)

        # padded resize; JIT requires auto=False
        letterboxed = letterbox(original_array, new_shape=PTDetector.IMAGE_SIZE,
                                stride=PTDetector.STRIDE, auto=True)[0]

        # HWC to CHW; PIL Image is RGB already
        channels_first = np.ascontiguousarray(letterboxed.transpose((2, 0, 1)))

        img = torch.from_numpy(channels_first).to(self.device).float()
        img /= 255

        # always true for now, TODO add inference using larger batch size
        if len(img.shape) == 3:
            img = torch.unsqueeze(img, 0)

        raw_pred: list = self.model(img)[0]

        # NMS
        per_image_dets = non_max_suppression(prediction=raw_pred, conf_thres=detection_threshold)

        # normalization gain whwh, used to express boxes relative to the original image size
        gain = torch.tensor(original_array.shape)[[1, 0, 1, 0]]

        for det in per_image_dets:
            if not len(det):
                continue

            # Rescale boxes from the network input size to the original image size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], original_array.shape).round()

            for *xyxy, conf, cls in reversed(det):
                # normalized center-x, center-y, width and height
                normalized_xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gain).view(-1).tolist()
                api_box = ct_utils.convert_yolo_to_xywh(normalized_xywh)

                truncated_conf = ct_utils.truncate_float(conf.tolist(), precision=CONF_DIGITS)

                # MegaDetector output format's categories start at 1, but this model's start at 0
                category_id = int(cls.tolist()) + 1
                if category_id not in (1, 2, 3):
                    raise KeyError(f'{category_id} is not a valid class.')

                detections.append({
                    'category': str(category_id),
                    'conf': truncated_conf,
                    'bbox': ct_utils.truncate_float_array(api_box, precision=COORD_DIGITS)
                })
                max_conf = max(max_conf, truncated_conf)

    except Exception as e:
        result['failure'] = FAILURE_INFER
        print('PTDetector: image {} failed during inference: {}'.format(image_id, str(e)))

    result['max_detection_conf'] = max_conf
    result['detections'] = detections

    return result
def _padded_square_crop_box(bbox, image_height, image_width, padding_factor):
    """Return the integer pixel crop [ymin, xmin, ymax, xmax] for one relative bbox.

    `bbox` is [x_min, y_min, width_of_box, height_of_box] in relative coordinates. The box is
    grown to a square whose side is padding_factor times its longer side, centered on the
    original box, and negative coordinates are clamped to 0. Coordinates past the bottom/right
    image border are not clamped here; numpy slicing truncates them when the crop is taken.
    """
    # Convert to [ymin, xmin, ymax, xmax] and store it as 1x4 numpy array so we can
    # re-use the generic multi-box padding code
    box_coords = np.array([[
        bbox[1], bbox[0], bbox[1] + bbox[3], bbox[0] + bbox[2]
    ]])

    # Convert normalized coordinates to pixel coordinates
    box_coords_abs = box_coords * np.tile([image_height, image_width], (1, 2))

    # Per-box [height, width] in pixels
    bbox_sizes = np.vstack([
        box_coords_abs[:, 2] - box_coords_abs[:, 0],
        box_coords_abs[:, 3] - box_coords_abs[:, 1]
    ]).T

    # Half of the extra size needed along each axis to reach the padded square
    offsets = (padding_factor * np.max(bbox_sizes, axis=1, keepdims=True) - bbox_sizes) / 2
    crop_boxes = box_coords_abs + np.hstack([-offsets, offsets])
    crop_boxes = np.maximum(0, crop_boxes).astype(int)

    # Only one box was passed in, so the first row is the result
    return crop_boxes[0]


def classify_boxes(classification_graph, json_with_classes, image_dir,
                   confidence_threshold=DEFAULT_CONFIDENCE_THRESHOLD,
                   detection_category_whitelist=DETECTION_CATEGORY_WHITELIST,
                   padding_factor=PADDING_FACTOR,
                   num_annotated_classes=NUM_ANNOTATED_CLASSES):
    """
    Takes a classification model and applies it to all detected boxes with a detection
    confidence of at least confidence_threshold.

    Images that cannot be loaded are reported on stdout and skipped. Image entries without a
    'detections' list (for example entries that only record a failure) are skipped as well.
    Detections that already carry a non-empty 'classifications' list are left untouched.

    Args:
        classification_graph: frozen graph model that includes the TF-slim preprocessing. i.e.
            it will be given a cropped images with values in [0,1]
        json_with_classes: Object created from the json file that is generated by the
            detection API. However, the field 'classification_categories' is already added.
            The script assumes 0-based indexing.
        image_dir: Base directory of the images. All paths in the JSON are relative to this
            folder
        confidence_threshold: Detections with a confidence below this value are not classified
        detection_category_whitelist: Only boxes with this detection category will be
            classified; must be a list of strings
        padding_factor: The function will enlarge the bounding boxes by this factor before
            passing them to the classifier.
        num_annotated_classes: Number of top-scoring class predictions to store in the json

    Returns the updated json object (the same object that was passed in, modified in place).
    Classification results are added as field 'classifications' to all elements
    images/detections assuming a 0-based indexing of the classifier output, i.e. output with
    index 0 has the class key '0'
    """
    # Make sure we have the right json object
    assert 'classification_categories' in json_with_classes.keys()
    assert isinstance(detection_category_whitelist, list)
    assert all([isinstance(x, str) for x in detection_category_whitelist])

    with classification_graph.as_default():

        with tf.Session(graph=classification_graph) as sess:

            # Get input and output tensors of classification model
            image_tensor = classification_graph.get_tensor_by_name('input:0')
            predictions_tensor = classification_graph.get_tensor_by_name('output:0')
            predictions_tensor = tf.squeeze(predictions_tensor, [0])

            # For each image
            for image_description in tqdm.tqdm(json_with_classes['images']):

                # Resolve the path before the try block so that the error message below
                # always refers to the current image, even when 'file' is missing
                image_path = image_description.get('file')
                if image_path is not None and image_dir:
                    image_path = os.path.join(image_dir, image_path)

                # Read image
                try:
                    # the context manager closes the underlying file handle right away
                    with PIL.Image.open(image_path) as pil_image:
                        image_data = np.array(pil_image.convert("RGB"))
                    # Scale pixel values to [0,1]
                    image_data = image_data / 255
                    image_height, image_width, _ = image_data.shape
                except Exception:
                    # KeyboardInterrupt and SystemExit are not Exception subclasses, so they
                    # still propagate
                    print('Couldn\'t load image {}'.format(image_path))
                    continue

                # For each box; entries without detections have nothing to classify
                for cur_detection in image_description.get('detections') or []:

                    # Skip detections with low confidence
                    if cur_detection['conf'] < confidence_threshold:
                        continue

                    # Skip if detection category is not in whitelist
                    if cur_detection['category'] not in detection_category_whitelist:
                        continue

                    # Skip if already classified
                    if cur_detection.get('classifications'):
                        continue

                    # Pad the detected animal to a square box and additionally by
                    # padding_factor; the box is in relative coordinates and format
                    # [x_min, y_min, width_of_box, height_of_box]
                    crop_box = _padded_square_crop_box(
                        cur_detection['bbox'], image_height, image_width, padding_factor)

                    # Get the image data for that box
                    cropped_img = image_data[crop_box[0]:crop_box[2],
                                             crop_box[1]:crop_box[3]]

                    # Run inference
                    predictions = sess.run(predictions_tensor,
                                           feed_dict={image_tensor: cropped_img})

                    # Store the *num_annotated_classes* top scoring classes, best first
                    top_class_indices = np.argsort(-predictions)[:num_annotated_classes]
                    cur_detection['classifications'] = [
                        ['%i' % class_idx,
                         ct_utils.truncate_float(predictions[class_idx].item())]
                        for class_idx in top_class_indices
                    ]

                # ...for each box

            # ...for each image

        # ...with tf.Session

    # with classification_graph

    return json_with_classes
def main():
    """Command line entry point: embed CCT image and/or bbox DBs into one json of documents.

    Reads the image DB (species labels) and/or the bbox DB given on the command line, embeds
    category names and annotations into the image entries, rewrites each entry's 'id' into a
    Cosmos DB compatible 'image_id', tags each entry with the dataset name, and writes the
    resulting list of documents to the path given by --embedded_db.

    Invalid command line values are reported through the argument parser, which exits the
    process with a usage message.

    Raises:
        AssertionError: if no image entries are found in either DB
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'dataset_name', type=str,
        help='a short string representing the dataset to be used as a partition key in the DB')
    parser.add_argument(
        '--image_db', type=str,
        help='path to the json containing the image DB in CCT format')
    parser.add_argument(
        '--bbox_db', type=str,
        help='path to the json containing the bbox DB in CCT format')
    parser.add_argument(
        '--embedded_db', type=str, required=True,
        help='path to store the resulting json')
    args = parser.parse_args()

    # explicit checks instead of assert statements, which are stripped under `python -O`
    if len(args.dataset_name) == 0:
        parser.error('dataset name cannot be an empty string')
    if args.image_db and not os.path.exists(args.image_db):
        parser.error('image_db file path provided does not point to a file')
    if args.bbox_db and not os.path.exists(args.bbox_db):
        parser.error('bbox_db file path provided does not point to a file')

    # annotation fields that are ids, or that should already be present on the image object
    fields_not_copied = (
        'category_id', 'id', 'image_id', 'datetime', 'location',
        'sequence_level_annotation', 'seq_id', 'seq_num_frames', 'frame_num'
    )

    #%% integrate the image DB

    # at first a dict of image_id: image_obj with annotations embedded,
    # then its values becomes the array of documents that will get uploaded to Cosmos DB
    docs = {}

    if args.image_db:
        print('Loading image DB...')
        cct_json_db = IndexedJsonDb(args.image_db)
        docs = cct_json_db.image_id_to_image  # each image entry is first assigned the image object

        # takes in image entries and species and other annotations in the image DB
        num_images_with_more_than_1_species = 0
        for image_id, annotations in cct_json_db.image_id_to_annotations.items():
            embedded = {'species': []}
            docs[image_id]['annotations'] = embedded
            if len(annotations) > 1:
                num_images_with_more_than_1_species += 1
            for anno in annotations:
                # convert the species category to explicit string name
                embedded['species'].append(cct_json_db.cat_id_to_name[anno['category_id']])

                # there may be other fields in the annotation object
                for anno_field_name, anno_field_val in anno.items():
                    if anno_field_name not in fields_not_copied:
                        embedded[anno_field_name] = anno_field_val

        print('Number of items from the image DB:', len(docs))
        # an empty DB must reach the assertion further down instead of dividing by zero here
        print('Number of images with more than 1 species: {} ({}% of image DB)'.format(
            num_images_with_more_than_1_species,
            round(100 * num_images_with_more_than_1_species / len(docs), 2) if docs else 0.0))

    #%% integrate the bbox DB
    if args.bbox_db:
        print('Loading bbox DB...')
        cct_bbox_json_db = IndexedJsonDb(args.bbox_db)

        # add any images that are not in the image DB
        # also add any fields in the image object that are not present already
        num_added = 0
        num_amended = 0
        for image_id, image_obj in cct_bbox_json_db.image_id_to_image.items():
            if image_id not in docs:
                # a newly added entry is the bbox DB object itself, nothing to amend
                docs[image_id] = image_obj
                num_added += 1
                continue

            amended = False
            for field_name, val in image_obj.items():
                if field_name not in docs[image_id]:
                    docs[image_id][field_name] = val
                    amended = True
            if amended:
                num_amended += 1

        print('Number of images added from bbox DB entries: ', num_added)
        print('Number of images amended: ', num_amended)
        print('Number of items in total: ', len(docs))

        # add bbox to the annotations field
        num_more_than_1_bbox = 0
        for image_id, bbox_annotations in cct_bbox_json_db.image_id_to_annotations.items():
            doc = docs[image_id]

            # newly added images do not have an annotations field yet
            embedded = doc.setdefault('annotations', {})
            embedded['bbox'] = []

            if len(bbox_annotations) > 1:
                num_more_than_1_bbox += 1

            for bbox_anno in bbox_annotations:
                item_bbox = {
                    'category': cct_bbox_json_db.cat_id_to_name[bbox_anno['category_id']],
                    'bbox_abs': bbox_anno['bbox'],
                }

                # relative coordinates can only be computed when the image size is known
                if 'width' in doc:
                    image_w = doc['width']
                    image_h = doc['height']
                    x, y, w, h = bbox_anno['bbox']
                    item_bbox['bbox_rel'] = [
                        truncate_float(x / image_w),
                        truncate_float(y / image_h),
                        truncate_float(w / image_w),
                        truncate_float(h / image_h)
                    ]

                embedded['bbox'].append(item_bbox)

        # the percentage is rounded here; passing 2 as an extra format() argument
        # would be silently ignored
        print('Number of images with more than one bounding box: {} ({}% of all entries)'.format(
            num_more_than_1_bbox,
            round(100 * num_more_than_1_bbox / len(docs), 2) if docs else 0.0))
    else:
        print('No bbox DB provided.')

    assert len(docs) > 0, 'No image entries found in the image or bbox DB jsons provided.'

    docs = list(docs.values())

    #%% processing
    # get rid of any trailing '.JPG' for the id field
    # insert the 'dataset' attribute used as the partition key
    # replace illegal chars (for Cosmos DB) in the id field of the image
    # replace directory separator with tilde ~
    # rename the id field (reserved word) to image_id
    illegal_char_map = {'/': '~', '\\': '~', '?': '__qm__', '#': '__pound__'}
    for doc in docs:
        image_id = doc.pop('id')

        # strip the extension only when it really is a suffix; splitting on '.jpg' would
        # cut the id at the first occurrence anywhere in the path
        for extension in ('.JPG', '.jpg'):
            if image_id.endswith(extension):
                image_id = image_id[:-len(extension)]
                break

        for illegal, replacement in illegal_char_map.items():
            image_id = image_id.replace(illegal, replacement)

        doc['dataset'] = args.dataset_name
        doc['image_id'] = image_id

    #%% some validation
    print('Example items:')
    print()
    print(docs[0])
    print()
    print(docs[-1])
    print()

    num_both_species_bbox = 0
    for item in docs:
        annotations = item.get('annotations', {})
        if 'species' in annotations and 'bbox' in annotations:
            num_both_species_bbox += 1
    print('Number of images with both species and bbox annotations: {} ({}% of all entries)'.format(
        num_both_species_bbox,
        round(100 * num_both_species_bbox / len(docs), 2)))

    #%% save the embedded json database
    write_json(args.embedded_db, docs)