def megadb_to_cct(dataset_name, mega_db, output_path, bbox_only):
    mega_db = [i for i in mega_db if i['dataset'] == dataset_name]
    assert len(mega_db) > 0, 'There are no entries from the dataset {}'.format(dataset_name)

    for i in mega_db:
        del i['dataset']  # all remaining fields will be added to the CCT database
    print('Number of entries belonging to dataset {}: {}'.format(dataset_name, len(mega_db)))

    cct_images, cct_annotations = break_into_images_annotations(mega_db, bbox_only)

    # consolidate categories
    category_names = set()
    for anno in cct_annotations:
        category_names.add(anno['category_name'])

    cat_name_to_id = {
        'empty': 0  # always set empty to 0, even for datasets without 'empty' labeled images
    }
    if bbox_only:
        cat_name_to_id['animal'] = 1
        cat_name_to_id['person'] = 2
        cat_name_to_id['group'] = 3
        cat_name_to_id['vehicle'] = 4

    for cat in category_names:
        if cat not in cat_name_to_id:
            cat_name_to_id[cat] = len(cat_name_to_id)

    for anno in cct_annotations:
        anno['category_id'] = cat_name_to_id[anno['category_name']]
        del anno['category_name']

    cct_categories = []
    for name, num_id in cat_name_to_id.items():
        cct_categories.append({
            'id': num_id,
            'name': name
        })

    print('Final CCT DB has {} image entries, and {} annotation entries.'.format(
        len(cct_images), len(cct_annotations)))

    cct_db = {
        'info': {
            'version': str(datetime.now()),
            'date_created': str(datetime.today().date()),
            'description': ''  # to be filled by main()
        },
        'images': cct_images,
        'categories': cct_categories,
        'annotations': cct_annotations
    }
    cct_db = CameraTrapJsonUtils.order_db_keys(cct_db)
    cct_db['info']['description'] = 'COCO Camera Traps database converted from sequences in dataset {}'.format(
        dataset_name)

    print('Writing to output file...')
    write_json(output_path, cct_db)
    print('Done!')
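# Illustrative usage sketch for megadb_to_cct: the dataset name and both file paths
# below are hypothetical placeholders, and mega_db is assumed to be a list of sequence
# dicts that each carry a 'dataset' field.
def _example_megadb_to_cct():
    import json
    with open('megadb_entries.json') as f:
        mega_db = json.load(f)  # list of sequence entries, each with a 'dataset' field
    megadb_to_cct(dataset_name='example_dataset',
                  mega_db=mega_db,
                  output_path='example_dataset_cct.json',
                  bbox_only=False)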
def empty_accuracy_seq_level(gt_db_indexed, detector_output_path, file_to_image_id,
                             threshold=0.5, visualize_wrongly_classified=False, images_dir=''):
    """
    The ground truth label for a sequence is empty if the fine-category labels on all
    images in the sequence are "empty".

    Args:
        gt_db_indexed: an instance of IndexedJsonDb containing the ground truth
        detector_output_path: path to a file containing the detection results in the API output format
        file_to_image_id: see load_api_results.py - a function to convert image_id
        threshold: threshold between 0 and 1 below which an image is considered empty
        visualize_wrongly_classified: True to visualize 5 sequences where the predicted
            classes do not agree with the ground truth
        images_dir: directory where the 'file' field in the detector output is rooted.
            Relevant only if visualize_wrongly_classified is True.

    Returns:
        accuracy, sequence-level ground truth labels, sequence-level max confidences,
        and the list of sequence IDs that were evaluated
    """
    # TODO move detector_output_path specific code out so that this function evaluates
    # only on classification results (confidences)
    gt_seq_id_to_annotations = CameraTrapJsonUtils.annotations_groupby_image_field(
        gt_db_indexed, image_field='seq_id')
    pred_seq_id_to_res = load_api_results.api_results_groupby(
        detector_output_path, gt_db_indexed, file_to_image_id)

    gt_seq_level = []
    pred_seq_level = []

    empty_category_id_in_gt = gt_db_indexed.cat_name_to_id['empty']

    # evaluate on sequences that are present in both gt and the detector output file
    gt_sequences = set(gt_seq_id_to_annotations.keys())
    pred_sequences = set(pred_seq_id_to_res.keys())
    diff = gt_sequences.symmetric_difference(pred_sequences)
    print('Number of sequences not in both gt and pred: {}'.format(len(diff)))

    intersection_sequences = list(gt_sequences.intersection(pred_sequences))
    for seq_id in intersection_sequences:
        gt_seq_level.append(
            is_gt_seq_non_empty(gt_seq_id_to_annotations[seq_id], empty_category_id_in_gt))
        pred_seq_level.append(pred_seq_max_conf(pred_seq_id_to_res[seq_id]))

    pred_class = [0 if max_conf < threshold else 1 for max_conf in pred_seq_level]
    accuracy = accuracy_score(gt_seq_level, pred_class)

    if visualize_wrongly_classified:
        show_wrongly_classified_seq(pred_seq_id_to_res, intersection_sequences,
                                    gt_seq_level, pred_class, images_dir)

    return accuracy, gt_seq_level, pred_seq_level, intersection_sequences
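# Illustrative usage sketch for empty_accuracy_seq_level: gt_db_indexed is assumed to
# be an already-constructed IndexedJsonDb, the detector output path is a hypothetical
# placeholder, and the identity lambda assumes detector file names already match the
# ground truth image IDs.
def _example_empty_accuracy_seq_level(gt_db_indexed):
    accuracy, gt_labels, max_confs, seq_ids = empty_accuracy_seq_level(
        gt_db_indexed,
        detector_output_path='detector_output.json',
        file_to_image_id=lambda fn: fn,
        threshold=0.5)
    print('Sequence-level accuracy at threshold 0.5: {:.1%} over {} sequences'.format(
        accuracy, len(seq_ids)))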
def get_number_empty_seq(gt_db_indexed):
    gt_seq_id_to_annotations = CameraTrapJsonUtils.annotations_groupby_image_field(
        gt_db_indexed, image_field='seq_id')
    empty_category_id_in_gt = gt_db_indexed.cat_name_to_id['empty']

    gt_seq_level = []
    for seq_id, seq_annotations in gt_seq_id_to_annotations.items():
        gt_seq_level.append(
            is_gt_seq_non_empty(seq_annotations, empty_category_id_in_gt))

    total = len(gt_seq_level)
    num_empty = total - sum(gt_seq_level)
    print('There are {} sequences, {} are empty, which is {}%'.format(
        total, num_empty, 100 * num_empty / total))
def render_image_with_gt(file_info):

    image_relative_path = file_info[0]
    max_conf = file_info[1]
    detections = file_info[2]

    # This should already have been normalized to either '/' or '\'
    image_id = ground_truth_indexed_db.filename_to_id.get(image_relative_path, None)
    if image_id is None:
        print('Warning: couldn\'t find ground truth for image {}'.format(image_relative_path))
        return None

    image = ground_truth_indexed_db.image_id_to_image[image_id]
    annotations = ground_truth_indexed_db.image_id_to_annotations[image_id]

    gt_status = image['_detection_status']
    gt_presence = bool(gt_status)

    gt_classes = CameraTrapJsonUtils.annotations_to_classnames(
        annotations, ground_truth_indexed_db.cat_id_to_name)
    gt_class_summary = ','.join(gt_classes)

    if gt_status > DetectionStatus.DS_MAX_DEFINITIVE_VALUE:
        print('Skipping image {}, does not have a definitive ground truth status (status: {}, classes: {})'.format(
            image_id, gt_status, gt_class_summary))
        return None

    detected = max_conf > options.confidence_threshold

    if gt_presence and detected:
        if '_classification_accuracy' not in image.keys():
            res = 'tp'
        elif np.isclose(1, image['_classification_accuracy']):
            res = 'tpc'
        else:
            res = 'tpi'
    elif not gt_presence and detected:
        res = 'fp'
    elif gt_presence and not detected:
        res = 'fn'
    else:
        res = 'tn'

    display_name = '<b>Result type</b>: {}, <b>Presence</b>: {}, <b>Class</b>: {}, <b>Max conf</b>: {:0.3f}%, <b>Image</b>: {}'.format(
        res.upper(), str(gt_presence), gt_class_summary, max_conf * 100, image_relative_path)

    rendered_image_html_info = render_bounding_boxes(options.image_base_dir,
                                                     image_relative_path,
                                                     display_name,
                                                     detections,
                                                     res,
                                                     detection_categories_map,
                                                     classification_categories_map,
                                                     options)

    image_result = None
    if len(rendered_image_html_info) > 0:
        image_result = [[res, rendered_image_html_info]]
        for gt_class in gt_classes:
            image_result.append(['class_{}'.format(gt_class), rendered_image_html_info])

    return image_result
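# Illustrative usage sketch for render_image_with_gt: files_to_render is assumed to be
# a list of (relative image path, max detection confidence, detections list) tuples,
# matching the file_info layout above; the module-level ground_truth_indexed_db,
# options, and category maps are assumed to be set up already.
def _example_render_image_with_gt(files_to_render):
    # Keep only images that could be matched to ground truth and rendered
    rendering_results = [r for r in map(render_image_with_gt, files_to_render)
                         if r is not None]
    return rendering_results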
def process_batch_results(options):

    ##%% Expand some options for convenience

    output_dir = options.output_dir
    confidence_threshold = options.confidence_threshold

    ##%% Prepare output dir

    os.makedirs(output_dir, exist_ok=True)

    ##%% Load ground truth if available

    ground_truth_indexed_db = None

    if len(options.ground_truth_json_file) > 0:

        ground_truth_indexed_db = IndexedJsonDb(
            options.ground_truth_json_file, b_normalize_paths=True,
            filename_replacements=options.ground_truth_filename_replacements)

        # Mark images in the ground truth as positive or negative
        (nNegative, nPositive, nUnknown, nAmbiguous) = mark_detection_status(
            ground_truth_indexed_db,
            negative_classes=options.negative_classes,
            unknown_classes=options.unlabeled_classes)
        print('Finished loading and indexing ground truth: {} negative, {} positive, {} unknown, {} ambiguous'.format(
            nNegative, nPositive, nUnknown, nAmbiguous))

    ##%% Load detection results

    detection_results = load_api_results(
        options.detector_output_file, normalize_paths=True,
        filename_replacements=options.detector_output_filename_replacements)

    # Add a column (pred_detection_label) to indicate predicted detection status
    import numpy as np
    detection_results['pred_detection_label'] = \
        np.where(detection_results['max_confidence'] >= options.confidence_threshold,
                 DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)

    nPositives = sum(detection_results['pred_detection_label'] == DetectionStatus.DS_POSITIVE)
    print('Finished loading and preprocessing {} rows from detector output, predicted {} positives'.format(
        len(detection_results), nPositives))

    ##%% If we have ground truth, remove images we can't match to ground truth

    # ground_truth_indexed_db.db['images'][0]
    if ground_truth_indexed_db is not None:

        b_match = [False] * len(detection_results)
        detector_files = detection_results['image_path'].to_list()

        for iFn, fn in enumerate(detector_files):
            # assert fn in ground_truth_indexed_db.filename_to_id, 'Could not find ground truth for row {} ({})'.format(iFn,fn)
            if fn in ground_truth_indexed_db.filename_to_id:
                b_match[iFn] = True

        print('Confirmed filename matches to ground truth for {} of {} files'.format(
            sum(b_match), len(detector_files)))

        detection_results = detection_results[b_match]
        detector_files = detection_results['image_path'].to_list()

        print('Trimmed detection results to {} files'.format(len(detector_files)))

    ##%% Sample images for visualization

    images_to_visualize = detection_results

    if options.num_images_to_sample > 0 and options.num_images_to_sample < len(detection_results):
        images_to_visualize = images_to_visualize.sample(
            options.num_images_to_sample, random_state=options.sample_seed)

    ##%% Fork here depending on whether or not ground truth is available

    # If we have ground truth, we'll compute precision/recall and sample tp/fp/tn/fn.
    #
    # Otherwise we'll just visualize detections/non-detections.
    if ground_truth_indexed_db is not None:

        ##%% Compute precision/recall

        # numpy array of detection probabilities
        p_detection = detection_results['max_confidence'].values
        n_detections = len(p_detection)

        # numpy array of bools (0.0/1.0)
        gt_detections = np.zeros(n_detections, dtype=float)

        for iDetection, fn in enumerate(detector_files):
            image_id = ground_truth_indexed_db.filename_to_id[fn]
            image = ground_truth_indexed_db.image_id_to_image[image_id]
            detection_status = image['_detection_status']

            if detection_status == DetectionStatus.DS_NEGATIVE:
                gt_detections[iDetection] = 0.0
            elif detection_status == DetectionStatus.DS_POSITIVE:
                gt_detections[iDetection] = 1.0
            else:
                gt_detections[iDetection] = -1.0

        # Don't include ambiguous/unknown ground truth in precision/recall analysis
        b_valid_ground_truth = gt_detections >= 0.0

        p_detection_pr = p_detection[b_valid_ground_truth]
        gt_detections_pr = gt_detections[b_valid_ground_truth]

        print('Including {} of {} values in p/r analysis'.format(
            np.sum(b_valid_ground_truth), len(b_valid_ground_truth)))

        precisions, recalls, thresholds = precision_recall_curve(gt_detections_pr, p_detection_pr)

        # For completeness, include the result at a confidence threshold of 1.0
        thresholds = np.append(thresholds, [1.0])

        precisions_recalls = pd.DataFrame(data={
            'confidence_threshold': thresholds,
            'precision': precisions,
            'recall': recalls
        })

        # Compute and print summary statistics
        average_precision = average_precision_score(gt_detections_pr, p_detection_pr)
        print('Average precision: {:.2f}'.format(average_precision))

        # Thresholds go up throughout precisions/recalls/thresholds; find the last
        # value where recall is at or above target.  That's our precision @ target recall.
        target_recall = 0.9
        b_above_target_recall = np.where(recalls >= target_recall)
        if not np.any(b_above_target_recall):
            precision_at_target_recall = 0.0
        else:
            i_target_recall = np.argmax(b_above_target_recall)
            precision_at_target_recall = precisions[i_target_recall]
        print('Precision at {:.2f} recall: {:.2f}'.format(target_recall, precision_at_target_recall))

        cm = confusion_matrix(gt_detections_pr, np.array(p_detection_pr) > confidence_threshold)

        # Flatten the confusion matrix
        tn, fp, fn, tp = cm.ravel()

        precision_at_confidence_threshold = tp / (tp + fp)
        recall_at_confidence_threshold = tp / (tp + fn)
        f1 = 2.0 * (precision_at_confidence_threshold * recall_at_confidence_threshold) / \
            (precision_at_confidence_threshold + recall_at_confidence_threshold)

        print('At a confidence threshold of {:.2f}, precision={:.2f}, recall={:.2f}, f1={:.2f}'.format(
            confidence_threshold, precision_at_confidence_threshold,
            recall_at_confidence_threshold, f1))

        ##%% Render output

        # Write p/r table to .csv file in output directory
        pr_table_filename = os.path.join(output_dir, 'prec_recall.csv')
        precisions_recalls.to_csv(pr_table_filename, index=False)

        # Write precision/recall plot to .png file in output directory
        step_kwargs = ({'step': 'post'})
        fig = plt.figure()
        plt.step(recalls, precisions, color='b', alpha=0.2, where='post')
        plt.fill_between(recalls, precisions, alpha=0.2, color='b', **step_kwargs)
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.05])
        t = 'Precision-Recall curve: AP={:0.2f}, P@{:0.2f}={:0.2f}'.format(
            average_precision, target_recall, precision_at_target_recall)
        plt.title(t)
        pr_figure_relative_filename = 'prec_recall.png'
        pr_figure_filename = os.path.join(output_dir, pr_figure_relative_filename)
        plt.savefig(pr_figure_filename)
        # plt.show(block=False)
        plt.close(fig)

        ##%% Sample true/false positives/negatives and render to html
        os.makedirs(os.path.join(output_dir, 'tp'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'fp'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'tn'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'fn'), exist_ok=True)

        # Accumulate html image structs (in the format expected by write_html_image_lists)
        # for each category
        images_html = {'tp': [], 'fp': [], 'tn': [], 'fn': []}

        count = 0

        # i_row = 0; row = images_to_visualize.iloc[0]
        for i_row, row in tqdm(images_to_visualize.iterrows(), total=len(images_to_visualize)):

            image_relative_path = row['image_path']

            # This should already have been normalized to either '/' or '\'
            image_id = ground_truth_indexed_db.filename_to_id.get(image_relative_path, None)
            if image_id is None:
                print('Warning: couldn\'t find ground truth for image {}'.format(image_relative_path))
                continue

            image_info = ground_truth_indexed_db.image_id_to_image[image_id]
            annotations = ground_truth_indexed_db.image_id_to_annotations[image_id]

            gt_status = image_info['_detection_status']

            if gt_status > DetectionStatus.DS_MAX_DEFINITIVE_VALUE:
                print('Skipping image {}, does not have a definitive ground truth status ({})'.format(
                    i_row, gt_status))
                continue

            gt_presence = bool(gt_status)

            gt_class_name = CameraTrapJsonUtils.annotationsToString(
                annotations, ground_truth_indexed_db.cat_id_to_name)

            max_conf = row['max_confidence']
            boxes_and_scores = row['detections']

            detected = max_conf > confidence_threshold

            if gt_presence and detected:
                res = 'tp'
            elif not gt_presence and detected:
                res = 'fp'
            elif gt_presence and not detected:
                res = 'fn'
            else:
                res = 'tn'

            display_name = '<b>Result type</b>: {}, <b>Presence</b>: {}, <b>Class</b>: {}, <b>Max conf</b>: {:0.2f}%, <b>Image</b>: {}'.format(
                res.upper(), str(gt_presence), gt_class_name, max_conf * 100, image_relative_path)

            rendered_image_html_info = render_bounding_boxes(
                options.image_base_dir, image_relative_path, display_name,
                boxes_and_scores, res, options)

            if len(rendered_image_html_info) > 0:
                images_html[res].append(rendered_image_html_info)
                count += 1

        # ...for each image in our sample

        print('{} images rendered'.format(count))

        # Prepare the individual html image files
        image_counts = prepare_html_subpages(images_html, output_dir)

        # Write index.html
        index_page = """<html><body>
<p><strong>A sample of {} images, annotated with detections above {:.1f}% confidence.</strong></p>

<a href="tp.html">True positives (tp)</a> ({})<br/>
<a href="tn.html">True negatives (tn)</a> ({})<br/>
<a href="fp.html">False positives (fp)</a> ({})<br/>
<a href="fn.html">False negatives (fn)</a> ({})<br/>

<p>At a confidence threshold of {:0.1f}%, precision={:0.2f}, recall={:0.2f}</p>
<p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
</body></html>""".format(
            count, confidence_threshold * 100,
            image_counts['tp'], image_counts['tn'], image_counts['fp'], image_counts['fn'],
            confidence_threshold * 100, precision_at_confidence_threshold,
            recall_at_confidence_threshold, len(detection_results),
            pr_figure_relative_filename)

        output_html_file = os.path.join(output_dir, 'index.html')
        with open(output_html_file, 'w') as f:
            f.write(index_page)

        print('Finished writing html to {}'.format(output_html_file))

    ##%% Otherwise, if we don't have ground truth...
    else:

        ##%% Sample detections/non-detections

        os.makedirs(os.path.join(output_dir, 'detections'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'non_detections'), exist_ok=True)

        # Accumulate html image structs (in the format expected by write_html_image_lists)
        # for each category
        images_html = {
            'detections': [],
            'non_detections': [],
        }

        count = 0

        # i_row = 0; row = images_to_visualize.iloc[0]
        for i_row, row in tqdm(images_to_visualize.iterrows(), total=len(images_to_visualize)):

            image_relative_path = row['image_path']

            # This should already have been normalized to either '/' or '\'
            max_conf = row['max_confidence']
            boxes_and_scores = row['detections']

            detected = max_conf > confidence_threshold

            if detected:
                res = 'detections'
            else:
                res = 'non_detections'

            display_name = '<b>Result type</b>: {}, <b>Image</b>: {}'.format(
                res.upper(), image_relative_path)

            rendered_image_html_info = render_bounding_boxes(
                options.image_base_dir, image_relative_path, display_name,
                boxes_and_scores, res, options)

            if len(rendered_image_html_info) > 0:
                images_html[res].append(rendered_image_html_info)
                count += 1

        # ...for each image in our sample

        print('{} images rendered'.format(count))

        # Prepare the individual html image files
        image_counts = prepare_html_subpages(images_html, output_dir)

        # Write index.html
        index_page = """<html><body>
<p><strong>A sample of {} images, annotated with detections above {:.1f}% confidence.</strong></p>

<a href="detections.html">Detections</a> ({})<br/>
<a href="non_detections.html">Non-detections</a> ({})<br/>
</body></html>""".format(count, confidence_threshold * 100,
                         image_counts['detections'], image_counts['non_detections'])

        output_html_file = os.path.join(output_dir, 'index.html')
        with open(output_html_file, 'w') as f:
            f.write(index_page)

        print('Finished writing html to {}'.format(output_html_file))
def process_batch_results(options):

    ##%% Expand some options for convenience

    output_dir = options.output_dir
    confidence_threshold = options.confidence_threshold

    ##%% Prepare output dir

    os.makedirs(output_dir, exist_ok=True)

    ##%% Load ground truth if available

    ground_truth_indexed_db = None

    if options.ground_truth_json_file and len(options.ground_truth_json_file) > 0:

        ground_truth_indexed_db = IndexedJsonDb(
            options.ground_truth_json_file, b_normalize_paths=True,
            filename_replacements=options.ground_truth_filename_replacements)

        # Mark images in the ground truth as positive or negative
        n_negative, n_positive, n_unknown, n_ambiguous = mark_detection_status(
            ground_truth_indexed_db,
            negative_classes=options.negative_classes,
            unknown_classes=options.unlabeled_classes)
        print('Finished loading and indexing ground truth: {} negative, {} positive, {} unknown, {} ambiguous'.format(
            n_negative, n_positive, n_unknown, n_ambiguous))

    ##%% Load detection results

    detection_results, other_fields = load_api_results(
        options.api_output_file, normalize_paths=True,
        filename_replacements=options.api_output_filename_replacements)

    detection_categories_map = other_fields['detection_categories']
    if 'classification_categories' in other_fields:
        classification_categories_map = other_fields['classification_categories']
    else:
        classification_categories_map = {}

    # Add a column (pred_detection_label) to indicate predicted detection status,
    # not separating out the classes
    detection_results['pred_detection_label'] = \
        np.where(detection_results['max_detection_conf'] >= options.confidence_threshold,
                 DetectionStatus.DS_POSITIVE, DetectionStatus.DS_NEGATIVE)

    n_positives = sum(detection_results['pred_detection_label'] == DetectionStatus.DS_POSITIVE)
    print('Finished loading and preprocessing {} rows from detector output, predicted {} positives'.format(
        len(detection_results), n_positives))

    ##%% If we have ground truth, remove images we can't match to ground truth

    # ground_truth_indexed_db.db['images'][0]
    if ground_truth_indexed_db is not None:

        b_match = [False] * len(detection_results)
        detector_files = detection_results['file'].tolist()

        for i_fn, fn in enumerate(detector_files):
            # assert fn in ground_truth_indexed_db.filename_to_id, 'Could not find ground truth for row {} ({})'.format(i_fn,fn)
            if fn in ground_truth_indexed_db.filename_to_id:
                b_match[i_fn] = True

        print('Confirmed filename matches to ground truth for {} of {} files'.format(
            sum(b_match), len(detector_files)))

        detection_results = detection_results[b_match]
        detector_files = detection_results['file'].tolist()

        print('Trimmed detection results to {} files'.format(len(detector_files)))

    ##%% Sample images for visualization

    images_to_visualize = detection_results

    if options.num_images_to_sample > 0 and options.num_images_to_sample <= len(detection_results):
        images_to_visualize = images_to_visualize.sample(
            options.num_images_to_sample, random_state=options.sample_seed)

    ##%% Fork here depending on whether or not ground truth is available

    output_html_file = ''

    # If we have ground truth, we'll compute precision/recall and sample tp/fp/tn/fn.
    #
    # Otherwise we'll just visualize detections/non-detections.
    if ground_truth_indexed_db is not None:

        ##%% DETECTION EVALUATION: compute precision/recall

        # numpy array of detection probabilities
        p_detection = detection_results['max_detection_conf'].values
        n_detections = len(p_detection)

        # numpy array of floats (0.0/1.0), with -1 as the null value
        gt_detections = np.zeros(n_detections, dtype=float)

        for i_detection, fn in enumerate(detector_files):
            image_id = ground_truth_indexed_db.filename_to_id[fn]
            image = ground_truth_indexed_db.image_id_to_image[image_id]
            detection_status = image['_detection_status']

            if detection_status == DetectionStatus.DS_NEGATIVE:
                gt_detections[i_detection] = 0.0
            elif detection_status == DetectionStatus.DS_POSITIVE:
                gt_detections[i_detection] = 1.0
            else:
                gt_detections[i_detection] = -1.0

        # Don't include ambiguous/unknown ground truth in precision/recall analysis
        b_valid_ground_truth = gt_detections >= 0.0

        p_detection_pr = p_detection[b_valid_ground_truth]
        gt_detections_pr = gt_detections[b_valid_ground_truth]

        print('Including {} of {} values in p/r analysis'.format(
            np.sum(b_valid_ground_truth), len(b_valid_ground_truth)))

        precisions, recalls, thresholds = precision_recall_curve(gt_detections_pr, p_detection_pr)

        # For completeness, include the result at a confidence threshold of 1.0
        thresholds = np.append(thresholds, [1.0])

        precisions_recalls = pd.DataFrame(data={
            'confidence_threshold': thresholds,
            'precision': precisions,
            'recall': recalls
        })

        # Compute and print summary statistics
        average_precision = average_precision_score(gt_detections_pr, p_detection_pr)
        print('Average precision: {:.1%}'.format(average_precision))

        # Thresholds go up throughout precisions/recalls/thresholds; find the last
        # value where recall is at or above target.  That's our precision @ target recall.
        target_recall = 0.9
        b_above_target_recall = np.where(recalls >= target_recall)
        if not np.any(b_above_target_recall):
            precision_at_target_recall = 0.0
        else:
            i_target_recall = np.argmax(b_above_target_recall)
            precision_at_target_recall = precisions[i_target_recall]
        print('Precision at {:.1%} recall: {:.1%}'.format(target_recall, precision_at_target_recall))

        cm = confusion_matrix(gt_detections_pr, np.array(p_detection_pr) > confidence_threshold)

        # Flatten the confusion matrix
        tn, fp, fn, tp = cm.ravel()

        precision_at_confidence_threshold = tp / (tp + fp)
        recall_at_confidence_threshold = tp / (tp + fn)
        f1 = 2.0 * (precision_at_confidence_threshold * recall_at_confidence_threshold) / \
            (precision_at_confidence_threshold + recall_at_confidence_threshold)

        print('At a confidence threshold of {:.1%}, precision={:.1%}, recall={:.1%}, f1={:.1%}'.format(
            confidence_threshold, precision_at_confidence_threshold,
            recall_at_confidence_threshold, f1))

        ##%% CLASSIFICATION evaluation

        classifier_accuracies = []

        # Mapping of classnames to idx for the confusion matrix.
        # The lambda is actually kind of a hack, because we assume that
        # the following code does not reassign classname_to_idx
        classname_to_idx = collections.defaultdict(lambda: len(classname_to_idx))

        # Confusion matrix as defaultdict of defaultdict
        # Rows / first index is ground truth, columns / second index is predicted category
        classifier_cm = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))

        for i_detection, fn in enumerate(detector_files):

            image_id = ground_truth_indexed_db.filename_to_id[fn]
            image = ground_truth_indexed_db.image_id_to_image[image_id]

            pred_class_ids = [det['classifications'][0][0] \
                              for det in detection_results['detections'][i_detection]
                              if 'classifications' in det.keys()]
            pred_classnames = [classification_categories_map[pd] for pd in pred_class_ids]

            # If this image has classification predictions, and an unambiguous class
            # annotated, and is a positive image...
            if len(pred_classnames) > 0 \
                    and '_unambiguous_category' in image.keys() \
                    and image['_detection_status'] == DetectionStatus.DS_POSITIVE:

                # The unambiguous category; we make this a set for easier handling afterward.
                #
                # TODO: we could replace the unambiguous category with all annotated
                # categories, but then the confusion matrix would no longer make sense.
                #
                # TODO: make sure we are using the class names as strings in both, not IDs
                gt_categories = set([image['_unambiguous_category']])
                pred_categories = set(pred_classnames)

                # Compute the accuracy as intersection over union,
                # i.e. (# of categories in both prediction and GT)
                # divided by (# of categories in either prediction or GT).
                #
                # With a single GT category, the result is 1.0 if the prediction
                # is exactly that category.  It is 1.0/(# of predicted top-1
                # categories) if the GT category is one of the predicted top-1 categories.
                # It is 0.0 if none of the predicted categories is correct.
                classifier_accuracies.append(
                    len(gt_categories & pred_categories) / len(gt_categories | pred_categories))
                image['_classification_accuracy'] = classifier_accuracies[-1]

                # Distribute this accuracy across all predicted categories in the
                # confusion matrix
                assert len(gt_categories) == 1
                gt_class_idx = classname_to_idx[list(gt_categories)[0]]
                for pred_category in pred_categories:
                    pred_class_idx = classname_to_idx[pred_category]
                    classifier_cm[gt_class_idx][pred_class_idx] += 1

        # If we have classification results
        if len(classifier_accuracies) > 0:

            # Build confusion matrix as array from classifier_cm
            all_class_ids = sorted(classname_to_idx.values())
            classifier_cm_array = np.array(
                [[classifier_cm[r_idx][c_idx] for c_idx in all_class_ids]
                 for r_idx in all_class_ids], dtype=float)
            classifier_cm_array /= (classifier_cm_array.sum(axis=1, keepdims=True) + 1e-7)

            # Print some statistics
            print("Finished computation of {} classification results".format(
                len(classifier_accuracies)))
            print("Mean accuracy: {}".format(np.mean(classifier_accuracies)))

            # Prepare confusion matrix output

            # Get CM matrix as string
            sio = io.StringIO()
            np.savetxt(sio, classifier_cm_array * 100, fmt='%5.1f')
            cm_str = sio.getvalue()

            # Get fixed-size classname for each idx
            idx_to_classname = {v: k for k, v in classname_to_idx.items()}
            classname_list = [idx_to_classname[idx] for idx in sorted(classname_to_idx.values())]
            classname_headers = ['{:<5}'.format(cname[:5]) for cname in classname_list]

            # Prepend the class name on each line and add the headers at the top
            cm_str_lines = [' ' * 16 + ' '.join(classname_headers)]
            cm_str_lines += ['{:>15}'.format(cn[:15]) + ' ' + cm_line
                             for cn, cm_line in zip(classname_list, cm_str.splitlines())]

            # Print formatted confusion matrix
            print("Confusion matrix: ")
            print(*cm_str_lines, sep='\n')

            # Plot confusion matrix
            #
            # To manually add more space at the bottom:
            # plt.rcParams['figure.subplot.bottom'] = 0.1
            #
            # Add 0.5 to figsize for every class; for two classes this would be
            # fig = plt.figure(figsize=[4,4])
            fig = vis_utils.plot_confusion_matrix(classifier_cm_array,
                                                  classname_list,
                                                  normalize=False,
                                                  title='Confusion matrix',
                                                  cmap=plt.cm.Blues,
                                                  vmax=1.0,
                                                  use_colorbar=True,
                                                  y_label=True)
            cm_figure_relative_filename = 'confusion_matrix.png'
            cm_figure_filename = os.path.join(output_dir, cm_figure_relative_filename)
            plt.savefig(cm_figure_filename)
            plt.close(fig)

        ##%% Render output

        # Write p/r table to .csv file in output directory
        pr_table_filename = os.path.join(output_dir, 'prec_recall.csv')
        precisions_recalls.to_csv(pr_table_filename, index=False)

        # Write precision/recall plot to .png file in output directory
        t = 'Precision-Recall curve: AP={:0.1%}, P@{:0.1%}={:0.1%}'.format(
            average_precision, target_recall, precision_at_target_recall)
        fig = vis_utils.plot_precision_recall_curve(precisions, recalls, t)
        pr_figure_relative_filename = 'prec_recall.png'
        pr_figure_filename = os.path.join(output_dir, pr_figure_relative_filename)
        plt.savefig(pr_figure_filename)
        # plt.show(block=False)
        plt.close(fig)

        ##%% Sample true/false positives/negatives with correct/incorrect top-1
        # classification and render to html

        # Accumulate html image structs (in the format expected by write_html_image_lists)
        # for each category, e.g. 'tp', 'fp', ..., 'class_bird', ...
        images_html = collections.defaultdict(lambda: [])

        # Add default entries by accessing them for the first time
        [images_html[res] for res in ['tp', 'tpc', 'tpi', 'fp', 'tn', 'fn']]

        for res in images_html.keys():
            os.makedirs(os.path.join(output_dir, res), exist_ok=True)

        count = 0

        # i_row = 0; row = images_to_visualize.iloc[0]
        for i_row, row in tqdm(images_to_visualize.iterrows(), total=len(images_to_visualize)):

            image_relative_path = row['file']

            # This should already have been normalized to either '/' or '\'
            image_id = ground_truth_indexed_db.filename_to_id.get(image_relative_path, None)
            if image_id is None:
                print('Warning: couldn\'t find ground truth for image {}'.format(image_relative_path))
                continue

            image = ground_truth_indexed_db.image_id_to_image[image_id]
            annotations = ground_truth_indexed_db.image_id_to_annotations[image_id]

            gt_status = image['_detection_status']

            if gt_status > DetectionStatus.DS_MAX_DEFINITIVE_VALUE:
                print('Skipping image {}, does not have a definitive ground truth status ({})'.format(
                    i_row, gt_status))
                continue

            gt_presence = bool(gt_status)

            gt_classes = CameraTrapJsonUtils.annotations_to_classnames(
                annotations, ground_truth_indexed_db.cat_id_to_name)
            gt_class_summary = ','.join(gt_classes)

            max_conf = row['max_detection_conf']
            detections = row['detections']

            detected = max_conf > confidence_threshold

            if gt_presence and detected:
                if '_classification_accuracy' not in image.keys():
                    res = 'tp'
                elif np.isclose(1, image['_classification_accuracy']):
                    res = 'tpc'
                else:
                    res = 'tpi'
            elif not gt_presence and detected:
                res = 'fp'
            elif gt_presence and not detected:
                res = 'fn'
            else:
                res = 'tn'

            display_name = '<b>Result type</b>: {}, <b>Presence</b>: {}, <b>Class</b>: {}, <b>Max conf</b>: {:0.2f}%, <b>Image</b>: {}'.format(
                res.upper(), str(gt_presence), gt_class_summary, max_conf * 100, image_relative_path)

            rendered_image_html_info = render_bounding_boxes(
                options.image_base_dir, image_relative_path, display_name,
                detections, res, detection_categories_map,
                classification_categories_map, options)

            if len(rendered_image_html_info) > 0:
                images_html[res].append(rendered_image_html_info)
                for gt_class in gt_classes:
                    images_html['class_{}'.format(gt_class)].append(rendered_image_html_info)
                count += 1

        # ...for each image in our sample

        print('{} images rendered'.format(count))

        # Prepare the individual html image files
        image_counts = prepare_html_subpages(images_html, output_dir)

        # Write index.html
        all_tp_count = image_counts['tp'] + image_counts['tpc'] + image_counts['tpi']
        total_count = all_tp_count + image_counts['tn'] + image_counts['fp'] + image_counts['fn']

        index_page = """<html><body>
<h2>Evaluation</h2>

<h3>Sample images</h3>
<p>A sample of {} images, annotated with detections above {:.1%} confidence.</p>
True positives (TP) ({} or {:0.1%})<br/>
-- <a href="tpc.html">with all correct top-1 predictions (TPC)</a> ({})<br/>
-- <a href="tpi.html">with one or more incorrect top-1 prediction (TPI)</a> ({})<br/>
-- <a href="tp.html">without classification evaluation</a> (*) ({})<br/>
<a href="tn.html">True negatives (TN)</a> ({} or {:0.1%})<br/>
<a href="fp.html">False positives (FP)</a> ({} or {:0.1%})<br/>
<a href="fn.html">False negatives (FN)</a> ({} or {:0.1%})<br/>
<p>(*) We do not evaluate the classification result of an image if the classification
information is missing, if the image contains categories like 'empty' or 'human',
or if the image has multiple classification labels.</p>""".format(
            count, confidence_threshold,
            all_tp_count, all_tp_count / total_count,
            image_counts['tpc'], image_counts['tpi'], image_counts['tp'],
            image_counts['tn'], image_counts['tn'] / total_count,
            image_counts['fp'], image_counts['fp'] / total_count,
            image_counts['fn'], image_counts['fn'] / total_count)

        index_page += """
<h3>Detection results</h3>
<p>At a confidence threshold of {:0.1%}, precision={:0.1%}, recall={:0.1%}</p>
<p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
""".format(confidence_threshold, precision_at_confidence_threshold,
           recall_at_confidence_threshold, len(detection_results),
           pr_figure_relative_filename)

        if len(classifier_accuracies) > 0:
            index_page += """
<h3>Classification results</h3>
<p>Classification accuracy: {:.2%}<br>
The accuracy is computed only for images with exactly one classification label.
The accuracy of an image is computed as 1/(number of unique detected top-1 classes),
i.e. if the model detects multiple boxes with different top-1 classes, then the accuracy
decreases and the image is put into 'TPI'.</p>
<p>Confusion matrix:</p>
<p><img src="{}"></p>
<div style='font-family:monospace;display:block;'>{}</div>
""".format(np.mean(classifier_accuracies),
           cm_figure_relative_filename,
           "<br>".join(cm_str_lines).replace(' ', '&nbsp;'))

        # Show links to each GT class
        index_page += "<h3>Images of specific classes:</h3>"

        # Add links to all available classes
        for cname in sorted(classname_to_idx.keys()):
            index_page += "<a href='class_{0}.html'>{0}</a> ({1})<br>".format(
                cname, len(images_html['class_{}'.format(cname)]))

        # Close body and html tags
        index_page += "</body></html>"

        output_html_file = os.path.join(output_dir, 'index.html')
        with open(output_html_file, 'w') as f:
            f.write(index_page)

        print('Finished writing html to {}'.format(output_html_file))

    ##%% Otherwise, if we don't have ground truth...
    else:

        ##%% Sample detections/non-detections

        os.makedirs(os.path.join(output_dir, 'detections'), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'non_detections'), exist_ok=True)

        # Accumulate html image structs (in the format expected by write_html_image_lists)
        # for each category
        images_html = collections.defaultdict(lambda: [])

        # Add default entries by accessing them for the first time
        [images_html[res] for res in ['detections', 'non_detections']]

        for res in images_html.keys():
            os.makedirs(os.path.join(output_dir, res), exist_ok=True)

        count = 0
        has_classification_info = False

        # i_row = 0; row = images_to_visualize.iloc[0]
        for i_row, row in tqdm(images_to_visualize.iterrows(), total=len(images_to_visualize)):

            image_relative_path = row['file']

            # This should already have been normalized to either '/' or '\'
            max_conf = row['max_detection_conf']
            detections = row['detections']

            detected = max_conf > confidence_threshold

            if detected:
                res = 'detections'
            else:
                res = 'non_detections'

            display_name = '<b>Result type</b>: {}, <b>Image</b>: {}, <b>Max conf</b>: {}'.format(
                res, image_relative_path, max_conf)

            rendered_image_html_info = render_bounding_boxes(
                options.image_base_dir, image_relative_path, display_name,
                detections, res, detection_categories_map,
                classification_categories_map, options)

            if len(rendered_image_html_info) > 0:
                images_html[res].append(rendered_image_html_info)
                for det in detections:
                    if 'classifications' in det:
                        has_classification_info = True
                        top1_class = classification_categories_map[det['classifications'][0][0]]
                        images_html['class_{}'.format(top1_class)].append(rendered_image_html_info)
                count += 1

        # ...for each image in our sample

        print('{} images rendered'.format(count))

        # Prepare the individual html image files
        image_counts = prepare_html_subpages(images_html, output_dir)

        # Write index.html
        total_images = image_counts['detections'] + image_counts['non_detections']

        index_page = """<html><body>
<h2>Visualization of results</h2>
<p>A sample of {} images, annotated with detections above {:.1%} confidence.</p>
<h3>Sample images</h3>
<a href="detections.html">Detections</a> ({}, {:.1%})<br/>
<a href="non_detections.html">Non-detections</a> ({}, {:.1%})<br/>""".format(
            count, confidence_threshold,
            image_counts['detections'], image_counts['detections'] / total_images,
            image_counts['non_detections'], image_counts['non_detections'] / total_images)

        if has_classification_info:
            index_page += "<h3>Images of detected classes</h3>"
            index_page += "<p>The same image might appear under multiple classes if multiple species were detected.</p>"

            # Add links to all available classes
            for cname in sorted(classification_categories_map.values()):
                ccount = len(images_html['class_{}'.format(cname)])
                if ccount > 0:
                    index_page += "<a href='class_{0}.html'>{0}</a> ({1})<br>".format(cname, ccount)

        index_page += "</body></html>"

        output_html_file = os.path.join(output_dir, 'index.html')
        with open(output_html_file, 'w') as f:
            f.write(index_page)

        print('Finished writing html to {}'.format(output_html_file))

    # ...if we do/don't have ground truth

    ppresults = PostProcessingResults()
    ppresults.output_html_file = output_html_file
    return ppresults
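# Illustrative usage sketch for process_batch_results: all paths and option values
# below are hypothetical placeholders, and PostProcessingOptions is assumed to be the
# options container used with this function (other fields are assumed to keep their
# defaults).  Leave ground_truth_json_file empty to skip evaluation and only visualize
# detections/non-detections.
def _example_process_batch_results():
    options = PostProcessingOptions()
    options.api_output_file = 'detector_api_output.json'
    options.ground_truth_json_file = 'ground_truth_cct.json'
    options.image_base_dir = '/data/images'
    options.output_dir = '/tmp/postprocessing'
    options.confidence_threshold = 0.8
    ppresults = process_batch_results(options)
    print('Wrote HTML to {}'.format(ppresults.output_html_file))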