def find_result_ground_truth_correspondence(gt_list, result_rect_list, area_threshold=0.7):
    """Pair every detected rectangle with the ground-truth rectangles it overlaps.

    A detection and a ground-truth rectangle are considered a match when
    the area of their overlap polygon is strictly greater than
    ``area_threshold`` times the ground-truth area or ``area_threshold``
    times the detection area.

    Side effects on the inputs:
      * ``positive`` is set to True on every matched detection;
      * ``gt_list[i]['found'][j]`` is set to True for every matched
        ground-truth rectangle;
      * the detection index is appended to ``gt_list[i]['matching'][j]``.

    Args:
        gt_list: sequence of ground-truth groups; each group is indexed
            with the keys ``'rects'``, ``'found'`` and ``'matching'``.
        result_rect_list: sequence of detected rectangles.
        area_threshold: fraction of either area the overlap must exceed.

    Returns:
        A 4-tuple ``(matching_lists, correct_results, overlap_json,
        non_overlap_json)``:
          * ``matching_lists[k]`` lists the ``(group index, rect index)``
            pairs matched by detection ``k``;
          * ``correct_results`` is the set of matched detections;
          * ``overlap_json`` holds one feature per overlap polygon,
            whether or not it passed the threshold;
          * ``non_overlap_json`` holds one feature per detection whose
            ``positive`` attribute is falsy.
    """
    # Container for every overlap polygon that was found.
    overlap_json = GeoJsonWriter.init_output_json_structure()
    # Container for the detections that are not regarded as correct.
    non_overlap_json = GeoJsonWriter.init_output_json_structure()

    correct_results = set()
    matching_lists = []  # One entry per detection: the ground truths it matched.

    # Pass 1: compare each detection with each ground-truth rectangle.
    for result_index, detection in enumerate(result_rect_list):
        detection_area = detection.get_area()
        matched_pairs = []

        for group_index, group in enumerate(gt_list):
            for rect_index, truth in enumerate(group['rects']):
                truth_area = truth.get_area()
                overlap_polygon = detection.get_overlap_polygon(truth)
                if overlap_polygon is None:
                    # No intersection at all: nothing to record.
                    continue

                overlap_area = overlap_polygon.get_area()
                overlap_valid = (
                    overlap_area > area_threshold * truth_area
                    or overlap_area > area_threshold * detection_area
                )
                if overlap_valid:
                    detection.positive = True
                    group['found'][rect_index] = True
                    matched_pairs.append((group_index, rect_index))
                    group['matching'][rect_index].append(result_index)
                    correct_results.add(detection)

                # Every overlap is exported, tagged with its validity.
                overlap_json['features'].append(
                    GeoJsonWriter.generate_overlap_feature(
                        overlap_polygon, overlap_valid, tag=group_index
                    )
                )

        matching_lists.append(matched_pairs)

    # Pass 2: export the detections that never matched anything.
    for result_index, detection in enumerate(result_rect_list):
        if detection.positive:
            continue
        rejected_feature = GeoJsonWriter.generate_overlap_feature(
            Polygon(detection.points), False, result_index
        )
        non_overlap_json['features'].append(rejected_feature)

    return matching_lists, correct_results, overlap_json, non_overlap_json
def evaluation_simple(result_rect_list, gt_list, area_threshold=0.7, group_para=0,
                      max_area_ratio=5):
    """Compute character-level precision and recall of detections against ground truth.

    Each detected rectangle is compared with every ground-truth rectangle.
    A pair matches when the overlap area is at least ``area_threshold``
    times the ground-truth area or the detection area. For a matching pair
    the texts are aligned with ``WordEvaluation.levenshtein_distance`` and
    the per-character ``textFound`` flags of both rectangles are updated.

    Side effects on the inputs:
      * ``.`` characters are removed from the ``text`` of every matched
        detection and ground-truth rectangle;
      * ``positive``, ``textFound``, ``textGroup`` and ``groundTruth`` of
        detections are updated;
      * ``gt_list[i]['found'][j]`` and the ground-truth ``textFound`` flags
        are updated.

    Args:
        result_rect_list: sequence of detected rectangles.
        gt_list: sequence of ground-truth groups, each indexed with the
            keys ``'rects'`` and ``'found'``.
        area_threshold: fraction of either area the overlap must reach.
        group_para: ground-truth group id to restrict the statistics to;
            ``0`` means all groups.
        max_area_ratio: a detection whose area is more than this many
            times the ground-truth area is not matched, even though the
            overlap passed the threshold. Defaults to 5.

    Returns:
        A 6-tuple ``(overlap_json, non_overlap_json, precision_list,
        recall_list, total_extracted_characters_list,
        total_gt_characters_list)``. Each of the four lists holds exactly
        one element. ``non_overlap_json`` is returned as initialised; this
        function never adds features to it.
    """
    # Container for every overlap polygon that was exported.
    overlap_json = GeoJsonWriter.init_output_json_structure()
    # Kept for the return shape only; nothing is appended to it here.
    non_overlap_json = GeoJsonWriter.init_output_json_structure()

    correct_results = set()

    # NOTE: an earlier "roll back" path guarded by an ``is_invalid`` flag was
    # removed. The flag was only ever assigned False, so the roll back could
    # never execute, and the shallow copies of both input lists it made for
    # every detection were never used.
    for rect in result_rect_list:
        result_area = rect.get_area()

        for group_index, group in enumerate(gt_list):
            for gt_index, gt_rect in enumerate(group['rects']):
                gt_area = gt_rect.get_area()
                overlap_polygon = rect.get_overlap_polygon(gt_rect)
                if overlap_polygon is None:
                    continue

                overlap_area = overlap_polygon.get_area()
                overlap_valid = (
                    overlap_area >= area_threshold * gt_area
                    or overlap_area >= area_threshold * result_area
                )
                if overlap_valid:
                    if result_area > max_area_ratio * gt_area:
                        # Oversized detection: stop comparing it with the
                        # remaining rectangles of this ground-truth group.
                        # No overlap feature is exported for this pair.
                        break

                    rect.positive = True
                    group['found'][gt_index] = True
                    correct_results.add(rect)

                    # Dots are ignored when comparing the two texts.
                    gt_rect.text = gt_rect.text.replace(".", "")
                    rect.text = rect.text.replace(".", "")
                    _, gt_mlist, result_mlist = WordEvaluation.levenshtein_distance(
                        gt_rect.text, rect.text
                    )

                    # A character stays "found" once any pairing found it.
                    for m, gt_hit in enumerate(gt_mlist):
                        gt_rect.textFound[m] = gt_rect.textFound[m] or gt_hit

                    for r, result_hit in enumerate(result_mlist):
                        rect.textFound[r] = rect.textFound[r] or result_hit
                        rect.textGroup[r].append(int(gt_rect.groupId))

                # NOTE(review): nesting of the next two statements was
                # reconstructed from whitespace-collapsed source; they are
                # assumed to run for every overlap that was not skipped
                # above, valid or not. Confirm against the original file.
                new_feature = GeoJsonWriter.generate_overlap_feature(
                    overlap_polygon, overlap_valid, tag=group_index
                )
                overlap_json['features'].append(new_feature)
                rect.groundTruth.append(gt_rect.text)

    # Compute the precision and recall.
    precision_list = []
    recall_list = []
    total_extracted_characters_list = []
    total_gt_characters_list = []

    total_gt_characters = total_extracted_characters = 0
    correct_result_characters = correct_gt_characters = 0

    for r in result_rect_list:
        for i in range(len(r.text)):
            groups = r.textGroup[i]
            if group_para in groups or group_para == 0:
                # Character belongs to the requested group (or all groups).
                if r.textFound[i]:
                    correct_result_characters += 1
                total_extracted_characters += 1
            elif len(groups) == 0:
                # Character matched no ground truth at all: it still counts
                # as extracted, which lowers precision.
                total_extracted_characters += 1

    for group in gt_list:
        for gt_rect in group['rects']:
            if group_para == int(gt_rect.groupId) or group_para == 0:
                for found in gt_rect.textFound:
                    if found:
                        correct_gt_characters += 1
                    total_gt_characters += 1

    recall = 0 if total_gt_characters == 0 else (float(correct_gt_characters) / total_gt_characters)
    precision = 0 if total_extracted_characters == 0 else (float(correct_result_characters) / total_extracted_characters)

    print("Detected Bounding Boxes: %d" % len(result_rect_list))
    print("Correct Bounding Boxes: %d" % len(correct_results))
    print("Number of ground truth characters: %d" % total_gt_characters)
    print("Number of detected characters: %d" % total_extracted_characters)
    print("Number of correct detetected char: %d" % correct_result_characters)
    print("Text Recognition Precision: %f" % precision)
    print("Text Recognition Recall: %f" % recall)

    precision_list.append(precision)
    recall_list.append(recall)
    total_extracted_characters_list.append(total_extracted_characters)
    total_gt_characters_list.append(total_gt_characters)

    return overlap_json, non_overlap_json, precision_list, recall_list, total_extracted_characters_list, total_gt_characters_list