Example #1
def score(path_predictions, path_groundtruth, path_output, iou_threshold=.4):

    assert (iou_threshold < 1 and iou_threshold > 0)

    ttime = time.time()
    boxes_dict = {}
    pchips = []
    stclasses = []
    num_preds = 0

    for file in tqdm(os.listdir(path_predictions)):
        fname = file.split(".txt")[0]
        pchips.append(fname)

        with open(path_predictions + file, 'r') as f:
            arr = np.array(list(csv.reader(f, delimiter=" ")))
            if arr.shape[0] == 0:
                #If the file is empty, we fill it in with an array of zeros
                boxes_dict[fname] = np.array([[0, 0, 0, 0, 0, 0]])
                num_preds += 1
            else:
                arr = arr[:, :6].astype(np.float64)
                # note: in this variant the IoU threshold doubles as the confidence cutoff
                threshold = iou_threshold
                arr = arr[arr[:, 5] > threshold]
                stclasses += list(arr[:, 4])
                num_preds += arr.shape[0]

                if np.any(arr[:, :4] < 0):
                    raise ValueError('Bounding boxes cannot be negative.')

                if np.any(arr[:, 5] < 0) or np.any(arr[:, 5] > 1):
                    raise ValueError(
                        'Confidence scores should be between 0 and 1.')

                boxes_dict[fname] = arr[:, :6]

    pchips = sorted(pchips)
    stclasses = np.unique(stclasses).astype(np.int64)

    gt_coords, gt_chips, gt_classes = get_labels(path_groundtruth)
    # this variant restricts evaluation to a single chip, '5.tif'
    gt_coords = gt_coords[gt_chips == '5.tif']
    gt_classes = gt_classes[gt_chips == '5.tif'].astype(np.int64)
    gt_chips = gt_chips[gt_chips == '5.tif']

    gt_unique = np.unique(gt_classes.astype(np.int64))
    print(gt_unique)
    max_gt_cls = 100

    # every prediction file must correspond to a chip present in the ground truth
    if not set(pchips).issubset(set(gt_chips)):
        raise ValueError(
            'The prediction files {%s} are not in the ground truth.' %
            str(set(pchips) - set(gt_chips)))

    print("Number of Predictions: %d" % num_preds)
    print("Number of GT: %d" % np.sum(gt_classes.shape))

    per_file_class_data = {}
    for i in gt_unique:
        per_file_class_data[i] = [[], []]

    num_gt_per_cls = np.zeros((max_gt_cls))

    for file_ind in range(len(pchips)):
        print(pchips[file_ind])
        det_box = boxes_dict[pchips[file_ind]][:, :4]
        det_scores = boxes_dict[pchips[file_ind]][:, 5]
        det_cls = boxes_dict[pchips[file_ind]][:, 4]

        gt_box = gt_coords[(gt_chips == pchips[file_ind]).flatten()]

        gt_cls = gt_classes[(gt_chips == pchips[file_ind])]

        for i in gt_unique:
            s = det_scores[det_cls == i]
            ssort = np.argsort(s)[::-1]
            per_file_class_data[i][0] += s[ssort].tolist()
            gt_box_i_cls = gt_box[gt_cls == i].flatten().tolist()
            det_box_i_cls = det_box[det_cls == i]
            det_box_i_cls = det_box_i_cls[ssort].flatten().tolist()

            gt_rects = convert_to_rectangle_list(gt_box_i_cls)
            rects = convert_to_rectangle_list(det_box_i_cls)

            matching = Matching(gt_rects, rects)

            rects_matched, gt_matched = matching.greedy_match(iou_threshold)
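            # rects_matched holds one flag per detection (True if it matched a GT box);
            # gt_matched holds one entry per ground-truth box, so len(gt_matched) is the
            # GT count for this class in this chip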

            #we aggregate confidence scores, rectangles, and num_gt across classes
            #per_file_class_data[i][0] += det_scores[det_cls == i].tolist()
            per_file_class_data[i][1] += rects_matched
            num_gt_per_cls[i] += len(gt_matched)

    average_precision_per_class = np.ones(max_gt_cls) * float('nan')
    per_class_p = np.ones(max_gt_cls) * float('nan')
    per_class_r = np.ones(max_gt_cls) * float('nan')

    for i in gt_unique:
        scores = np.array(per_file_class_data[i][0])
        rects_matched = np.array(per_file_class_data[i][1])

        if num_gt_per_cls[i] != 0:
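            # sort detections by descending confidence and accumulate TP/FP counts;
            # each prefix of this ordering yields one point on the precision-recall curve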
            sorted_indices = np.argsort(scores)[::-1]
            tp_sum = np.cumsum(rects_matched[sorted_indices])
            fp_sum = np.cumsum(np.logical_not(rects_matched[sorted_indices]))
            precision = tp_sum / (tp_sum + fp_sum + np.spacing(1))
            recall = tp_sum / num_gt_per_cls[i]
            per_class_p[i] = np.sum(rects_matched) / len(rects_matched)
            per_class_r[i] = np.sum(rects_matched) / num_gt_per_cls[i]
            ap = ap_from_pr(precision, recall)
        else:
            ap = float('nan')
        average_precision_per_class[i] = ap

    # metric splits
    metric_keys = [
        'map', 'map/small', 'map/medium', 'map/large', 'map/common', 'map/rare'
    ]

    splits = {
        'map/small': [
            17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 32, 41, 60, 62, 63, 64, 65,
            66, 91
        ],
        'map/medium': [
            11, 12, 15, 25, 29, 33, 34, 35, 36, 37, 38, 42, 44, 47, 50, 53, 56,
            59, 61, 71, 72, 73, 76, 84, 86, 93, 94
        ],
        'map/large': [13, 40, 45, 49, 51, 52, 54, 55, 57, 74, 77, 79, 83, 89],
        'map/common': [
            13, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 34, 35, 41, 47, 60,
            63, 64, 71, 72, 73, 76, 77, 79, 83, 86, 89, 91
        ],
        'map/rare': [
            11, 12, 15, 29, 32, 33, 36, 37, 38, 40, 42, 44, 45, 49, 50, 51, 52,
            53, 54, 55, 56, 57, 59, 61, 62, 65, 66, 74, 84, 93, 94
        ]
    }

    vals = {}
    vals['map'] = np.nanmean(average_precision_per_class)
    vals['map_score'] = np.nanmean(per_class_p)
    vals['mar_score'] = np.nanmean(per_class_r)

    for i in splits.keys():
        vals[i] = np.nanmean(average_precision_per_class[splits[i]])

    for i in gt_unique:
        vals[int(i)] = average_precision_per_class[int(i)]

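    # F1 is the harmonic mean of mean precision and mean recall;
    # np.spacing(1) guards against division by zero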
    vals['f1'] = 2 / ((1 / (np.spacing(1) + vals['map_score'])) +
                      (1 / (np.spacing(1) + vals['mar_score'])))

    print("mAP: %f | mAP score: %f | mAR: %f | F1: %f" %
          (vals['map'], vals['map_score'], vals['mar_score'], vals['f1']))

    with open(path_output + '/score.txt', 'w') as f:
        f.write(str("%.4f" % vals['map']))

    result = []
    with open(path_output + '/metrics.txt', 'w') as f:
        for key in vals.keys():
            f.write("%s %.4f\n" % (str(key), vals[key]))
            result.append(
                str(key) + " " + str(round(float(vals[key]), 4)) + "\n")
    result = sorted(result)
    print("Final time: %s" % str(time.time() - ttime))
    return result
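For reference, this is a minimal sketch of how a scorer like the one above might be driven. The prediction-file format follows the space-delimited layout described in the docstring of Example #2 (xmin ymin xmax ymax class_prediction score_prediction); the directory names, the geojson path, and the sample detection line below are all hypothetical.

import os

# Hypothetical layout: one prediction file per chip, named '<chip>.tif.txt'.
os.makedirs('predictions', exist_ok=True)
with open('predictions/5.tif.txt', 'w') as f:
    # xmin ymin xmax ymax class_prediction score_prediction
    f.write('100 150 180 220 73 0.87\n')

os.makedirs('output', exist_ok=True)

# score() concatenates path_predictions with each filename directly,
# so the trailing slash matters here.
score('predictions/', 'xView_train.geojson', 'output', iou_threshold=0.4)
print(open('output/score.txt').read())  # mAP written as a single number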
Example #2
def score(path_predictions, path_groundtruth, path_output, iou_threshold=.5):
    """
    Compute metrics on a number of prediction files, given a folder of prediction files
    and a ground truth.  Primary metric is mean average precision (mAP).

    Args:
        path_predictions: a folder path of prediction files.  
          Prediction files should have filename format 'XYZ.tif.txt',
          where 'XYZ.tif' is the xView TIFF file being predicted on.  
          Prediction files should be in space-delimited csv format, with each
          line like (xmin ymin xmax ymax class_prediction score_prediction)

        path_groundtruth: a file path to a single ground truth geojson

        path_output: a folder path for output scoring files

        iou_threshold: a float between 0 and 1 indicating the minimum IoU
          required to count a prediction as a true positive

    Outputs:
      Writes two files to the 'path_output' parameter folder: 'score.txt' and 'metrics.txt'
      'score.txt' contains a single floating point value output: mAP
      'metrics.txt' contains the remaining metrics in per-line format (metric/class_num: score_float)

    Raises:
      ValueError: if there are files in the prediction folder that are not in the ground truth geojson.
        E.g., a prediction file is titled '15.tif.txt', but the file '15.tif' is not in the ground truth.

  """
    assert (iou_threshold < 1 and iou_threshold > 0)

    ttime = time.time()
    boxes_dict = {}
    pchips = []
    stclasses = []
    num_preds = 0

    for file in tqdm(os.listdir(path_predictions)):
        fname = file.split(".txt")[0]
        pchips.append(fname)

        with open(path_predictions + file, 'r') as f:
            arr = np.array(list(csv.reader(f, delimiter=" ")))
            arr = arr[:, :6].astype(np.float64)
            threshold = 0
            arr = arr[arr[:, 5] > threshold]
            stclasses += list(arr[:, 4])
            num_preds += arr.shape[0]
            if np.any(arr[:, :4] < 0):
                raise ValueError('Bounding boxes cannot be negative.')
            boxes_dict[fname] = arr[:, :6]

    pchips = sorted(pchips)
    stclasses = np.unique(stclasses).astype(np.int64)

    gt_coords, gt_chips, gt_classes = get_labels(path_groundtruth)

    gt_unique = np.unique(gt_classes.astype(np.int64))
    max_gt_cls = 100

    if not set(pchips).issubset(set(gt_chips)):
        raise ValueError(
            'The prediction files {%s} are not in the ground truth.' %
            str(set(pchips) - set(gt_chips)))

    print("Number of Predictions: %d" % num_preds)
    print("Number of GT: %d" % np.sum(gt_classes.shape))

    per_file_class_data = {}
    for i in gt_unique:
        per_file_class_data[i] = [[], []]

    num_gt_per_cls = np.zeros((max_gt_cls))

    for file_ind in range(len(pchips)):
        print(pchips[file_ind])
        det_box = boxes_dict[pchips[file_ind]][:, :4]
        det_scores = boxes_dict[pchips[file_ind]][:, 5]
        det_cls = boxes_dict[pchips[file_ind]][:, 4]

        gt_box = gt_coords[(gt_chips == pchips[file_ind]).flatten()]
        gt_cls = gt_classes[(gt_chips == pchips[file_ind])]

        for i in gt_unique:
            gt_box_i_cls = gt_box[gt_cls == i].flatten().tolist()
            det_box_i_cls = det_box[det_cls == i].flatten().tolist()

            gt_rects = convert_to_rectangle_list(gt_box_i_cls)
            rects = convert_to_rectangle_list(det_box_i_cls)

            matching = Matching(gt_rects, rects)
            rects_matched, gt_matched = matching.greedy_match(iou_threshold)

            #we aggregate confidence scores, rectangles, and num_gt across classes
            per_file_class_data[i][0] += det_scores[det_cls == i].tolist()
            per_file_class_data[i][1] += rects_matched
            num_gt_per_cls[i] += len(gt_matched)

    average_precision_per_class = np.ones(max_gt_cls) * float('nan')
    per_class_p = np.ones(max_gt_cls) * float('nan')
    per_class_r = np.ones(max_gt_cls) * float('nan')

    for i in gt_unique:
        scores = np.array(per_file_class_data[i][0])
        rects_matched = np.array(per_file_class_data[i][1])

        if num_gt_per_cls[i] != 0:
            sorted_indices = np.argsort(scores)[::-1]
            tp_sum = np.cumsum(rects_matched[sorted_indices])
            fp_sum = np.cumsum(np.logical_not(rects_matched[sorted_indices]))
            precision = tp_sum / (tp_sum + fp_sum + np.spacing(1))
            recall = tp_sum / num_gt_per_cls[i]
            per_class_p[i] = np.sum(rects_matched) / len(rects_matched)
            per_class_r[i] = np.sum(rects_matched) / num_gt_per_cls[i]
            ap = ap_from_pr(precision, recall)
        else:
            ap = float('nan')
        average_precision_per_class[i] = ap

    #metric splits
    metric_keys = [
        'map', 'map/small', 'map/medium', 'map/large', 'map/common', 'map/rare'
    ]

    splits = {
        'map/small': [
            17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 32, 41, 60, 62, 63, 64, 65,
            66, 91
        ],
        'map/medium': [
            11, 12, 15, 25, 29, 33, 34, 35, 36, 37, 38, 42, 44, 47, 50, 53, 56,
            59, 61, 71, 72, 73, 76, 84, 86, 93, 94
        ],
        'map/large': [13, 40, 45, 49, 51, 52, 54, 55, 57, 74, 77, 79, 83, 89],
        'map/common': [
            13, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 34, 35, 41, 47, 60,
            63, 64, 71, 72, 73, 76, 77, 79, 83, 86, 89, 91
        ],
        'map/rare': [
            11, 12, 15, 29, 32, 33, 36, 37, 38, 40, 42, 44, 45, 49, 50, 51, 52,
            53, 54, 55, 56, 57, 59, 61, 62, 65, 66, 74, 84, 93, 94
        ]
    }

    vals = {}
    vals['map'] = np.nanmean(average_precision_per_class)
    vals['map_score'] = np.nanmean(per_class_p)
    vals['mar_score'] = np.nanmean(per_class_r)

    for i in splits.keys():
        vals[i] = np.nanmean(average_precision_per_class[splits[i]])

    for i in gt_unique:
        vals[int(i)] = average_precision_per_class[int(i)]

    vals['f1'] = 2 / ((1 / (np.spacing(1) + vals['map_score'])) +
                      (1 / (np.spacing(1) + vals['mar_score'])))

    print("mAP: %f | mAP score: %f | mAR: %f | F1: %f" %
          (vals['map'], vals['map_score'], vals['mar_score'], vals['f1']))

    with open(path_output + '/score.txt', 'w') as f:
        f.write(str("%.8f" % vals['map']))

    with open(path_output + '/metrics.txt', 'w') as f:
        for key in vals.keys():
            f.write("%s %f\n" % (str(key), vals[key]))

    print("Final time: %s" % str(time.time() - ttime))
Example #3
def score(path_predictions, path_groundtruth, path_output, iou_threshold=.5):
    """
    Compute metrics on a number of prediction files, given a folder of prediction files
    and a ground truth.  Primary metric is mean average precision (mAP).

    Args:
        path_predictions: a folder path of prediction files.  
          Prediction files should have filename format 'XYZ.tif.txt',
          where 'XYZ.tif' is the xView TIFF file being predicted on.  
          Prediction files should be in space-delimited csv format, with each
          line like (xmin ymin xmax ymax class_prediction score_prediction)

        path_groundtruth: a file path to a single ground truth geojson

        path_output: a folder path for output scoring files

        iou_threshold: a float between 0 and 1 indicating the minimum IoU
          required to count a prediction as a true positive

    Outputs:
      Writes two files to the 'path_output' parameter folder: 'score.txt' and 'metrics.txt'
      'score.txt' contains a single floating point value output: mAP
      'metrics.txt' contains the remaining metrics in per-line format (metric/class_num: score_float)

    Raises:
      ValueError: if there are files in the prediction folder that are not in the ground truth geojson.
        E.g., a prediction file is titled '15.tif.txt', but the file '15.tif' is not in the ground truth.

  """
    assert (iou_threshold < 1 and iou_threshold > 0)

    ttime = time.time()
    boxes_dict = {}
    pchips = []
    stclasses = []
    num_preds = 0

    # pchips: chip names derived from the prediction .txt filenames
    for file in tqdm(os.listdir(path_predictions)):
        fname = file.split(".txt")[0]
        pchips.append(fname)
        # debug
        with open(path_predictions + file, 'r') as f:

            #arr = np.array(list(csv.reader(f,delimiter=" ")))

            # maybe not needed
            predict_list = list(csv.reader(f, delimiter=" "))
            new_list = remove_invalid_predictions(predict_list)
            arr = np.array(new_list)
            if arr.shape[0] == 0:
                #If the file is empty, we fill it in with an array of zeros
                boxes_dict[fname] = np.array([[0, 0, 0, 0, 0, 0]])
                num_preds += 1
            else:
                arr = arr[:, :6].astype(np.float64)
                # TODO: the confidence threshold for counting a prediction as valid may
                # need adjusting (the default elsewhere is 0); an NMS mode could also be added
                threshold = 0.4
                arr = arr[arr[:, 5] > threshold]
                stclasses += list(arr[:, 4])
                num_preds += arr.shape[0]

                if np.any(arr[:, :4] < 0):
                    raise ValueError('Bounding boxes cannot be negative.')

                if np.any(arr[:, 5] < 0) or np.any(arr[:, 5] > 1):
                    raise ValueError(
                        'Confidence scores should be between 0 and 1.')

                boxes_dict[fname] = arr[:, :6]

    pchips = sorted(pchips)
    stclasses = np.unique(stclasses).astype(np.int64)

    # debug
    #gt_coords, gt_chips, gt_classes = get_labels(path_groundtruth)
    gt_coords, gt_chips, gt_classes, _ = get_labels_w_uid_nondamaged(
        path_groundtruth)

    # TODO: remove bboxes over clouds manually, or ensure test images contain no black chips

    gt_unique = np.unique(gt_classes.astype(np.int64))
    #debug
    print('gt_unique: ', gt_unique)
    max_gt_cls = 100  # max number of classes
    # debug
    # need to remove class 0 from evaluation
    ignored_classes = [0]
    gt_unique_ig = np.array(
        [i for i in gt_unique if int(i) not in ignored_classes],
        dtype=np.int64)

    #added
    # get statistics of ground truth
    num_gt_class = dict()
    for i in gt_unique:
        num_gt_class[i] = gt_classes[gt_classes == i].shape[0]

    if not set(pchips).issubset(set(gt_chips)):
        raise ValueError(
            'The prediction files {%s} are not in the ground truth.' %
            str(set(pchips) - set(gt_chips)))

    #print("Number of Predictions: %d" % num_preds)
    #print("Number of GT: %d" % np.sum(gt_classes.shape) )

    per_file_class_data = {}
    for i in gt_unique_ig:
        per_file_class_data[i] = [[], []]

    num_gt_per_cls = np.zeros((max_gt_cls))

    for file_ind in range(len(pchips)):
        print(pchips[file_ind])
        det_box = boxes_dict[pchips[file_ind]][:, :4]
        det_scores = boxes_dict[pchips[file_ind]][:, 5]
        det_cls = boxes_dict[pchips[file_ind]][:, 4]

        gt_box = gt_coords[(gt_chips == pchips[file_ind]).flatten()]
        gt_cls = gt_classes[(gt_chips == pchips[file_ind])]

        # iterate only over the non-ignored classes; per_file_class_data has no entry for class 0
        for i in gt_unique_ig:
            s = det_scores[det_cls == i]
            ssort = np.argsort(s)[::-1]
            per_file_class_data[i][0] += s[ssort].tolist()

            gt_box_i_cls = gt_box[gt_cls == i].flatten().tolist()
            det_box_i_cls = det_box[det_cls == i]
            det_box_i_cls = det_box_i_cls[ssort].flatten().tolist()

            gt_rects = convert_to_rectangle_list(gt_box_i_cls)
            rects = convert_to_rectangle_list(det_box_i_cls)

            matching = Matching(gt_rects, rects)
            rects_matched, gt_matched = matching.greedy_match(iou_threshold)
            # debug
            print('len(gt_matched): ', len(gt_matched))
            print('len(rects_matched): ', len(rects_matched))
            #print('rects_matched: ', rects_matched)

            #we aggregate confidence scores, rectangles, and num_gt across classes
            #per_file_class_data[i][0] += det_scores[det_cls == i].tolist()
            per_file_class_data[i][1] += rects_matched
            num_gt_per_cls[i] += len(gt_matched)

    average_precision_per_class = np.ones(max_gt_cls) * float('nan')
    per_class_p = np.ones(max_gt_cls) * float('nan')
    per_class_r = np.ones(max_gt_cls) * float('nan')

    # debug
    # need to remove class 0 from evaluation
    ignored_classes = [0]
    gt_unique_ig = np.array(
        [i for i in gt_unique if int(i) not in ignored_classes],
        dtype=np.int64)

    for i in gt_unique_ig:
        scores = np.array(per_file_class_data[i][0])
        rects_matched = np.array(per_file_class_data[i][1])

        if num_gt_per_cls[i] != 0:
            sorted_indices = np.argsort(scores)[::-1]
            tp_sum = np.cumsum(rects_matched[sorted_indices])
            fp_sum = np.cumsum(np.logical_not(rects_matched[sorted_indices]))
            # computed over the detections that passed the confidence threshold applied above
            precision = tp_sum / (tp_sum + fp_sum + np.spacing(1))
            recall = tp_sum / num_gt_per_cls[i]
            # debug
            # per-class precision at the given IoU threshold: matched detections / all detections
            per_class_p[i] = np.sum(rects_matched) / len(rects_matched)
            per_class_r[i] = np.sum(rects_matched) / num_gt_per_cls[i]
            ap = ap_from_pr(precision, recall)

            # added
            print('for class: ', i)
            print('TP: ', tp_sum[-1])
            print('FP: ', fp_sum[-1])

        else:
            ap = float('nan')
        average_precision_per_class[i] = ap

    # debug
    #metric splits
    #metric_keys = ['map','map/small','map/medium','map/large',
    #'map/common','map/rare']

    metric_keys = ['map']
    '''
  splits = {
  'map/small': [17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 32, 41, 60,
                   62, 63, 64, 65, 66, 91],
  'map/medium': [11, 12, 15, 25, 29, 33, 34, 35, 36, 37, 38, 42, 44,
                  47, 50, 53, 56, 59, 61, 71, 72, 73, 76, 84, 86, 93, 94],
  'map/large': [13, 40, 45, 49, 51, 52, 54, 55, 57, 74, 77, 79, 83, 89],

  'map/common': [13,17,18,19,20,21,23,24,25,26,27,28,34,35,41,
                  47,60,63,64,71,72,73,76,77,79,83,86,89,91],
  'map/rare': [11,12,15,29,32,33,36,37,38,40,42,44,45,49,50,
                  51,52,53,54,55,56,57,59,61,62,65,66,74,84,93,94]
  }
  '''
    vals = {}
    vals['map'] = np.nanmean(average_precision_per_class)
    vals['map_score'] = np.nanmean(per_class_p)
    vals['mar_score'] = np.nanmean(per_class_r)
    '''
  for i in splits.keys():
    vals[i] = np.nanmean(average_precision_per_class[splits[i]])
  '''

    for i in gt_unique:
        vals[int(i)] = average_precision_per_class[int(i)]

    vals['f1'] = 2 / ((1 / (np.spacing(1) + vals['map_score'])) +
                      (1 / (np.spacing(1) + vals['mar_score'])))

    #print("mAP: %f | mAP score: %f | mAR: %f | F1: %f" %
    print("mAP: %f | mean precision: %f | mean recall: %f | F1: %f" %
          (vals['map'], vals['map_score'], vals['mar_score'], vals['f1']))

    with open(path_output + '/score.txt', 'w') as f:
        f.write(str("%.8f" % vals['map']))

    with open(path_output + '/metrics.txt', 'w') as f:
        for key in vals.keys():
            f.write("%s %f\n" % (str(key), vals[key]))

    # added
    print('counting predictions with confidence score above %s as valid' %
          str(threshold))
    for k, v in num_gt_class.items():
        print('ground truth class: ', k)
        print('the count of GT labels: ', v)

    print("Number of Predictions: %d" % num_preds)
    print("Number of GT: %d" % np.sum(gt_classes.shape))

    print("Final time: %s" % str(time.time() - ttime))