def update_detection_table(RepeatDetectionResults,
                           options,
                           outputFilename=None):
    """
    Negate the confidence of every detection that was flagged as suspicious,
    then bring each row's 'max_detection_conf' back in sync with its detections.

    [RepeatDetectionResults] must expose:

    * detectionResults: a pandas DataFrame with 'detections' and
      'max_detection_conf' columns; it is modified in place
    * suspiciousDetections: per-directory lists of flagged detection groups
    * filenameToRow: maps an image filename to a positional row index
    * otherFields: forwarded untouched to write_api_results

    [options] supplies iouThreshold and confidenceMin.

    If [outputFilename] is a non-empty string, the updated table is handed to
    write_api_results; None or '' means "don't write anything".

    Returns the (modified) detectionResults DataFrame.

    Raises AssertionError if any of the internal consistency checks fail.
    """
    detectionResults = RepeatDetectionResults.detectionResults

    # An array of length nDirs, where each element is a list of DetectionLocation
    # objects for that directory that have been flagged as suspicious
    suspiciousDetectionsByDirectory = RepeatDetectionResults.suspiciousDetections

    nBboxChanges = 0

    print('Updating output table')

    # For each directory
    for directoryEvents in suspiciousDetectionsByDirectory:

        # For each suspicious detection group in this directory
        for detectionEvent in directoryEvents:

            locationBbox = detectionEvent.bbox

            # For each instance of this suspicious detection
            for instance in detectionEvent.instances:

                instanceBbox = instance.bbox

                # This should match the bbox for the detection event
                iou = ct_utils.get_iou(instanceBbox, locationBbox)

                # There are instances where iou is very close to the threshold,
                # so this has to be >= rather than >
                assert iou >= options.iouThreshold

                assert instance.filename in RepeatDetectionResults.filenameToRow
                iRow = RepeatDetectionResults.filenameToRow[instance.filename]
                row = detectionResults.iloc[iRow]
                rowDetections = row['detections']
                detectionToModify = rowDetections[instance.iDetection]

                # Make sure the bounding box matches
                #
                # NOTE(review): only the first three bbox elements are compared;
                # confirm whether the fourth was meant to be included.
                assert (instanceBbox[0:3] == detectionToModify['bbox'][0:3])

                # Make the probability negative, if it hasn't been switched by
                # another bounding box
                if detectionToModify['conf'] >= 0:
                    detectionToModify['conf'] = -1 * detectionToModify['conf']
                    nBboxChanges += 1

            # ...for each instance

        # ...for each detection

    # ...for each directory

    # Update maximum probabilities

    nProbChanges = 0
    nProbChangesToNegative = 0
    nProbChangesAcrossThreshold = 0

    # For each row...
    for iRow, row in detectionResults.iterrows():

        detections = row['detections']

        # Rows without a detection list hold None or a float placeholder
        # (presumably NaN for images that failed to process) instead of a
        # list; there is nothing to recompute for them, and len() would raise.
        if detections is None or isinstance(detections, float):
            continue

        if len(detections) == 0:
            continue

        maxPOriginal = float(row['max_detection_conf'])

        # Confidences that were already negated by a previous pass are legal,
        # so the lower bound is -1 rather than 0
        assert maxPOriginal >= -1.0

        confidences = [detection['conf'] for detection in detections]
        maxP = max(confidences)
        nNegative = sum(1 for p in confidences if p < 0)

        if abs(maxP - maxPOriginal) > 0.00000001:

            # We should only be making detections *less* likely
            assert maxP < maxPOriginal
            detectionResults.at[iRow, 'max_detection_conf'] = maxP

            nProbChanges += 1

            # Only count rows whose max actually crossed zero in this pass
            if (maxP < 0) and (maxPOriginal >= 0):
                nProbChangesToNegative += 1

            if maxPOriginal >= options.confidenceMin and maxP < options.confidenceMin:
                nProbChangesAcrossThreshold += 1

            # Negative probabilities should be the only reason maxP changed, so
            # we should have found at least one negative value
            assert nNegative > 0

        # ...if there was a change to the max probability for this row

    # ...for each row

    # An empty filename means "no output", same as None
    if outputFilename:
        write_api_results(detectionResults, RepeatDetectionResults.otherFields,
                          outputFilename)

    print(
        'Finished updating detection table\nChanged {} detections that impacted {} maxPs ({} to negative) ({} across confidence threshold)'
        .format(nBboxChanges, nProbChanges, nProbChangesToNegative,
                nProbChangesAcrossThreshold))

    return detectionResults
def find_matches_in_directory(dirName, options, rowsByDirectory):
    """
    Group the detections found in [dirName] by bounding-box location.

    Every detection that survives the confidence, class and size filters in
    [options] is compared (by IOU) against each location recorded so far.  It
    is added as an instance to every location it matches; if it matches none,
    it starts a new location.

    [rowsByDirectory] maps a directory name to a table of per-image rows with
    'file', 'max_detection_conf' and 'detections' columns.

    Returns a list of DetectionLocation objects.
    """
    progressBar = options.pbar
    if progressBar is not None:
        progressBar.update()

    # One DetectionLocation per distinct location seen so far
    knownLocations = []

    for _, imageRow in rowsByDirectory[dirName].iterrows():

        imageFile = imageRow['file']
        if not ct_utils.is_image_file(imageFile):
            continue

        # An image whose best detection is below threshold can't contribute
        if float(imageRow['max_detection_conf']) < options.confidenceMin:
            continue

        # List of dicts, each of which looks like:
        #
        # {
        #   'category': '1',   # str value, category ID
        #   'conf': 0.926,     # confidence of this detection
        #   'bbox': [x_min, y_min, width_of_box, height_of_box]
        # }
        #
        # (x_min, y_min) is the upper-left corner; everything is in relative
        # coordinates.
        imageDetections = imageRow['detections']
        assert len(imageDetections) > 0

        for iDetection, det in enumerate(imageDetections):
            assert 'category' in det and 'conf' in det and 'bbox' in det

            conf = det['conf']
            assert 0.0 <= conf <= 1.0

            # Only detections inside the configured confidence band are candidates
            if conf < options.confidenceMin or conf > options.confidenceMax:
                continue

            # Optionally exclude some classes from consideration as suspicious
            if len(options.excludeClasses) > 0 and \
                    int(det['category']) in options.excludeClasses:
                continue

            box = det['bbox']
            boxWidth = box[2]
            boxHeight = box[3]
            boxArea = boxHeight * boxWidth

            # Relative coordinates, so the area can't exceed 1
            assert 0.0 <= boxArea <= 1.0

            # Very large boxes are not treated as suspicious
            if boxArea > options.maxSuspiciousDetectionSize:
                continue

            instance = IndexedDetection(iDetection, imageFile, box)

            # Deliberately no early exit: one instance may join several
            # locations, since there is no obvious single "right" match.
            matchedAny = False
            for location in knownLocations:
                if ct_utils.get_iou(box, location.bbox) >= options.iouThreshold:
                    location.instances.append(instance)
                    matchedAny = True

            # Nothing similar yet, so this detection defines a new location
            if not matchedAny:
                knownLocations.append(DetectionLocation(instance, det, dirName))

        # ...for each detection

    # ...for each image

    return knownLocations
Пример #3
0
def find_matches_in_directory(dirName, options, rowsByDirectory):
    """
    Group the detections found in [dirName] by bounding-box location.

    Rows whose 'detections' value is a float are reported as failed images and
    skipped.  Every remaining detection that survives the confidence, class
    and size filters in [options] is compared (by IOU) against each location
    recorded so far.  It is added as an instance to every location it matches;
    if it matches none, it starts a new location.  Zero-width or zero-height
    boxes are ignored, and an IOU computation that raises is reported and
    treated as a non-match.

    Returns a list of DetectionLocation objects.
    """
    progressBar = options.pbar
    if progressBar is not None:
        progressBar.update()

    # One DetectionLocation per distinct location seen so far
    knownLocations = []

    for _, imageRow in rowsByDirectory[dirName].iterrows():

        imageFile = imageRow['file']
        if not ct_utils.is_image_file(imageFile):
            continue

        # An image whose best detection is below threshold can't contribute
        if float(imageRow['max_detection_conf']) < options.confidenceMin:
            continue

        # Normally a list of dicts, each of which looks like:
        #
        # {
        #   'category': '1',   # str value, category ID
        #   'conf': 0.926,     # confidence of this detection
        #   'bbox': [x_min, y_min, width_of_box, height_of_box]
        # }
        #
        # (x_min, y_min) is the upper-left corner; everything is in relative
        # coordinates.
        imageDetections = imageRow['detections']

        # A float here means there is no detection list; the row is expected
        # to carry a failure message instead
        if isinstance(imageDetections, float):
            failureMessage = imageRow['failure']
            assert isinstance(failureMessage, str)
            print('Skipping failed image {} ({})'.format(
                imageFile, failureMessage))
            continue

        assert len(imageDetections) > 0

        for iDetection, det in enumerate(imageDetections):

            assert 'category' in det and 'conf' in det and 'bbox' in det

            conf = det['conf']

            # Negative values are allowed because an earlier pass may already
            # have negated some confidences
            assert -1.0 <= conf <= 1.0

            # Only detections inside the configured confidence band are candidates
            if conf < options.confidenceMin or conf > options.confidenceMax:
                continue

            # Optionally exclude some classes from consideration as suspicious
            if len(options.excludeClasses) > 0 and \
                    int(det['category']) in options.excludeClasses:
                continue

            box = det['bbox']
            boxWidth = box[2]
            boxHeight = box[3]

            # Degenerate boxes are silently dropped
            if boxWidth == 0 or boxHeight == 0:
                continue

            boxArea = boxHeight * boxWidth

            # Relative coordinates, so the area can't exceed 1
            assert 0.0 <= boxArea <= 1.0, 'Illegal bounding box area {}'.format(
                boxArea)

            # Very large boxes are not treated as suspicious
            if boxArea > options.maxSuspiciousDetectionSize:
                continue

            detCategory = det['category']

            instance = IndexedDetection(iDetection=iDetection,
                                        filename=imageFile,
                                        bbox=box,
                                        confidence=conf,
                                        category=detCategory)

            # Deliberately no early exit: one instance may join several
            # locations, since there is no obvious single "right" match.
            matchedAny = False

            for location in knownLocations:

                try:
                    overlap = ct_utils.get_iou(box, location.bbox)
                except Exception as e:
                    other = location.bbox
                    print(
                        'Warning: IOU computation error on boxes ({},{},{},{}),({},{},{},{}): {}'
                        .format(box[0], box[1], box[2], box[3],
                                other[0], other[1], other[2], other[3],
                                str(e)))
                    continue

                if overlap >= options.iouThreshold:
                    location.instances.append(instance)
                    matchedAny = True

            # ...for each known location

            # Nothing similar yet, so this detection defines a new location
            if not matchedAny:
                knownLocations.append(DetectionLocation(instance, det, dirName))

        # ...for each detection

    # ...for each image

    return knownLocations
Пример #4
0
def update_detection_table(RepeatDetectionResults,
                           options,
                           outputFilename=None):
    """
    Negate the confidence of every detection that was flagged as suspicious,
    then bring each row's 'max_detection_conf' back in sync with its detections.

    [RepeatDetectionResults] supplies the detection table (detectionResults,
    modified in place), the per-directory lists of flagged detection groups
    (suspiciousDetections), the filename-to-row lookup (filenameToRow) and
    otherFields, which is forwarded to write_api_results.

    [options] supplies iouThreshold and confidenceMin.

    If [outputFilename] is neither None nor empty, the updated table is passed
    to write_api_results.

    Returns the (modified) detection table.
    """
    table = RepeatDetectionResults.detectionResults

    # One entry per directory; each entry is a list of the DetectionLocation
    # objects flagged as suspicious in that directory
    flaggedByDirectory = RepeatDetectionResults.suspiciousDetections

    nFlipped = 0

    print('Updating output table')

    for flaggedLocations in flaggedByDirectory:

        for location in flaggedLocations:

            referenceBbox = location.bbox

            for member in location.instances:

                memberBbox = member.bbox

                # Every member of a group should overlap the group's box by at
                # least the IOU threshold
                overlap = ct_utils.get_iou(memberBbox, referenceBbox)
                assert overlap >= options.iouThreshold

                assert member.filename in RepeatDetectionResults.filenameToRow
                rowPosition = RepeatDetectionResults.filenameToRow[member.filename]
                target = table.iloc[rowPosition]['detections'][member.iDetection]

                # Sanity check that we're about to modify the right detection
                assert (memberBbox[0:3] == target['bbox'][0:3])

                # Flip the sign, unless another group already did
                if target['conf'] >= 0:
                    target['conf'] = -1 * target['conf']
                    nFlipped += 1

            # ...for each member of this group

        # ...for each flagged group

    # ...for each directory

    # Recompute the per-image maximum confidence

    nMaxChanged = 0
    nMaxTurnedNegative = 0
    nMaxCrossedThreshold = 0

    for rowLabel, imageRow in table.iterrows():

        imageDetections = imageRow['detections']

        # A float here means there is no detection list; the row is expected
        # to carry a failure message instead
        if isinstance(imageDetections, float):
            assert isinstance(imageRow['failure'], str)
            continue

        if len(imageDetections) == 0:
            continue

        previousMax = float(imageRow['max_detection_conf'])

        # Already-negated values are legal, since this may be run on its own
        # output
        assert previousMax >= -1.0

        confidences = [det['conf'] for det in imageDetections]
        currentMax = max(confidences)
        nNegative = sum(1 for conf in confidences if conf < 0)

        # Ignore differences too small to be meaningful
        if not (abs(currentMax - previousMax) > 1e-3):
            continue

        # We should only ever be making detections *less* likely
        assert currentMax < previousMax
        table.at[rowLabel, 'max_detection_conf'] = currentMax

        nMaxChanged += 1

        if (currentMax < 0) and (previousMax >= 0):
            nMaxTurnedNegative += 1

        if (previousMax >= options.confidenceMin) and \
                (currentMax < options.confidenceMin):
            nMaxCrossedThreshold += 1

        # The max can only have dropped because something was negated, so at
        # least one negative value must be present
        assert nNegative > 0

    # ...for each image

    # Optionally write the updated table
    if not (outputFilename is None or len(outputFilename) == 0):
        write_api_results(table, RepeatDetectionResults.otherFields,
                          outputFilename)

    print(
        'Finished updating detection table\nChanged {} detections that impacted {} maxPs ({} to negative) ({} across confidence threshold)'
        .format(nFlipped, nMaxChanged, nMaxTurnedNegative,
                nMaxCrossedThreshold))

    return table
Пример #5
0
def find_matches_in_directory(dirName, options, rowsByDirectory):
    """
    Find all unique detection locations in [dirName].

    Folders are skipped entirely (returning an empty list) when they exceed
    options.maxImagesPerFolder, are missing from options.includeFolders, or
    appear in options.excludeFolders.  Otherwise, every detection that
    survives the confidence, class and size filters in [options] is compared
    (by IOU) against the same-category locations that overlap it in a spatial
    index.  It is added as an instance to every location it matches; if it
    matches none, it starts a new location.

    Returns a list of DetectionLocation objects, sorted by their id.
    """
    if options.pbar is not None:
        options.pbar.update()

    # Spatial index holding one DetectionLocation per distinct location
    locationIndex = pyqtree.Index(bbox=(-0.1, -0.1, 1.1, 1.1))

    # One row per image in this folder
    folderRows = rowsByDirectory[dirName]

    if options.maxImagesPerFolder is not None and \
            len(folderRows) > options.maxImagesPerFolder:
        print(
            'Ignoring directory {} because it has {} images (limit set to {})'.
            format(dirName, len(folderRows), options.maxImagesPerFolder))
        return []

    if options.includeFolders is not None:
        assert options.excludeFolders is None, 'Cannot specify include and exclude folder lists'
        if dirName not in options.includeFolders:
            print('Ignoring folder {}, not in inclusion list'.format(dirName))
            return []

    if options.excludeFolders is not None:
        assert options.includeFolders is None, 'Cannot specify include and exclude folder lists'
        if dirName in options.excludeFolders:
            print('Ignoring folder {}, on exclusion list'.format(dirName))
            return []

    # rowLabel is a pandas index, so it may not start from zero; iImage is a
    # plain zero-based loop counter, and doubles as the id of any location
    # first seen in that image.
    for iImage, (rowLabel, imageRow) in enumerate(folderRows.iterrows()):

        imageFile = imageRow['file']
        if not ct_utils.is_image_file(imageFile):
            continue

        hasRequiredFields = ('max_detection_conf' in imageRow) and \
            ('detections' in imageRow) and \
            (imageRow['detections'] is not None)
        if not hasRequiredFields:
            print('Skipping row {}'.format(rowLabel))
            continue

        # An image whose best detection is below threshold can't contribute
        if float(imageRow['max_detection_conf']) < options.confidenceMin:
            continue

        # Normally a list of dicts, each of which looks like:
        #
        # {
        #   'category': '1',   # str value, category ID
        #   'conf': 0.926,     # confidence of this detection
        #   'bbox': [x_min, y_min, width_of_box, height_of_box]
        # }
        #
        # (x_min, y_min) is the upper-left corner; everything is in relative
        # coordinates.
        imageDetections = imageRow['detections']

        # A float here means there is no detection list; the row is expected
        # to carry a failure message instead
        if isinstance(imageDetections, float):
            failureMessage = imageRow['failure']
            assert isinstance(failureMessage, str)
            print('Skipping failed image {} ({})'.format(
                imageFile, failureMessage))
            continue

        assert len(imageDetections) > 0

        for iDetection, det in enumerate(imageDetections):

            if det is None:
                print('Skipping detection {}'.format(iDetection))
                continue

            assert 'category' in det and 'conf' in det and 'bbox' in det

            conf = det['conf']

            # Negative values are allowed because an earlier pass may already
            # have negated some confidences
            assert -1.0 <= conf <= 1.0

            # Only detections inside the configured confidence band are candidates
            if conf < options.confidenceMin or conf > options.confidenceMax:
                continue

            # Optionally exclude some classes from consideration as suspicious
            if len(options.excludeClasses) > 0 and \
                    int(det['category']) in options.excludeClasses:
                continue

            box = det['bbox']
            boxWidth = box[2]
            boxHeight = box[3]

            # Degenerate boxes are silently dropped
            if boxWidth == 0 or boxHeight == 0:
                continue

            boxArea = boxHeight * boxWidth

            # Relative coordinates, so the area can't exceed 1
            assert 0.0 <= boxArea <= 1.0, 'Illegal bounding box area {}'.format(
                boxArea)

            # Very large boxes are not treated as suspicious
            if boxArea > options.maxSuspiciousDetectionSize:
                continue

            detCategory = det['category']

            instance = IndexedDetection(iDetection=iDetection,
                                        filename=imageFile,
                                        bbox=box,
                                        confidence=conf,
                                        category=detCategory)

            indexRect = detection_rect_to_rtree_rect(box)

            # The index is category-agnostic, so this may include locations
            # from other categories; those are filtered out below.  Sorting by
            # id makes the visiting order deterministic.
            nearbyLocations = sorted(locationIndex.intersect(indexRect),
                                     key=lambda loc: loc.id)

            # Deliberately no early exit: one instance may join several
            # locations, since there is no obvious single "right" match.
            matchedAny = False

            for location in nearbyLocations:

                # Don't match across categories
                if location.category != detCategory:
                    continue

                try:
                    overlap = ct_utils.get_iou(box, location.bbox)
                except Exception as e:
                    other = location.bbox
                    print(
                        'Warning: IOU computation error on boxes ({},{},{},{}),({},{},{},{}): {}'
                        .format(box[0], box[1], box[2], box[3],
                                other[0], other[1], other[2], other[3],
                                str(e)))
                    continue

                if overlap >= options.iouThreshold:
                    location.instances.append(instance)
                    matchedAny = True

            # ...for each nearby location

            # Nothing similar yet, so this detection defines a new location
            if not matchedAny:
                newLocation = DetectionLocation(instance=instance,
                                                detection=det,
                                                relativeDir=dirName,
                                                category=detCategory,
                                                id=iImage)
                locationIndex.insert(item=newLocation, bbox=indexRect)

        # ...for each detection

    # ...for each image

    # Pull everything back out of the index with an oversized query rectangle
    allLocations = locationIndex.intersect([-100, -100, 100, 100])

    # Sorted by id so the result looks as if it had never gone through a tree
    # structure, which is convenient when debugging
    return sorted(allLocations, key=lambda loc: loc.id)