class CreateDetectionAnchors():
    """Estimate anchor sizes by k-means clustering of labelled box sizes.

    Every training sample is resized to ``detect2d_config.imgSize`` and the
    (width, height) of each resulting label is collected; the sizes are then
    clustered with ``cluster.vq.kmeans``.
    """

    def __init__(self, train_path):
        """Load the sample list found at ``train_path``."""
        self.xmlProcess = XMLProcess()
        self.image_process = ImageProcess()
        self.detection_sample = DetectionSample(train_path,
                                                detect2d_config.className)
        self.detection_sample.read_sample()
        self.dataset_process = DetectionDataSetProcess()

    def get_anchors(self, number):
        """Cluster the label sizes into ``number`` anchors and print them.

        Args:
            number: requested number of anchors (passed to k-means as the
                number of centroids).

        Returns:
            Array with one ``(width, height)`` row per anchor, sorted by
            area from small to large.  The same values are printed.

        Raises:
            ValueError: if the data set contains no labelled boxes.
        """
        wh_numpy = self.get_width_height()
        if len(wh_numpy) == 0:
            raise ValueError('no labelled boxes found, cannot compute anchors')

        # Kmeans calculation
        k = cluster.vq.kmeans(wh_numpy, number)[0]
        k = k[np.argsort(k.prod(1))]  # sort small to large

        # Measure IoUs: one (n_boxes, 1) column per anchor, stacked on axis 0
        iou = np.stack([self.compute_iou(wh_numpy, x) for x in k], 0)
        # Best IoU over all anchors for every box.  ndarray.max returns the
        # values directly (no (values, indices) pair), so no extra indexing.
        biou = iou.max(0)
        # Fraction of boxes whose best anchor exceeds the 0.2635 IoU threshold
        print('Best possible recall: %.3f' %
              (biou > 0.2635).astype(np.float32).mean())

        # Print
        # NOTE(review): '%g' needs detect2d_config.imgSize to be a number;
        # confirm it is not a (width, height) tuple.
        print(
            'kmeans anchors (n=%g, img_size=%g, IoU=%.2f/%.2f/%.2f-min/mean/best): '
            % (number, detect2d_config.imgSize, biou.min(), iou.mean(),
               biou.mean()),
            end='')
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])),
                  end=',  ' if i < len(k) - 1 else '\n')
        return k

    def get_width_height(self):
        """Collect the (width, height) of every label after resizing.

        Returns:
            ``float32`` array of shape ``(n_boxes, 2)``; shape ``(0, 2)``
            when no sample produces a label.
        """
        count = self.detection_sample.get_sample_count()
        result = []
        for sample_index in range(count):
            img_path, label_path = self.detection_sample.get_sample_path(
                sample_index)
            _, rgb_image = self.image_process.readRgbImage(img_path)
            _, _, boxes = self.xmlProcess.parseRectData(label_path)
            rgb_image, labels = self.dataset_process.resize_dataset(
                rgb_image, detect2d_config.imgSize, boxes,
                detect2d_config.className)
            sizes = np.zeros((len(labels), 2), dtype=np.float32)
            for row, box in enumerate(labels):
                sizes[row, :] = (box.width(), box.height())
            result.append(sizes)
        if not result:
            # np.concatenate rejects an empty list
            return np.zeros((0, 2), dtype=np.float32)
        return np.concatenate(result, axis=0)

    def compute_iou(self, list_x, x2):
        """IoU between each size in ``list_x`` and the size ``x2``.

        Only widths and heights are compared: the intersection is
        ``min(w1, w2) * min(h1, h2)``.

        Args:
            list_x: sequence of ``(width, height)`` pairs.
            x2: a single ``(width, height)`` pair.

        Returns:
            ``float32`` array of shape ``(len(list_x), 1)``.
        """
        if len(list_x) == 0:
            return np.zeros((0, 1), dtype=np.float32)
        sizes = np.asarray(list_x, dtype=np.float64)
        widths, heights = sizes[:, 0], sizes[:, 1]
        min_w = np.minimum(widths, x2[0])
        # Height must be compared with height (the old code used the width).
        min_h = np.minimum(heights, x2[1])
        inter = min_w * min_h
        union = widths * heights + x2[0] * x2[1] - inter
        return (inter / union).astype(np.float32).reshape(-1, 1)
Пример #2
0
class CalculateMeanAp():
    """Compute per-class average precision (AP) and their mean.

    Ground truth comes from the image/annotation pairs of ``val_path``;
    detections are read from one ``<class name>.txt`` file per class.
    """

    def __init__(self, val_path, class_names):
        """Remember the class list and collect the validation samples."""
        self.class_names = class_names
        self.xmlProcess = XMLProcess()
        self.detection_samples = DetectionSample(val_path, class_names)
        self.image_annotation_list = self.detection_samples.get_image_and_label_list(
            val_path)
        self.use_07_metric = False

    def eval(self, result_dir):
        """Evaluate every class except ``'__background__'``.

        Args:
            result_dir: directory holding ``<class name>.txt`` result files.

        Returns:
            ``(mean_ap, aps)`` where ``aps`` lists the AP of each evaluated
            class in ``class_names`` order.
        """
        # Parse the annotations once instead of once per class.
        recs = self.get_data_boxes()
        aps = []
        for name in self.class_names:
            if name == '__background__':
                continue
            file_path = os.path.join(result_dir, "%s.txt" % name)
            _, _, ap = self.calculate_ap(file_path, name, 0.5, recs=recs)
            aps.append(ap)

        self.print_evaluation(aps)
        mean_ap = np.mean(aps) if aps else 0.0
        return mean_ap, aps

    def print_evaluation(self, aps):
        """Print the mean AP followed by one line per class.

        ``aps`` is matched against ``class_names``; when the lengths differ
        (``eval`` skips ``'__background__'``) the background entry is dropped
        from the names so every AP is printed next to its own class.
        """
        mean_ap = np.mean(aps) if len(aps) > 0 else 0.0
        if len(aps) == len(self.class_names):
            names = self.class_names
        else:
            names = [name for name in self.class_names
                     if name != '__background__']

        print('Mean AP = {:.4f}'.format(mean_ap))
        print('~~~~~~~~')
        print('Results:')
        for name, ap in zip(names, aps):
            print(name + ': ' + '{:.3f}'.format(ap))

        print('mAP: ' + '{:.3f}'.format(mean_ap))
        print('~~~~~~~~')

    def calculate_ap(self, result_path, class_name, iou_thresh=0.5, recs=None):
        """Compute recall, precision and AP of one class.

        Args:
            result_path: detection result file of the class.
            class_name: name of the class to evaluate.
            iou_thresh: a detection matches a ground-truth box when their
                IoU is strictly greater than this value.
            recs: optional pre-parsed annotations as returned by
                ``get_data_boxes``; parsed on demand when omitted.

        Returns:
            ``(recall, precision, ap)``; ``(0, 0, 0)`` when ``result_path``
            does not exist.
        """
        if not os.path.exists(result_path):
            return 0, 0, 0

        if recs is None:
            recs = self.get_data_boxes()
        class_recs, npos = self.get_gt_boxes(recs, class_name)
        image_ids, _, BB = self.get_detect_result(result_path)
        tp, fp, _ = self.get_tp_fp(image_ids, class_recs, BB, iou_thresh)

        # compute precision recall
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        eps = np.finfo(np.float64).eps
        # eps keeps recall at 0 (not NaN) when the class has no ground truth
        recall = tp / np.maximum(float(npos), eps)
        # avoid divide by zero in case the first detection matches a difficult
        # ground truth
        precision = tp / np.maximum(tp + fp, eps)
        ap = self.get_ap(recall, precision)
        return recall, precision, ap

    def get_data_boxes(self):
        """Parse every annotation file.

        Returns:
            Dict mapping the image file name (with extension, without
            directory) to the list of boxes parsed from its annotation.
        """
        recs = {}
        for image_path, annotation_path in self.image_annotation_list:
            _, filename_post = os.path.split(image_path)
            _, _, boxes = self.xmlProcess.parseRectData(annotation_path)
            recs[filename_post] = boxes
        return recs

    def get_gt_boxes(self, recs, class_name):
        """Extract the ground-truth objects of one class.

        Args:
            recs: dict as returned by ``get_data_boxes``.
            class_name: class whose boxes are kept.

        Returns:
            ``(class_recs, npos)``.  ``class_recs`` maps each image name to a
            dict with the keys ``'bbox'`` (array of box vectors),
            ``'difficult'`` (boolean array) and ``'det2d'`` (per-box
            "already matched" flags, all ``False``).  ``npos`` counts the
            boxes that are not flagged difficult.
        """
        class_recs = {}
        npos = 0
        for image_name, boxes in recs.items():
            R = [box for box in boxes if box.name == class_name]
            bbox = np.array([x.getVector() for x in R])
            # builtin bool: the np.bool alias was removed from NumPy
            difficult = np.array([x.difficult for x in R]).astype(bool)
            npos += int(np.sum(~difficult))
            class_recs[image_name] = {
                'bbox': bbox,
                'difficult': difficult,
                'det2d': [False] * len(R)
            }
        return class_recs, npos

    def get_detect_result(self, result_path):
        """Read a result file and sort its detections by confidence.

        Each non-blank line holds whitespace-separated fields: image id,
        confidence, then the box coordinates.

        Returns:
            ``(image_ids, sorted_scores, BB)``, all ordered by descending
            confidence.  For a file without detections the result is
            ``([], empty array, array of shape (0, 4))``.
        """
        with open(result_path, 'r') as f:
            splitlines = [line.split() for line in f if line.strip()]

        if not splitlines:
            return [], np.zeros(0), np.zeros((0, 4))

        image_ids = [x[0] for x in splitlines]
        confidence = np.array([float(x[1]) for x in splitlines])
        BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

        # sort by confidence, highest first
        sorted_ind = np.argsort(-confidence)
        sorted_scores = confidence[sorted_ind]
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]
        return image_ids, sorted_scores, BB

    def calculate_iou(self, BBGT, bb):
        """Find the ground-truth box that overlaps ``bb`` the most.

        Boxes are ``(xmin, ymin, xmax, ymax)`` and widths/heights are
        computed as ``max - min + 1``.

        Returns:
            ``(ovmax, jmax)``: the best IoU and the row index of that box in
            ``BBGT``; ``(-inf, None)`` when ``BBGT`` is empty.
        """
        ovmax = -np.inf
        jmax = None
        if BBGT.size > 0:
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)
        return ovmax, jmax

    def get_tp_fp(self, image_ids, class_recs, BB, iou_thresh):
        """Mark every detection as true positive or false positive.

        Detections must already be ordered by descending confidence.  A
        detection whose best match is flagged difficult counts as neither.
        The ``'det2d'`` flags inside ``class_recs`` are updated in place so
        each ground-truth box is matched at most once.

        Returns:
            ``(tp, fp, iou)``: two 0/1 arrays with one entry per detection
            and the list of IoU values of the true positives.

        Raises:
            KeyError: if a detection refers to an image that is missing
                from ``class_recs``.
        """
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        iou = []
        for d in range(nd):
            R = class_recs[image_ids[d]]
            bb = BB[d, :].astype(float)
            BBGT = R['bbox'].astype(float)
            ovmax, jmax = self.calculate_iou(BBGT, bb)
            if ovmax > iou_thresh:
                if not R['difficult'][jmax]:
                    if not R['det2d'][jmax]:
                        tp[d] = 1.
                        R['det2d'][jmax] = True
                        iou.append(ovmax)
                    else:
                        # duplicate detection of an already matched box
                        fp[d] = 1.
            else:
                fp[d] = 1.
        return tp, fp, iou

    def get_ap(self, recall, precision):
        """
        ap = voc_ap(rec, prec, [use_07_metric])
                Compute VOC AP given precision and recall.
                If use_07_metric is true, uses the
                VOC 07 11 point method (default:False).
        """
        if self.use_07_metric:
            # 11 point metric
            ap = 0.
            for t in np.arange(0., 1.1, 0.1):
                if np.sum(recall >= t) == 0:
                    p = 0
                else:
                    p = np.max(precision[recall >= t])
                ap = ap + p / 11.
        else:
            # correct AP calculation
            # first append sentinel values at the end
            mrec = np.concatenate(([0.], recall, [1.]))
            mpre = np.concatenate(([0.], precision, [0.]))

            # compute the precision envelope (running maximum from the right)
            mpre = np.maximum.accumulate(mpre[::-1])[::-1]

            # to calculate area under PR curve, look for points
            # where X axis (recall) changes value
            i = np.where(mrec[1:] != mrec[:-1])[0]

            # and sum (\Delta recall) * prec
            ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
        return ap
Пример #3
0
class CreateDetectionSample():
    """Write sample list files (per-class lists, train/val split)."""

    def __init__(self):
        self.dirProcess = DirProcess()
        self.xmlProcess = XMLProcess()
        self.annotation_post = ".xml"

    def createBalanceSample(self, inputTrainPath, outputPath):
        """Write one ``<class name>.txt`` list per class into ``outputPath``.

        For every entry of ``inputTrainPath`` whose image and annotation both
        exist, the image file name is appended to the list of each class in
        ``detect2d_config.className`` that has at least one box in it.

        Args:
            inputTrainPath: sample list file; ``../Annotations`` and
                ``../JPEGImages`` are resolved relative to its directory.
            outputPath: output directory, created when missing.
        """
        os.makedirs(outputPath, exist_ok=True)
        path, _ = os.path.split(inputTrainPath)
        annotationDir = os.path.join(path, "../Annotations")
        imagesDir = os.path.join(path, "../JPEGImages")
        writeFile = self.createWriteFile(outputPath)
        try:
            for fileNameAndPost in self.dirProcess.getFileData(inputTrainPath):
                fileName, _ = os.path.splitext(fileNameAndPost)
                annotationFileName = fileName + self.annotation_post
                annotationPath = os.path.join(annotationDir,
                                              annotationFileName)
                imagePath = os.path.join(imagesDir, fileNameAndPost)
                print(imagePath, annotationPath)
                if not (os.path.exists(annotationPath) and
                        os.path.exists(imagePath)):
                    continue
                _, _, boxes = self.xmlProcess.parseRectData(annotationPath)
                # a set lists the image once per class, however many boxes
                names = {
                    box.name for box in boxes
                    if box.name in detect2d_config.className
                }
                print(names)
                for className in names:
                    writeFile[className].write(fileNameAndPost + "\n")
        finally:
            # always release the per-class files, even when parsing fails
            for classFile in writeFile.values():
                classFile.close()

    def createTrainAndTest(self, inputDir, outputPath, probability):
        """Split the images of ``inputDir`` into ``train.txt`` and ``val.txt``.

        The images are shuffled; only those that can be decoded and that
        have a matching annotation in ``../Annotations`` are listed.

        Args:
            inputDir: directory containing the images.
            outputPath: directory receiving both lists, created when missing.
            probability: every ``probability``-th image of the shuffled list
                goes to ``val.txt``, the others to ``train.txt``.  Must be
                non-zero (it is used as a modulus).
        """
        annotationsDir = os.path.join(inputDir, "../Annotations")
        os.makedirs(outputPath, exist_ok=True)
        saveTrainFilePath = os.path.join(outputPath, "train.txt")
        saveTestFilePath = os.path.join(outputPath, "val.txt")

        imageList = list(self.dirProcess.getDirFiles(inputDir, "*.*"))
        random.shuffle(imageList)
        with open(saveTrainFilePath, "w") as trainFile, \
                open(saveTestFilePath, "w") as testFile:
            for imageIndex, imagePath in enumerate(imageList):
                print(imagePath)
                # decoding doubles as a "can this image be read" check
                image = cv2.imdecode(np.fromfile(imagePath, dtype=np.uint8),
                                     cv2.IMREAD_GRAYSCALE)
                _, file_name_and_post = os.path.split(imagePath)
                imageName, _ = os.path.splitext(file_name_and_post)
                xmlPath = os.path.join(
                    annotationsDir,
                    "%s%s" % (imageName, self.annotation_post))
                if (image is not None) and os.path.exists(xmlPath):
                    if (imageIndex + 1) % probability == 0:
                        testFile.write("%s\n" % file_name_and_post)
                    else:
                        trainFile.write("%s\n" % file_name_and_post)

    def createWriteFile(self, outputPath):
        """Open ``<class name>.txt`` for writing for every configured class.

        Returns:
            Dict mapping class name to the open file object.  The caller is
            responsible for closing the files.
        """
        result = {}
        try:
            for className in detect2d_config.className:
                classImagePath = os.path.join(outputPath, className + ".txt")
                result[className] = open(classImagePath, "w")
        except OSError:
            # do not leak the files that were opened before the failure
            for classFile in result.values():
                classFile.close()
            raise
        return result
Пример #4
0
class DetectionTrainDataloader(DataLoader):
    """Iterate over a detection training set batch by batch.

    Each batch is a stack of resized, optionally augmented and normalized
    images together with a list holding the labels of every image.
    """

    def __init__(self,
                 train_path,
                 class_name,
                 batch_size=1,
                 image_size=(768, 320),
                 multi_scale=False,
                 is_augment=False,
                 balanced_sample=False):
        """
        Args:
            train_path: sample list handed to ``DetectionSample``.
            class_name: list of class names.
            batch_size: number of images per batch.
            image_size: ``(width, height)`` used when ``multi_scale`` is off;
                it also fixes the aspect ratio when ``multi_scale`` is on.
            multi_scale: pick a random image size for every batch.
            is_augment: apply ``DetectionDataAugment`` to every sample.
            balanced_sample: draw every batch from one randomly chosen class.
        """
        super().__init__()
        self.className = class_name
        self.multi_scale = multi_scale
        self.is_augment = is_augment
        self.balanced_sample = balanced_sample
        self.batch_size = batch_size
        self.image_size = image_size

        self.detection_sample = DetectionSample(train_path, class_name,
                                                balanced_sample)
        self.detection_sample.read_sample()
        self.xmlProcess = XMLProcess()
        self.image_process = ImageProcess()
        self.dataset_process = DetectionDataSetProcess()
        self.dataset_augment = DetectionDataAugment()

        self.nF = self.detection_sample.get_sample_count()
        self.nB = math.ceil(self.nF / batch_size)  # number of batches
        # defined here so next() does not fail when iter() was never called
        self.count = -1

    def __iter__(self):
        """Restart the iteration and reshuffle the samples."""
        self.count = -1
        self.detection_sample.shuffle_sample()
        return self

    def __next__(self):
        """Build the next batch.

        Returns:
            ``(torch_images, labels)``: the stacked images converted by
            ``all_numpy_to_tensor`` and a list with the converted labels of
            each image.

        Raises:
            StopIteration: after ``nB`` batches.
        """
        self.count += 1
        if self.count == self.nB:
            raise StopIteration
        numpy_images = []
        numpy_labels = []

        class_index = self.get_random_class()
        start_index = self.detection_sample.get_sample_start_index(
            self.count, self.batch_size, class_index)
        # one size for the whole batch so the images can be stacked
        width, height = self.get_image_size()

        stop_index = start_index + self.batch_size
        for temp_index in range(start_index, stop_index):
            img_path, label_path = self.detection_sample.get_sample_path(
                temp_index, class_index)
            _, rgb_image = self.image_process.readRgbImage(img_path)
            _, _, boxes = self.xmlProcess.parseRectData(label_path)

            rgb_image, labels = self.dataset_process.resize_dataset(
                rgb_image, (width, height), boxes, self.className)
            # honour the constructor flag (augmentation used to run always)
            if self.is_augment:
                rgb_image, labels = self.dataset_augment.augment(
                    rgb_image, labels)
            rgb_image, labels = self.dataset_process.normaliza_dataset(
                rgb_image, labels, (width, height))

            labels = self.dataset_process.change_outside_labels(labels)

            numpy_images.append(rgb_image)

            torch_labels = self.dataset_process.numpy_to_torch(labels, flag=0)
            numpy_labels.append(torch_labels)

        numpy_images = np.stack(numpy_images)
        torch_images = self.all_numpy_to_tensor(numpy_images)

        return torch_images, numpy_labels

    def __len__(self):
        return self.nB  # number of batches

    def get_random_class(self):
        """Return a random class index, or ``None`` without balancing."""
        class_index = None
        if self.balanced_sample:
            class_index = np.random.randint(0, len(self.className))
            print("loading labels {}".format(self.className[class_index]))
        return class_index

    def get_image_size(self):
        """Return the ``(width, height)`` to use for the current batch."""
        if self.multi_scale:
            # Multi-Scale YOLO Training
            print("wrong code for MultiScale")
            width = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
            scale = float(self.image_size[0]) / float(self.image_size[1])
            # keep the configured aspect ratio, rounded to a multiple of 32
            height = int(round(float(width / scale) / 32.0) * 32)
        else:
            # Fixed-Scale YOLO Training
            width = self.image_size[0]
            height = self.image_size[1]
        return width, height