Example #1
 def bbox_iou(bbox1, bbox2):
     """
     :param bbox1:
         [13, 13, 5, 4] / [x, y, w, h];
     :param bbox2:
         [13, 13, 5, 4] / [x, y, w, h];
     :return:
         [13, 13, 5];
     """
     bbox1_area = bbox1[..., 2] * bbox1[..., 3]
     bbox2_area = bbox2[..., 2] * bbox2[..., 3]
     # assert bbox1.shape == bbox2.shape
     bbox2 = xywh2xyxy(bbox2)
     bbox1 = xywh2xyxy(bbox1)
     # [13, 13, 5] & [13, 13, 5] -> [13, 13, 5]
     intersection_xmin = torch.max(bbox1[..., 0], bbox2[..., 0])
     intersection_ymin = torch.max(bbox1[..., 1], bbox2[..., 1])
     intersection_xmax = torch.min(bbox1[..., 2], bbox2[..., 2])
     intersection_ymax = torch.min(bbox1[..., 3], bbox2[..., 3])
     # [13, 13, 5] & [13, 13, 5] -> [13, 13, 5]
     intersection_w = torch.max(intersection_xmax - intersection_xmin, torch.tensor(0., device=opt.device))
     intersection_h = torch.max(intersection_ymax - intersection_ymin, torch.tensor(0., device=opt.device))
     intersection_area = intersection_w * intersection_h
     # [13, 13, 5] & ([13, 13, 5] & [13, 13, 5] & [13, 13, 5]) -> [13, 13, 5]
     ious = intersection_area / (bbox1_area + bbox2_area - intersection_area + 1e-10)
     # ious shape: [13, 13, 5]
     return ious
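All of the examples in this listing assume an xywh2xyxy helper that converts boxes from center format [x_center, y_center, w, h] to corner format [x1, y1, x2, y2]. The helper itself is not shown here, and its exact signature varies between repositories (Examples #6 and #14 pass extra grid arguments, and Example #12 feeds it a top-left cv2.boundingRect tuple), but a minimal sketch of the common element-wise version, working on either torch tensors or numpy arrays, could look like this:

import numpy as np
import torch

def xywh2xyxy(x):
    # Convert [..., 4] boxes from (cx, cy, w, h) to (x1, y1, x2, y2).
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top-left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top-left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom-right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom-right y
    return y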
Example #2
def dump_labelme(df, output_dir):

    os.makedirs(output_dir, exist_ok=True)
    # directly save results to output folder
    for _, row in df.iterrows():

        file_name = row['file_name']
        bboxes = row['bboxes']
        img_width = row['width']
        img_height = row['height']

        _template = {
            "version": "1.0.0",
            "flags": {},
            "shapes": [],
            "imagePath": file_name,
            "imageData": None,
            "imageHeight": img_height,
            "imageWidth": img_width,
        }

        for bbox in bboxes:
            x1, y1, x2, y2 = xywh2xyxy(bbox)
            _template['shapes'].append({
                'label': 'person',
                'points': [[x1, y1], [x2, y2]],
                "group_id": None,
                "shape_type": "rectangle",
                "flags": {},
            })
        output_filename = join(output_dir, os.path.splitext(file_name)[0] + '.json')  # splitext, since rstrip('.jpg') strips characters, not the suffix
        with open(output_filename, 'w') as json_fp:
            json_str = json.dumps(_template)
            json_fp.write(json_str)
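A minimal usage sketch for dump_labelme, assuming each row stores its boxes as [x, y, w, h] lists in pixel units and that an xywh2xyxy helper like the one sketched after Example #1 is in scope; the file name and sizes below are hypothetical:

import pandas as pd

df = pd.DataFrame([{
    'file_name': 'frame_000001.jpg',            # hypothetical image name
    'bboxes': [[320.0, 240.0, 50.0, 120.0]],    # one person box as (cx, cy, w, h)
    'width': 640,
    'height': 480,
}])
dump_labelme(df, 'labelme_out')  # writes labelme_out/frame_000001.json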
Example #3
def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_boxes('../coco128')
    # Convert detection dataset into classification dataset, with one directory per class

    path = Path(path)  # images dir
    shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None  # remove existing
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    for im_file in tqdm(files, total=n):
        if im_file.suffix[1:] in img_formats:
            # image
            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
            h, w = im.shape[:2]

            # labels
            lb_file = Path(img2label_paths([str(im_file)])[0])
            if Path(lb_file).exists():
                with open(lb_file, 'r') as f:
                    lb = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)  # labels

                for j, x in enumerate(lb):
                    c = int(x[0])  # class
                    f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
                    if not f.parent.is_dir():
                        f.parent.mkdir(parents=True)

                    b = x[1:] * [w, h, w, h]  # box
                    # b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.2 + 3  # pad
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)

                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
Example #4
def comput_loss(proc_pred,
                annotations_gt,
                targets,
                iou_th=0.5,
                giou_ratio=0.5):
    #procpred = process_preds(model_out[0], int(np.sqrt(out.shape[1])) , 256, 56)
    boxloss, closs, objloss = torch.tensor([0]).float(), torch.tensor(
        [0]).float(), torch.tensor([0]).float()
    for j in range(len(proc_pred)):
        for i, gt in enumerate(annotations_gt[j]):
            # get ious
            ious = bbox_iou(gt.float(), xywh2xyxy(proc_pred[j, :, :4]).float())
            # get relevant predictions
            pertinent = torch.where(ious > iou_th)[0]

            if len(pertinent):
                best_id = torch.max(ious[pertinent], 0)[1]
                best_bb = proc_pred[j, pertinent[best_id], :]  # best_id indexes into pertinent, map it back
                closs += pred_criterion(best_bb[5:].unsqueeze(0),
                                        torch.tensor(targets[i]))
                boxloss += (1 - ious[pertinent]).mean()

            trgt_objectness = (
                1 - giou_ratio) + giou_ratio * ious.detach().clamp(0)
            objloss += obj_criterion(proc_pred[j, ..., 4], trgt_objectness)

    loss = 2 * boxloss + closs + 2 * objloss
    loss_print = dict(box=boxloss.detach(),
                      pred=closs.detach(),
                      obj=objloss.detach())
    return loss, loss_print
Example #5
 def _iou(gt_boxes, base_anchors):
     """
     :param gt_boxes: [M, 4] / [ctr_x, ctr_y, w, h]
     :param base_anchors: [N, 2] / [w, h]
     :return: [M, N]
     """
     dummy_anchors = np.zeros(shape=[len(base_anchors), 4])
     dummy_gt_boxes = np.zeros(shape=[len(gt_boxes), 4])
     dummy_anchors[:, 2:] = base_anchors
     dummy_gt_boxes[:, 2:] = gt_boxes[:, 2:]
     dummy_anchors = xywh2xyxy(dummy_anchors)
     dummy_gt_boxes = xywh2xyxy(dummy_gt_boxes)
     ious = iou_general(dummy_gt_boxes[:, None, :], dummy_anchors)
     # fig, ax = plt.subplots(1)
     # plot_boxes(dummy_anchors, ax, 'r')
     # plot_boxes(dummy_gt_boxes, ax, 'b')
     # plot_boxes(dummy_anchors[ious.argmax(axis=-1)], ax, 'g')
     # plt.show()
     return ious
Example #6
    def decode(self, preds, img_size=None):
        '''
        Decode the self.S x self.S x self.pred_c sized preds to get the
            classes and coordinates of the objects.
        args:
            preds, self.S x self.S x self.pred_c, the network output;
            img_size, the image size; if it is not None the returned
                coordinates are absolute, if None they are normalized to
                the 0-1 range;
        returns:
            res_c, the predicted classes;
            res_s, the score used for NMS, i.e. the probability of belonging
                to this class given that it is an object, multiplied by the
                IoU with the real object;
            res_l, the predicted box coordinates;
            (all of the above are tensors)
        '''
        confidence = preds[..., [4, 9]]
        mask1 = confidence > self.conf_thre
        mask2 = confidence == confidence.max()
        mask = (mask1 + mask2).gt(0)
        if mask.sum() == 0:
            return None
        indx = mask.nonzero()[:, :2][:, [1, 0]].float()
        # lt = indx * self.cell_size
        # indx[:, 2] = indx[:, 2] *

        p_shape = list(preds.shape)
        preds_locs_conf = preds[:, :, :(self.B * 5)].view(
            *p_shape[:-1], self.B, 5)
        preds_locs = preds_locs_conf[..., :4]
        preds_conf = preds_locs_conf[..., 4]
        # Compute the shape the class output should have; repeat it so each of the two bboxes gets a copy
        pcs = list(preds.shape[:2]) + [self.B, self.C]
        preds_class = preds[:, :, (self.B * 5):].unsqueeze(2).expand(*pcs)

        remain_locs = xywh2xyxy(preds_locs[mask], self.cell_size, indx)
        remain_conf = preds_conf[mask]
        remain_class = preds_class[mask]

        # Run NMS, using probability of the best predicted class * confidence as the score,
        #   i.e. score = pr(class_i) * IoU
        probs, cls_index = remain_class.max(1)
        scores = probs * remain_conf
        keep = nms(remain_locs, scores, self.nms_thre)

        res_c, res_s, res_l = cls_index[keep], scores[keep], remain_locs[keep]
        if img_size is not None:
            res_l = res_l * torch.tensor(
                [list(img_size) * 2], dtype=torch.float, device=res_l.device)

        return res_c, res_s, res_l
Example #7
    def forward(self, x):
        """
        Args
          x: (Tensor) detection feature map, with size [bs, num_bboxes, 5 + nC]

        Returns
          detections: (Tensor) detection result with size [num_bboxes, [image_batch_idx, 4 offsets, p_obj, max_conf, cls_idx]]
        """
        bs, num_bboxes, num_attrs = x.size()
        detections = torch.Tensor().cuda()

        for idx in range(bs):
            pred = x[idx]

            try:
                non_zero_pred = pred[pred[:, 4] > self.conf_thresh]
                non_zero_pred[:, :4] = xywh2xyxy(non_zero_pred[:, :4])
                max_score, max_idx = torch.max(non_zero_pred[:, 5:], 1)
                max_idx = max_idx.float().unsqueeze(1)
                max_score = max_score.float().unsqueeze(1)
                non_zero_pred = torch.cat(
                    (non_zero_pred[:, :5], max_score, max_idx), 1)
                classes = torch.unique(non_zero_pred[:, -1])
            except Exception:  # no object detected
                continue

            for cls in classes:
                cls_pred = non_zero_pred[non_zero_pred[:, -1] == cls]
                conf_sort_idx = torch.sort(cls_pred[:, 5], descending=True)[1]
                cls_pred = cls_pred[conf_sort_idx]
                max_preds = []
                while cls_pred.size(0) > 0:
                    max_preds.append(cls_pred[0].unsqueeze(0))
                    ious = IoU(max_preds[-1], cls_pred)
                    cls_pred = cls_pred[ious < self.nms_thresh]

                if len(max_preds) > 0:
                    max_preds = torch.cat(max_preds).data
                    batch_idx = max_preds.new(max_preds.size(0), 1).fill_(idx)
                    seq = (batch_idx, max_preds)
                    detections = torch.cat(
                        seq, 1) if detections.size(0) == 0 else torch.cat(
                            (detections, torch.cat(seq, 1)))

        return detections
Example #8
    def loss(self, predictions, targets, stats):
        assert type(predictions) == list
        loss = {}
        for i, (p, t) in enumerate(zip(predictions, targets)):
            assert p.shape == t.shape

            l = {}
            batch_size = t.shape[0]

            t = t.permute(0, 2, 3, 1)
            p = p.permute(0, 2, 3, 1)

            t = t.contiguous().view(batch_size, -1, self.num_features)
            p = p.contiguous().view(batch_size, -1, self.num_features)

            img_idx = torch.arange(batch_size,
                                   dtype=torch.float,
                                   device=self.device)
            img_idx = img_idx.reshape(-1, 1) * p.shape[2]
            t[:, :, 0] += 2. * img_idx
            p[:, :, 0] += 2. * img_idx
            img_idx = torch.arange(batch_size,
                                   dtype=torch.float,
                                   device=self.device)
            img_idx = img_idx.reshape(-1, 1) * p.shape[1]
            t[:, :, 1] += 2. * img_idx
            p[:, :, 1] += 2. * img_idx

            t = t.contiguous().view(-1, self.num_features)
            p = p.contiguous().view(-1, self.num_features)

            obj_mask = torch.nonzero(t[:, 4]).flatten()
            num_obj = len(obj_mask)

            if obj_mask.numel() > 0:
                p_xyxy = xywh2xyxy(p[:, :4].detach())
                t_xyxy = xywh2xyxy(t[obj_mask, :4])

                all_ious = jaccard(p_xyxy, t_xyxy)
                ious, _ = torch.max(all_ious, dim=1)
                stats['avg_obj_iou'].append(
                    all_ious[obj_mask].diag().mean().item())

                mask = torch.nonzero(ious > self.noobj_iou_threshold).squeeze()
                t[mask, 4] = 1.
                noobj_mask = torch.nonzero(t[:, 4] == 0.).squeeze()

                l['coord'] = nn.MSELoss(reduction='sum')(p[obj_mask, 0],
                                                         t[obj_mask, 0])
                l['coord'] += nn.MSELoss(reduction='sum')(p[obj_mask, 1],
                                                          t[obj_mask, 1])
                l['coord'] += nn.MSELoss(reduction='sum')(torch.sqrt(
                    p[obj_mask, 2]), torch.sqrt(t[obj_mask, 2]))
                l['coord'] += nn.MSELoss(reduction='sum')(torch.sqrt(
                    p[obj_mask, 3]), torch.sqrt(t[obj_mask, 3]))
                l['coord'] *= LAMBDA_COORD / batch_size

                if self.iteration * self.batch_size < 12800:
                    l['bias'] = nn.MSELoss(reduction='sum')(p[noobj_mask, 0],
                                                            t[noobj_mask, 0])
                    l['bias'] += nn.MSELoss(reduction='sum')(p[noobj_mask, 1],
                                                             t[noobj_mask, 1])
                    l['bias'] += nn.MSELoss(reduction='sum')(torch.sqrt(
                        p[noobj_mask, 2]), torch.sqrt(t[noobj_mask, 2]))
                    l['bias'] += nn.MSELoss(reduction='sum')(torch.sqrt(
                        p[noobj_mask, 3]), torch.sqrt(t[noobj_mask, 3]))

                    l['bias'] *= 0.1 / batch_size

                p[obj_mask, 5:] = F.log_softmax(p[obj_mask, 5:], dim=-1)
                t_long = torch.argmax(t[obj_mask, 5:], dim=1)
                if USE_CROSS_ENTROPY:
                    l['class'] = nn.NLLLoss(reduction='sum')(p[obj_mask, 5:],
                                                             t_long)
                else:
                    l['class'] = nn.MSELoss(reduction='sum')(torch.exp(
                        p[obj_mask, 5:]), t[obj_mask, 5:])
                l['class'] *= LAMBDA_CLASS / batch_size
                stats['avg_class'].append(
                    torch.exp(p[obj_mask, 5 + t_long]).mean().item())

                # l['object'] = nn.MSELoss(reduction='sum')(p[obj_mask, 4],
                #                                           all_ious[obj_mask, torch.arange(num_obj)].detach())
                l['object'] = nn.MSELoss(reduction='sum')(p[obj_mask, 4],
                                                          t[obj_mask, 4])
                l['object'] *= LAMBDA_OBJ / batch_size
                stats['avg_pobj'].append(p[obj_mask, 4].mean().item())

                l['no_object'] = nn.MSELoss(reduction='sum')(p[noobj_mask, 4],
                                                             t[noobj_mask, 4])
                l['no_object'] *= LAMBDA_NOOBJ / batch_size
                stats['avg_pnoobj'].append(p[noobj_mask, 4].mean().item())
            else:
                l['object'] = torch.tensor([0.], device=self.device)
                l['coord'] = torch.tensor([0.], device=self.device)
                l['class'] = torch.tensor([0.], device=self.device)
                l['no_object'] = LAMBDA_NOOBJ / batch_size * nn.MSELoss(
                    reduction='sum')(p[:, 4], t[:, 4])
                if self.iteration * self.batch_size < 12800:
                    l['bias'] = nn.MSELoss(reduction='sum')(p[:, 0], t[:, 0])
                    l['bias'] += nn.MSELoss(reduction='sum')(p[:, 1], t[:, 1])
                    l['bias'] += nn.MSELoss(reduction='sum')(torch.sqrt(p[:,
                                                                          2]),
                                                             torch.sqrt(t[:,
                                                                          2]))
                    l['bias'] += nn.MSELoss(reduction='sum')(torch.sqrt(p[:,
                                                                          3]),
                                                             torch.sqrt(t[:,
                                                                          3]))
                    l['bias'] *= 0.1 / batch_size

            l['total'] = (l['coord'] + l['class'] + l['object'] +
                          l['no_object'])
            for k, v in l.items():
                try:
                    loss[k] = loss[k] + v
                except KeyError:
                    loss[k] = v

        return loss, stats
Example #9
    def __init__(self,
                 path,
                 img_size=416,
                 batch_size=16,
                 augment=False,
                 hyp=None,
                 rect=False,
                 image_weights=False,
                 cache_images=False,
                 single_cls=False,
                 pad=0.0):
        try:
            path = str(Path(path))  # os-agnostic
            parent = str(Path(path).parent) + os.sep
            if os.path.isfile(path):  # file
                with open(path, 'r') as f:
                    f = f.read().splitlines()
                    f = [
                        x.replace('./', parent) if x.startswith('./') else x
                        for x in f
                    ]  # local to global path
            elif os.path.isdir(path):  # folder
                f = glob.iglob(path + os.sep + '*.*')
            else:
                raise Exception('%s does not exist' % path)
            self.img_files = [
                x.replace('/', os.sep) for x in f
                if os.path.splitext(x)[-1].lower() in img_formats
            ]
        except:
            raise Exception('Error loading data from %s. See %s' %
                            (path, help_url))

        n = len(self.img_files)
        assert n > 0, 'No images found in %s. See %s' % (path, help_url)
        bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
        nb = bi[-1] + 1  # number of batches

        self.n = n  # number of images
        self.batch = bi  # batch index of image
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

        # Define labels
        self.label_files = [
            x.replace('images',
                      'labels').replace(os.path.splitext(x)[-1], '.txt')
            for x in self.img_files
        ]

        # Read image shapes (wh)
        sp = path.replace('.txt', '') + '.shapes'  # shapefile path
        try:
            with open(sp, 'r') as f:  # read existing shapefile
                s = [x.split() for x in f.read().splitlines()]
                assert len(s) == n, 'Shapefile out of sync'
        except:
            s = [
                exif_size(Image.open(f))
                for f in tqdm(self.img_files, desc='Reading image shapes')
            ]
            np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

        self.shapes = np.array(s, dtype=np.float64)

        # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(
                np.array(shapes) * img_size / 32. + pad).astype(np.int) * 32

        # Cache labels
        self.imgs = [None] * n
        self.labels = [np.zeros((0, 5), dtype=np.float32)] * n
        create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        np_labels_path = str(Path(
            self.label_files[0]).parent) + '.npy'  # saved labels in *.npy file
        if os.path.isfile(np_labels_path):
            s = np_labels_path  # print string
            x = np.load(np_labels_path, allow_pickle=True)
            if len(x) == n:
                self.labels = x
                labels_loaded = True
        else:
            s = path.replace('images', 'labels')

        pbar = tqdm(self.label_files)
        for i, file in enumerate(pbar):
            if labels_loaded:
                l = self.labels[i]
                # np.savetxt(file, l, '%g')  # save *.txt from *.npy file
            else:
                try:
                    with open(file, 'r') as f:
                        l = np.array(
                            [x.split() for x in f.read().splitlines()],
                            dtype=np.float32)
                except:
                    nm += 1  # print('missing labels for image %s' % self.img_files[i])  # file missing
                    continue

            if l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(
                ), 'non-normalized or out of bounds coordinate labels: %s' % file
                if np.unique(l,
                             axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])  # duplicate rows
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (
                            p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(
                                Path(f).parent)  # make new output folder

                        b = x[1:] * [w, h, w, h]  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)

                        b[[0, 2]] = np.clip(b[[0, 2]], 0,
                                            w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[
                            b[1]:b[3],
                            b[0]:b[2]]), 'Failure extracting classifier boxes'
            else:
                ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
                # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

            pbar.desc = 'Caching labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                s, nf, nm, ne, nd, n)
        assert nf > 0 or n == 20288, 'No labels found in %s. See %s' % (
            os.path.dirname(file) + os.sep, help_url)
        if not labels_loaded and n > 1000:
            print('Saving labels to %s for faster future loading' %
                  np_labels_path)
            np.save(np_labels_path, self.labels)  # save for next time

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        if cache_images:  # if training
            gb = 0  # Gigabytes of cached images
            pbar = tqdm(range(len(self.img_files)), desc='Caching images')
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            for i in pbar:  # max 10k images
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(
                    self, i)  # img, hw_original, hw_resized
                gb += self.imgs[i].nbytes
                pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

        # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
        detect_corrupted_images = False
        if detect_corrupted_images:
            from skimage import io  # conda install -c conda-forge scikit-image
            for file in tqdm(self.img_files,
                             desc='Detecting corrupted images'):
                try:
                    _ = io.imread(file)
                except:
                    print('Corrupted image detected: %s' % file)
Example #10
	def preprocess_img(self, img, crop_bb):
		crop = ut.xywh2xyxy(crop_bb)
		img = img.crop(crop)
		img = self.transform(img)
		return img
Example #11
 def val_model(self,val_dataloader,
               iou_thres=0.5,
               conf_thres=0.5,
               nms_thres=0.5,):
     print("validating...")
     self.model.eval()
     
     cls_list = []
     metrics_list = [] # list of tuples (tp,confs,pred)
     for batch_idx,(imgs,labels) in enumerate(tqdm.tqdm(val_dataloader,desc="Detecting objects")):
         
         # check the input_data format
         imgs = imgs.to(torch.float32)
         labels = labels.to(torch.float32)
         if self.cuda:
             imgs = imgs.to(self.device)
             labels = labels.to(self.device)
         
         # extract cls_name
         # labels: [detection_num, 6] - columns: (1) img_id (index within the batch), (1) cls_name, (4) box coords
         # every item in outputs [detection_num,7] :(x1,y1,x2,y2,conf_score,cls_score,cls_pred)
         cls_list += labels[:,1].tolist()
         
         # rescale labels
         img_h = imgs.size(2)
         img_w = imgs.size(3)
         labels[:,2:] = xywh2xyxy(labels[:,2:])
         labels[:,2] *= img_w
         labels[:,4] *= img_w
         labels[:,3] *= img_h
         labels[:,5] *= img_h
             
         # from xywh -> xyxy
         with torch.no_grad():
             outputs,_,__ = self.model(imgs)
             outputs = non_max_suppression(outputs,conf_thres,nms_thres)
         
             # before the outputs are fed into the compute_batch_info fcn
             # the outputs are supposed to be rescaled.
             # metrics_list: tp, pred_conf, pred_cls
             """
             to check the compute_batch_info fcn,
             we build a fake outputs tensor from the labels.
             In theory, this should give a perfect result.
             """
             """
             FloatTensor = torch.cuda.FloatTensor if self.cuda else torch.FloatTensor
             fake_outputs = [None for idx in range(len(outputs))]
             for i in range(len(outputs)):
                 label = labels[labels[:,0]==i]
                 if len(label) > 0:
                     fake_output = FloatTensor(np.zeros((len(label),7)))
                     fake_output[:,:4] = label[:,2:6]
                     fake_output[:,4] = 0.8
                     fake_output[:,5] = 0.8
                     fake_output[:,6] = label[:,1]
                 fake_outputs[i] = fake_output
             outputs = fake_outputs
             """
             metrics_list += compute_batch_info(outputs,labels,iou_thres)
         # for debug
         if batch_idx == 107:
             break
         
     # concatenate sample statistics
     tp,pred_conf,pred_cls = [np.concatenate(x,0) for x in list(zip(*metrics_list))]
     # print(tp.shape)
     # print(pred_conf.shape)
     # print(pred_cls.shape)
     # print(len(np.unique(pred_cls)))
     # a = input()
     precision,recall,ap,f1,ap_cls = ap_per_cls(tp,pred_conf,pred_cls,cls_list)
     # print(precision)
     # print(recall)
     # print(ap.shape)
     # print(f1)
     # print(ap_cls.shape)
     # a =input()
     self.model.train()
 
     return precision,recall,ap,f1,ap_cls
Example #12
    def detect(self, frame):
        '''
        input: frame
        output: dst, box(xyxy(bbox) or 4x2(min_area)), center(xy)
        '''
        if frame is None or len(frame) == 0:
            raise TypeError('No frame input')
        # Resize the frame
        shape = self.shape
        img0 = np.copy(frame)
        H, W, C = frame.shape
        scale_factor = np.array([shape[0]/W, shape[1]/H])

        frame = cv2.resize(frame, shape) # 300,400,3
        
        # Gaussian Blur
        frame_gaussian = cv2.GaussianBlur(frame, (7, 7), 0)
        
        # RGB to HSV
        frame_hsv = cv2.cvtColor(frame_gaussian, cv2.COLOR_BGR2HSV)
        
        # Get mask according to HSV
        hsv_thres_values = self.get_trackbar_value() if self.debug else list(self.icol)
        mask = cv2.inRange(frame_hsv, np.array(hsv_thres_values[:3]), np.array(hsv_thres_values[3:]))
        
        # Median filter
        mask_f = cv2.medianBlur(mask, 5)

        # Morphology for three times
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        mask_m = cv2.morphologyEx(mask_f, cv2.MORPH_CLOSE, kernel)
        mask_m = cv2.morphologyEx(mask_m, cv2.MORPH_OPEN, kernel)
        
        # Get Contours of The Mask
        box = None # xyxy
        center = None # xy
        _, contours, _= cv2.findContours(mask_m, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cnt_list = [cnt for cnt in contours if self.contours_thres[0]<len(cnt)<self.contours_thres[1]]
        if cnt_list:
            cnt = max(cnt_list, key=lambda x: x.shape[0]) # multiple contours, choose the biggest one
        
            if self.box_type == 'bbox':
                # Get Bounding Box
                box = np.int0(scale_bbox(xywh2xyxy(cv2.boundingRect(cnt)), 1/scale_factor)) # xyxy
                center = np.int0(np.array([(box[0] + box[2])/2, (box[1] + box[3])/2]))
            elif self.box_type == 'min_area':
                # Get Minimum Area Box
                rect = cv2.minAreaRect(cnt) # center(x, y), (width, height), angle of rotation
                box = cv2.boxPoints(rect) # (4, 2)
                # scale box
                box = box / scale_factor
                center = np.sum(box, axis=0)/4
                box, center = np.int0(box), np.int0(center)
            else:
                raise TypeError('unsupported box type %s' % self.box_type)
        
        # Result
        dst = self.plot_img(img0, box)
        # view result
        if self.view_result:
            show_img(self.window_name, cv2.resize(dst, shape))
            # show_img(self.window_name, dst)

        return dst, box, center
Example #13
def evaluate(model, path, iou_thres, conf_thres, nms_thres, image_size,
             batch_size, num_workers, device):
    # Set the model to evaluation mode
    model.eval()

    # Set up the dataset and dataloader
    dataset = datasets.ListDataset(path,
                                   image_size,
                                   augment=False,
                                   multiscale=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=dataset.collate_fn)

    labels = []
    sample_metrics = []  # List[Tuple] -> [(TP, confs, pred)]
    entire_time = 0
    for _, images, targets in tqdm.tqdm(dataloader,
                                        desc='Evaluate method',
                                        leave=False):
        if targets is None:
            continue

        # Extract labels
        labels.extend(targets[:, 1].tolist())

        # Rescale targets
        targets[:, 2:] = utils.xywh2xyxy(targets[:, 2:])
        targets[:, 2:] *= image_size

        # Predict objects
        start_time = time.time()
        with torch.no_grad():
            images = images.to(device)
            outputs = model(images)
            outputs = utils.NMS(outputs, conf_thres, nms_thres)
        entire_time += time.time() - start_time

        # Compute true positives, predicted scores and predicted labels per batch
        sample_metrics.extend(
            utils.get_batch_statistics(outputs, targets, iou_thres))

    # Concatenate sample statistics
    if len(sample_metrics) == 0:
        true_positives, pred_scores, pred_labels = np.array([]), np.array(
            []), np.array([])
    else:
        true_positives, pred_scores, pred_labels = [
            np.concatenate(x, 0) for x in list(zip(*sample_metrics))
        ]

    # Compute AP
    precision, recall, AP, f1, ap_class = utils.ap_per_class(
        true_positives, pred_scores, pred_labels, labels)

    # Compute inference time and fps
    inference_time = entire_time / dataset.__len__()
    fps = 1 / inference_time

    # Convert inference time to milliseconds
    inference_time *= 1000

    return precision, recall, AP, f1, ap_class, inference_time, fps
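The "Rescale targets" step above converts normalized YOLO-style labels into pixel-space corner boxes before matching them against predictions. A tiny worked sketch of that transform, assuming a square 416-pixel input and an xywh2xyxy helper like the one sketched after Example #1 (the label values here are made up):

import torch

# Hypothetical label row: [sample_idx, class_id, xc, yc, w, h], normalized to [0, 1].
targets = torch.tensor([[0.0, 1.0, 0.50, 0.50, 0.20, 0.40]])
image_size = 416

targets[:, 2:] = xywh2xyxy(targets[:, 2:])  # -> x1, y1, x2, y2, still normalized
targets[:, 2:] *= image_size                # -> pixel coordinates
# targets is now [[0., 1., 166.4, 124.8, 249.6, 291.2]]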
Example #14
    def forward(self, pred, target):
        '''
        args:
            pred, the predicted tensor, batch x self.S x self.S x self.pred_c
            target, the encoded ground-truth labels, batch x self.S x self.S x self.pred_c
        '''
        # Get the batch size and the device pred lives on
        N = target.size(0)
        device = pred.device
        # Get the cells matched to a gt box (as a mask) and the unmatched cells
        coo_mask = target[..., 4] > 0  # batch x S x S
        noo_mask = target[..., 4] == 0  # batch x S x S

        # ----- positive samples -----
        # Split the predictions into parts
        # batch*matched_cell_num x pred_c
        coo_pred = pred[coo_mask].view(-1, self.cell_channel)
        # batch*matched_cell_num*B x 5
        box_pred = coo_pred[:, :self.no_class].reshape(-1, 5)
        # batch*matched_cell_num x C
        class_pred = coo_pred[:, self.no_class:]

        # Split the targets into parts
        # batch*matched_cell_num x pred_c
        coo_target = target[coo_mask].view(-1, self.cell_channel)
        # batch*matched_cell_num*B x 5
        box_target = coo_target[:, :self.no_class].reshape(-1, 5)
        # batch*matched_cell_num x C
        class_target = coo_target[:, self.no_class:]

        # ----- negative samples -----
        noo_pred_c = pred[noo_mask].view(-1, self.cell_channel)[:, [4, 9]]
        noo_target_c = target[noo_mask].view(-1, self.cell_channel)[:, [4, 9]]

        # ----- loss for the negative samples -----
        noo_loss = self.mse(noo_pred_c, noo_target_c)

        # ----- loss for the positive samples -----
        coo_response_index = []
        coo_not_response_index = []
        boxes_target_iou = []
        for i in range(0, box_target.size(0), self.B):
            # Boxes were reshaped to [N, 5], so each cell contributes self.B (= 2) bboxes
            box1 = box_pred[i:i + self.B]
            # In the target these two bboxes are identical, so considering one is enough
            box2 = box_target[i].view(-1, 5)
            iou = boxes_iou(xywh2xyxy(box1[:, :4], self.cell_size),
                            xywh2xyxy(box2[:, :4], self.cell_size))  # [2, 1]
            max_iou, max_index = iou.max(0)
            coo_response_index.append(i + max_index.item())
            for bb in range(self.B):
                if bb != max_index:
                    coo_not_response_index.append(i + bb)
            boxes_target_iou.append(max_iou)
        # 1. Confidence loss for the positive samples, in two parts:
        #   since a cell has several bboxes, only the best-matching bbox keeps a
        #   confidence target of IoU * 1; the other bboxes get a target of 0.
        box_pred_response = box_pred[coo_response_index]
        box_pred_not_response = box_pred[coo_not_response_index]
        contain_loss = self.mse(
            box_pred_response[:, 4],
            torch.tensor(boxes_target_iou).to(device),
        )
        not_contain_loss = self.mse(
            box_pred_not_response[:, 4],
            torch.zeros_like(box_pred_not_response[:, 4], device=device),
        )
        # 2. loc loss
        #   computed only on the responsible bbox (the one of the 2 bboxes with the highest IoU with the gt box)
        box_target_response = box_target[coo_response_index]
        loc_loss = self.mse(
            box_pred_response[:, :2],
            box_target_response[:, :2],
        ) + self.mse(
            box_pred_response[:, 2:4].sqrt(),
            box_target_response[:, 2:4].sqrt(),
        )
        # 3. class loss
        #   obviously the class loss is computed on the classification of the cells that contain an object
        class_loss = self.mse(class_pred, class_target)

        # ----- Sum all the losses and divide by the batch size -----
        # Why is the contain loss multiplied by 2?????
        all_loss = (self.l_coord * loc_loss + 2 * contain_loss +
                    not_contain_loss + self.l_noobj * noo_loss +
                    class_loss) / N

        return all_loss
Example #15
    def get_mosaic(self, n, cross_x, cross_y, tensor_img, boxes):
        t_height = tensor_img.shape[1]
        t_width = tensor_img.shape[2]

        xyxy_bboxes = utils.xywh2xyxy(boxes[:, 1:])

        relative_cross_x = cross_x / self.img_size
        relative_cross_y = cross_y / self.img_size

        #CALCULATING TARGET WIDTH AND HEIGHT OF PICTURE
        if n == 0:
            width_of_nth_pic = cross_x
            height_of_nth_pic = cross_y
        elif n == 1:
            width_of_nth_pic = self.img_size - cross_x
            height_of_nth_pic = cross_y
        elif n == 2:
            width_of_nth_pic = cross_x
            height_of_nth_pic = self.img_size - cross_y
        elif n == 3:
            width_of_nth_pic = self.img_size - cross_x
            height_of_nth_pic = self.img_size - cross_y

        # self.img_size - width_of_1st_pic
        # selg.img_size - height_of_1st_pic

        # CHOOSING TOP LEFT CORNER (doing an offset to have more than a few pixels in the bbox :-) )
        cut_x1 = random.randint(0, int(t_width * 0.33))
        cut_y1 = random.randint(0, int(t_height * 0.33))

        # Now we find which axis to randomly enlarge (by checking which ratio is bigger); cross_x is basically the width of the top-left picture
        if (t_width - cut_x1) / width_of_nth_pic < (
                t_height - cut_y1) / height_of_nth_pic:
            cut_x2 = random.randint(cut_x1 + int(t_width * 0.67), t_width)
            cut_y2 = int(cut_y1 + (cut_x2 - cut_x1) / width_of_nth_pic *
                         height_of_nth_pic)

        else:
            cut_y2 = random.randint(cut_y1 + int(t_height * 0.67), t_height)
            cut_x2 = int(cut_x1 + (cut_y2 - cut_y1) / height_of_nth_pic *
                         width_of_nth_pic)

        # RESIZING AND INSERTING (to do 2D interpolation it wants 4 dimensions, so I add and remove one by using None and squeeze)
        tensor_img = F.interpolate(
            tensor_img[:, cut_y1:cut_y2, cut_x1:cut_x2][None],
            (height_of_nth_pic, width_of_nth_pic)).squeeze()

        # BBOX
        relative_cut_x1 = cut_x1 / t_width
        relative_cut_y1 = cut_y1 / t_height
        relative_cropped_width = (cut_x2 - cut_x1) / t_width
        relative_cropped_height = (cut_y2 - cut_y1) / t_height

        # SHIFTING TO THE CROPPED IMG SO X1, Y1 WILL BE 0
        xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0] - relative_cut_x1
        xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1] - relative_cut_y1
        xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2] - relative_cut_x1
        xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3] - relative_cut_y1

        # RESIZING TO THE CROPPED IMG SO X2 WILL BE 1
        xyxy_bboxes[:, 0] /= relative_cropped_width
        xyxy_bboxes[:, 1] /= relative_cropped_height
        xyxy_bboxes[:, 2] /= relative_cropped_width
        xyxy_bboxes[:, 3] /= relative_cropped_height

        # CLAMPING BOUNDING BOXES SO THEY DO NOT GO OUTSIDE THE IMAGE
        xyxy_bboxes[:, 0].clamp_(0, 1)
        xyxy_bboxes[:, 1].clamp_(0, 1)
        xyxy_bboxes[:, 2].clamp_(0, 1)
        xyxy_bboxes[:, 3].clamp_(0, 1)

        # FILTER TO THROW OUT ALL SMALL BBOXES
        filter_minbbox = (
            xyxy_bboxes[:, 2] - xyxy_bboxes[:, 0] > self.bbox_minsize) & (
                xyxy_bboxes[:, 3] - xyxy_bboxes[:, 1] > self.bbox_minsize)

        # RESIZING TO MOSAIC
        if n == 0:
            xyxy_bboxes[:, 0] *= relative_cross_x  #
            xyxy_bboxes[:, 1] *= relative_cross_y  #(1 - relative_cross_y)
            xyxy_bboxes[:, 2] *= relative_cross_x  #
            xyxy_bboxes[:, 3] *= relative_cross_y  #(1 - relative_cross_y)
        elif n == 1:
            xyxy_bboxes[:, 0] *= (1 - relative_cross_x)
            xyxy_bboxes[:, 1] *= relative_cross_y
            xyxy_bboxes[:, 2] *= (1 - relative_cross_x)
            xyxy_bboxes[:, 3] *= relative_cross_y
        elif n == 2:
            xyxy_bboxes[:, 0] *= relative_cross_x
            xyxy_bboxes[:, 1] *= (1 - relative_cross_y)
            xyxy_bboxes[:, 2] *= relative_cross_x
            xyxy_bboxes[:, 3] *= (1 - relative_cross_y)
        elif n == 3:
            xyxy_bboxes[:, 0] *= (1 - relative_cross_x)
            xyxy_bboxes[:, 1] *= (1 - relative_cross_y)
            xyxy_bboxes[:, 2] *= (1 - relative_cross_x)
            xyxy_bboxes[:, 3] *= (1 - relative_cross_y)

        # SHIFTING TO MOSAIC POSITION
        if n == 0:
            xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0]  # + relative_cross_x
            xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1]  # + relative_cross_y
            xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2]  # + relative_cross_x
            xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3]  # + relative_cross_y
        elif n == 1:
            xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0] + relative_cross_x
            xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1]
            xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2] + relative_cross_x
            xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3]
        elif n == 2:
            xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0]
            xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1] + relative_cross_y
            xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2]
            xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3] + relative_cross_y
        elif n == 3:
            xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0] + relative_cross_x
            xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1] + relative_cross_y
            xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2] + relative_cross_x
            xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3] + relative_cross_y

        boxes = boxes[filter_minbbox]
        boxes[:, 1:] = utils.xyxy2xywh(xyxy_bboxes)[filter_minbbox]

        return tensor_img, boxes
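Examples #15 and #18 also rely on the inverse helper utils.xyxy2xywh to go back from corner boxes to center format after cropping and padding. A minimal sketch, assuming the same conventions as the xywh2xyxy sketch after Example #1:

import numpy as np
import torch

def xyxy2xywh(x):
    # Convert [..., 4] boxes from (x1, y1, x2, y2) back to (cx, cy, w, h).
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # center x
    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # center y
    y[..., 2] = x[..., 2] - x[..., 0]        # width
    y[..., 3] = x[..., 3] - x[..., 1]        # height
    return y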
Example #16
    def forward(self, x, y_true=None):
        """
        Transform feature map into 2-D tensor. Transformation includes
        1. Re-organize tensor to make each row correspond to a bbox
        2. Transform center coordinates
        bx = sigmoid(tx) + cx
        by = sigmoid(ty) + cy
        3. Transform width and height
        bw = pw * exp(tw)
        bh = ph * exp(th)
        4. Activation
        @Args
        x: (Tensor) feature map with size [bs, (5+nC)*nA, gs, gs]
            5 => [4 offsets (xc, yc, w, h), objectness]
        @Returns
        detections: (Tensor) feature map with size [bs, nA, gs, gs, 5+nC]
        """
        bs, _, gs, _ = x.size()
        stride = self.reso // gs  # no pooling used, stride is the only downsample
        num_attrs = 5 + self.num_classes  # tx, ty, tw, th, p0
        nA = len(self.anchors)
        scaled_anchors = torch.Tensor([(a_w / stride, a_h / stride)
                                       for a_w, a_h in self.anchors]).cuda()

        # Re-organize [bs, (5+nC)*nA, gs, gs] => [bs, nA, gs, gs, 5+nC]
        x = x.view(bs, nA, num_attrs, gs, gs).permute(0, 1, 3, 4,
                                                      2).contiguous()

        pred = torch.Tensor(bs, nA, gs, gs, num_attrs).cuda()

        pred_tx = torch.sigmoid(x[..., 0]).cuda()
        pred_ty = torch.sigmoid(x[..., 1]).cuda()
        pred_tw = x[..., 2].cuda()
        pred_th = x[..., 3].cuda()
        pred_conf = torch.sigmoid(x[..., 4]).cuda()
        if self.training:
            pred_cls = x[..., 5:].cuda()  # softmax in cross entropy
        else:
            pred_cls = F.softmax(x[..., 5:], dim=-1).cuda()  # class

        grid_x = torch.arange(gs).repeat(gs, 1).view([1, 1, gs,
                                                      gs]).float().cuda()
        grid_y = torch.arange(gs).repeat(gs, 1).t().view([1, 1, gs,
                                                          gs]).float().cuda()
        anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))
        pred[..., 0] = pred_tx + grid_x
        pred[..., 1] = pred_ty + grid_y
        pred[..., 2] = torch.exp(pred_tw) * anchor_w
        pred[..., 3] = torch.exp(pred_th) * anchor_h
        pred[..., 4] = pred_conf
        pred[..., 5:] = pred_cls

        if not self.training:
            pred[..., :4] *= stride
            return pred.view(bs, -1, num_attrs)
        else:
            gt_tx = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
            gt_ty = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
            gt_tw = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
            gt_th = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
            gt_conf = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
            gt_cls = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()

            obj_mask = torch.zeros(bs, nA, gs, gs, requires_grad=False).cuda()
            for idx in range(bs):
                for y_true_one in y_true[idx]:
                    y_true_one = y_true_one.cuda()
                    gt_bbox = y_true_one[:4] * gs
                    gt_cls_label = int(y_true_one[4])

                    gt_xc, gt_yc, gt_w, gt_h = gt_bbox[0:4]
                    gt_i = gt_xc.long().cuda()
                    gt_j = gt_yc.long().cuda()

                    pred_bbox = pred[idx, :, gt_j, gt_i, :4]
                    ious = IoU(xywh2xyxy(pred_bbox), xywh2xyxy(gt_bbox))
                    best_iou, best_a = torch.max(ious, 0)

                    w, h = scaled_anchors[best_a]
                    gt_tw[idx, best_a, gt_j, gt_i] = torch.log(gt_w / w)
                    gt_th[idx, best_a, gt_j, gt_i] = torch.log(gt_h / h)
                    gt_tx[idx, best_a, gt_j, gt_i] = gt_xc - gt_i.float()
                    gt_ty[idx, best_a, gt_j, gt_i] = gt_yc - gt_j.float()
                    gt_conf[idx, best_a, gt_j, gt_i] = best_iou
                    gt_cls[idx, best_a, gt_j, gt_i] = gt_cls_label

                    obj_mask[idx, best_a, gt_j, gt_i] = 1

            MSELoss = nn.MSELoss(reduction='sum')
            BCELoss = nn.BCELoss(reduction='sum')
            CELoss = nn.CrossEntropyLoss(reduction='sum')

            loss = dict()
            loss['x'] = MSELoss(pred_tx * obj_mask, gt_tx * obj_mask)
            loss['y'] = MSELoss(pred_ty * obj_mask, gt_ty * obj_mask)
            loss['w'] = MSELoss(pred_tw * obj_mask, gt_tw * obj_mask)
            loss['h'] = MSELoss(pred_th * obj_mask, gt_th * obj_mask)
            # loss['cls'] = BCELoss(pred_cls * obj_mask, cls_mask * obj_mask)

            loss['cls'] = CELoss(
                (pred_cls * obj_mask.unsqueeze(-1)).view(-1, self.num_classes),
                (gt_cls * obj_mask).view(-1).long())
            loss['conf'] = MSELoss(pred_conf * obj_mask * 5, gt_conf * obj_mask * 5) + \
                MSELoss(pred_conf * (1 - obj_mask), gt_conf * (1 - obj_mask))  # no-object confidence target is zero

            pprint(loss)

            return loss
Example #17
            print('\n---- Evaluating Model ----')
            # Evaluate the model on the validation set
            model.eval()

            labels = []
            sample_metrics = []  # List of tuples (TP, confs, pred)

            for ind, (imgs, targets) in enumerate(val_loader):

                imgs = imgs.to(device)
                targets = targets.to(device)

                # Extract labels
                labels += targets[:, 1].tolist()
                # Rescale target
                targets[:, 2:] = utils.xywh2xyxy(targets[:, 2:])
                targets[:, 2:] *= args.img_size

                with torch.no_grad():
                    outputs, _ = model(imgs)
                    outputs = utils.non_max_suppression(
                        outputs,
                        conf_thresh=args.conf_thresh,
                        nms_thresh=args.nms_thresh)

                sample_metrics += utils.get_batch_statistics(
                    outputs, targets, iou_thresh=args.map_thresh)

            if len(sample_metrics) == 0:
                print('---- mAP is NULL')
            else:
Example #18
    def __getitem__(self, index):

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        label_path = self.label_files[index % len(self.img_files)].rstrip()

        # Getting image
        img = Image.open(img_path).convert('RGB')
        width, height = img.size

        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))

        # RESIZING
        if width > height:
            ratio = height / width
            t_width = self.img_size
            t_height = int(ratio * self.img_size)
        else:
            ratio = width / height
            t_width = int(ratio * self.img_size)
            t_height = self.img_size

        img = transforms.functional.resize(img, (t_height, t_width))

        # IF TRAIN APPLY BRIGHTNESS CONTRAST HUE SATURATION
        if self.train:
            brightness_rnd = random.uniform(1 - self.brightness_range,
                                            1 + self.brightness_range)
            contrast_rnd = random.uniform(1 - self.contrast_range,
                                          1 + self.contrast_range)
            hue_rnd = random.uniform(-self.hue_range, self.hue_range)
            saturation_rnd = random.uniform(1 - self.saturation_range,
                                            1 + self.saturation_range)

            img = transforms.functional.adjust_brightness(img, brightness_rnd)
            img = transforms.functional.adjust_contrast(img, contrast_rnd)
            img = transforms.functional.adjust_hue(img, hue_rnd)
            img = transforms.functional.adjust_saturation(img, saturation_rnd)

        # CONVERTING TO TENSOR
        tensor_img = transforms.functional.to_tensor(img)

        # Handle grayscaled images
        if len(tensor_img.shape) != 3:
            tensor_img = tensor_img.unsqueeze(0)
            tensor_img = tensor_img.expand(3, *tensor_img.shape[1:])  # replicate the single channel to 3

        # !!!WARNING: IN PIL IT'S WIDTH, HEIGHT, WHEREAS IN PYTORCH IT IS HEIGHT, WIDTH

        # Apply augmentations for train it would be mosaic
        if self.train:
            mossaic_img = torch.zeros(3, self.img_size, self.img_size)

            # FINDING CROSS POINT
            cross_x = int(
                random.uniform(self.img_size * self.cross_offset,
                               self.img_size * (1 - self.cross_offset)))
            cross_y = int(
                random.uniform(self.img_size * self.cross_offset,
                               self.img_size * (1 - self.cross_offset)))

            fragment_img, fragment_bbox = self.get_mosaic(
                0, cross_x, cross_y, tensor_img, boxes)
            mossaic_img[:, 0:cross_y, 0:cross_x] = fragment_img
            boxes = fragment_bbox

            for n in range(1, 4):
                raw_fragment_img, raw_fragment_bbox = self.get_img_for_mosaic(
                    brightness_rnd, contrast_rnd, hue_rnd, saturation_rnd)
                fragment_img, fragment_bbox = self.get_mosaic(
                    n, cross_x, cross_y, raw_fragment_img, raw_fragment_bbox)
                boxes = torch.cat([boxes, fragment_bbox])

                if n == 1:
                    mossaic_img[:, 0:cross_y,
                                cross_x:self.img_size] = fragment_img
                elif n == 2:
                    mossaic_img[:, cross_y:self.img_size,
                                0:cross_x] = fragment_img
                elif n == 3:
                    mossaic_img[:, cross_y:self.img_size,
                                cross_x:self.img_size] = fragment_img

            #Set mossaic to return tensor
            tensor_img = mossaic_img

        # For validation it would be letterbox
        else:
            xyxy_bboxes = utils.xywh2xyxy(boxes[:, 1:])

            #IMG
            padding = abs((t_width - t_height)) // 2
            padded_img = torch.zeros(3, self.img_size, self.img_size)
            if t_width > t_height:
                padded_img[:, padding:padding + t_height] = tensor_img
            else:
                padded_img[:, :, padding:padding + t_width] = tensor_img

            tensor_img = padded_img

            relative_padding = padding / self.img_size
            #BOXES
            if t_width > t_height:
                #Change y's relative position
                xyxy_bboxes[:, 1] *= ratio
                xyxy_bboxes[:, 3] *= ratio
                xyxy_bboxes[:, 1] += relative_padding
                xyxy_bboxes[:, 3] += relative_padding
            else:  #x's
                xyxy_bboxes[:, 0] *= ratio
                xyxy_bboxes[:, 2] *= ratio
                xyxy_bboxes[:, 0] += relative_padding
                xyxy_bboxes[:, 2] += relative_padding

            boxes[:, 1:] = utils.xyxy2xywh(xyxy_bboxes)

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

        return img_path, tensor_img, targets
Example #19
    def process_bboxes(self,
                       predictions,
                       image_info,
                       confidence_threshold=0.01,
                       overlap_threshold=0.5,
                       nms=True):

        image_idx_ = []
        bboxes_ = []
        classes_ = []
        conf_ = []

        for i, predictions_ in enumerate(predictions):
            if i not in [
                    0, 1, 2
            ]:  # Use this for specifying only a subset of detectors
                continue
            predictions_ = predictions_.permute(0, 2, 3, 1)

            for j, prediction in enumerate(predictions_):
                prediction = prediction.contiguous().view(
                    -1, self.num_features)
                prediction[:, 5:] = F.softmax(prediction[:, 5:], dim=-1)
                classes = torch.argmax(prediction[:, 5:], dim=-1)
                idx = torch.arange(0, len(prediction))
                confidence = prediction[:, 4] * prediction[idx, 5 + classes]

                mask = confidence > confidence_threshold

                if sum(mask) == 0:
                    continue

                bboxes = prediction[mask, :4].clone()
                bboxes[:, ::2] *= self.strides[i]
                bboxes[:, 1::2] *= self.strides[i]
                bboxes = xywh2xyxy(bboxes)

                confidence = confidence[mask]
                classes = classes[mask]

                bboxes[:, ::2] = torch.clamp(
                    bboxes[:, ::2],
                    min=image_info['padding'][0][j] + 1,
                    max=self.image_size[0] - image_info['padding'][2][j])
                bboxes[:, 1::2] = torch.clamp(
                    bboxes[:, 1::2],
                    min=image_info['padding'][1][j] + 1,
                    max=self.image_size[1] - image_info['padding'][3][j])

                image_idx_.append(j)
                bboxes_.append(bboxes)
                classes_.append(classes)
                conf_.append(confidence)

        bboxes_ = \
            [torch.cat([bboxes_[ii] for ii, k in enumerate(image_idx_) if k == idx]) for idx in np.unique(image_idx_)]
        classes_ = \
            [torch.cat([classes_[ii] for ii, k in enumerate(image_idx_) if k == idx]) for idx in np.unique(image_idx_)]
        conf_ = \
            [torch.cat([conf_[ii] for ii, k in enumerate(image_idx_) if k == idx]) for idx in np.unique(image_idx_)]

        image_idx = []
        bboxes = []
        confidence = []
        classes = []

        for i, idx in enumerate(np.unique(image_idx_)):
            if nms:
                cls = torch.unique(classes_[i])
                for c in cls:
                    cls_mask = (classes_[i] == c).nonzero().flatten()
                    mask = non_maximum_suppression(bboxes_[i][cls_mask],
                                                   conf_[i][cls_mask],
                                                   overlap=overlap_threshold)
                    bboxes.append(bboxes_[i][cls_mask][mask])
                    classes.append(classes_[i][cls_mask][mask])
                    confidence.append(conf_[i][cls_mask][mask])
                    image_idx.append([image_info['id'][idx]] *
                                     len(bboxes_[i][cls_mask][mask]))
            else:
                bboxes.append(bboxes_[i])
                confidence.append(conf_[i])
                classes.append(classes_[i])
                image_idx.append([image_info['id'][idx]] * len(bboxes_[i]))

        if len(bboxes) > 0:
            bboxes = torch.cat(bboxes).view(-1, 4)
            classes = torch.cat(classes).flatten()
            confidence = torch.cat(confidence).flatten()
            image_idx = [item for sublist in image_idx for item in sublist]

            return bboxes, classes, confidence, image_idx
        else:
            return torch.tensor([], device=self.device), \
                   torch.tensor([], dtype=torch.long, device=self.device), \
                   torch.tensor([], device=self.device), \
                   []