Example no. 1
    def extract_regions(self, features, boxes, indices):
        regions = []

        # De-Normalize to the feature map size
        multiplier = torch.tensor([self.Wp, self.Hp, self.Wp, self.Hp]).view(
            (4, 1))
        boxes = ((boxes * multiplier).round().type(torch.int32)
                 )  # shape (4, p) (x1y1x2y2)

        # Clip boxes that are out of range
        boxes = ops.clip_boxes_to_image(boxes.T, (self.Hp, self.Wp)).T

        # Remove tiny boxes
        boxes, indices = self.remove_tiny_boxes(boxes,
                                                min_side=(self.Wp + self.Hp) //
                                                20,
                                                idx=indices)

        for index in range(len(indices)):
            idx = boxes[:, index]
            x_min, x_max = idx[0], idx[2]
            y_min, y_max = idx[1], idx[3]
            # print(f'{x_min}, {x_max} | {y_min}, {y_max}')
            cropped = features[:, x_min:x_max + 1, y_min:y_max + 1]
            cropped = F.interpolate(cropped.view((1, *cropped.shape)),
                                    (self.X, self.Y),
                                    mode="bilinear")[0]
            regions.append(cropped)
        regions = torch.stack(regions) if len(regions) > 0 else torch.empty(0)
        return regions
Example no. 2
    def _first_stage(self, imgs: torch.Tensor):
        with EvalScope(self.pNet):
            _, c, h, w = imgs.shape

            scale = 12.0 / self.minSize  # This is initial scale
            min_l = min(h, w)

            b, s, i = [], [], []

            while min_l * scale >= 12.:
                imgs = _nnf.interpolate(imgs,
                                        size=[int(h * scale),
                                              int(w * scale)],
                                        mode='area')
                reg, pro = self.pNet(imgs)

                pro = pro[:, 1]

                strd = 2. / scale
                cell = 12. / scale

                msk = torch.ge(pro, self.pNetThreshold)  # b, h, w

                if msk.any():
                    indices = msk.nonzero()  # n, 3 <- (i, y, x)
                    idx, r, c = indices[:, 0], indices[:, 1], indices[:, 2]
                    pro = pro[msk]

                    reg = reg.permute(0, 2, 3,
                                      1)  # b, h, w, c <- (x1^, y1^, x2^, y2^)
                    reg = reg[msk]

                    x1, y1 = c * strd, r * strd
                    x2, y2 = x1 + cell, y1 + cell

                    bbs = torch.dstack([x1, y1, x2, y2]).squeeze(0)
                    bbs = self._bb_reg(bbs, reg)
                    nms_idx = batched_nms(bbs, pro, idx, self.nmsThreshold)

                    b.append(bbs[nms_idx])
                    s.append(pro[nms_idx])
                    i.append(idx[nms_idx])

                scale = scale * self.factor

            if len(b) > 0:
                b = torch.cat(b, dim=0)
                s = torch.cat(s, dim=0)
                i = torch.cat(i, dim=0)

                nms_idx = batched_nms(b, s, i, self.nmsThreshold)
                b = clip_boxes_to_image(b[nms_idx], size=(h, w)).int()  # size is (height, width)
                i = i[nms_idx]

                return b, i
            else:
                return None
Example no. 3
    def __getitem__(self, index):
        img, target = tools.load_img_target(self, index)
        img_info = self.coco.loadImgs(self.ids[index])[0]
        iw, ih = img_info['width'], img_info['height']

        class_labels, bbox_labels, mask_labels = [], [], []
        for obj in target:
            if not tools.is_correct_instance(obj, self.cat_idx_list, iw, ih):
                continue

            class_labels.append(self.cat_to_label_map[obj['category_id']])
            bbox_labels.append(obj['bbox'])

            # rle = coco_mask.frPyObjects(obj['segmentation'], ih, iw)
            # if obj['iscrowd'] == 0:
            #     rle = coco_mask.merge(rle)
            # mask = coco_mask.decode(rle)
            # mask_labels.append(mask)

        transformed = self.img_transform(image=img,
                                         bboxes=bbox_labels,
                                         class_labels=class_labels)
        # transformed = self.img_transform(image=img, masks=mask_labels, bboxes=bbox_labels, class_labels=class_labels)
        img = tools.TENSOR_TRANSFORM(transformed['image'])
        # mask_labels = transformed['masks']
        class_labels = transformed['class_labels']
        bbox_labels = transformed['bboxes']

        if len(bbox_labels) == 0:
            # For any instance with classification label 0 (background), only the classification loss is computed; mask, centerness and bbox losses are skipped.
            # When an image has no instances, the value of the placeholder bbox does not matter.
            mask_labels = [np.zeros((self.h, self.w))]
            bbox_labels = [[0., 0., 10., 10.]]
            class_labels = [0]

        class_labels = torch.as_tensor(class_labels)

        # instance_mask_labels = self._generate_instance_mask_labels(mask_labels, bbox_labels)
        # instance_mask_labels = torch.as_tensor(np.array(instance_mask_labels)).float()

        bbox_labels = cv_ops.box_convert(torch.as_tensor(bbox_labels,
                                                         dtype=torch.float32),
                                         in_fmt='xywh',
                                         out_fmt='xyxy')
        bbox_labels = cv_ops.clip_boxes_to_image(bbox_labels, (ih, iw))

        class_targets, distance_targets = self._encode_targets(
            class_labels, bbox_labels, None)
        centerness_targets = tools.encode_centerness_targets(distance_targets)

        return img, self.points, {
            'class': class_targets,
            'distance': distance_targets,
            'centerness': centerness_targets
        }
Example no. 4
    def update(self, img: ImageT) -> np.ndarray:
        self.model.eval()

        side_size = int(round(self.curr_instance_side_size))
        bbox = BBox.build_from_center_and_size(
            self.target_bbox.center, np.asarray((side_size, side_size)))
        instance_img = center_crop_and_resize(
            img, bbox, (self.cfg.instance_size, self.cfg.instance_size))

        if self.on_instance_img_extract:
            self.on_instance_img_extract(instance_img)

        instance_img = pil_to_tensor(instance_img).to(self.device)
        pred_reg, pred_cls = self.model.inference(instance_img,
                                                  self.kernel_reg,
                                                  self.kernel_cls)

        pred_reg = pred_reg.squeeze()
        pred_cls = pred_cls.squeeze()

        pred_cls = F.softmax(pred_cls, dim=1)
        pred_cls_max = pred_cls.argmax(dim=1)
        # TODO Store the range somewhere as it may be faster.
        scores = pred_cls[list(range(len(pred_cls))), pred_cls_max]
        scores[pred_cls_max == 0] = 0  # The 0-th position is the background.

        # TODO Think of modifying the regression predictions in place.
        xy_vals = pred_reg[:, :2] * self.anchors[:, 2:] + self.anchors[:, :2]
        wh_vals = torch.exp(pred_reg[:, 2:]) * self.anchors[:, 2:]
        boxes = torch.hstack((xy_vals, wh_vals))
        boxes = ops.box_convert(boxes, 'cxcywh', 'xyxy')
        boxes = ops.clip_boxes_to_image(
            boxes, (self.cfg.instance_size, self.cfg.instance_size))

        response = (1 - self.cfg.cosine_win_influence) * response + \
                   self.cfg.cosine_win_influence * self.cosine_win

        # The assumption is that the peak response value is in the center of the
        # response map. Thus, we compute the change with respect to the center
        # and convert it back to the pixel coordinates in the image.
        peak_response_pos = np.asarray(
            np.unravel_index(response.argmax(), response.shape))

        # Update target scale.
        self.curr_instance_side_size *= new_scale

        # Change from [row, col] to [x, y] coordinates.
        self.target_bbox.shift(disp_in_image[::-1])
        self.target_bbox.rescale(new_scale, new_scale)

        return self.target_bbox.as_xywh()
Example no. 5
    def __getitem__(self, idx: int):
        img = torch.rand(self.img_shape)
        boxes = torch.tensor(
            [self._random_bbox() for _ in range(self.num_boxes)],
            dtype=torch.float32)
        boxes = ops.clip_boxes_to_image(boxes,
                                        (self.img_shape[1], self.img_shape[2]))

        # Passing the same in_fmt and out_fmt is fine; box_convert handles that case
        converted_boxes = ops.box_convert(boxes,
                                          in_fmt="xyxy",
                                          out_fmt=self.box_fmt)
        labels = torch.randint(self.num_classes, (self.num_boxes, ),
                               dtype=torch.long)
        return img, {"boxes": converted_boxes, "labels": labels}
Example no. 6
def multi_dim_boxes_clip(boxes: Union[Tensor, np.ndarray], box_range: Tuple[int, int]) \
        -> Union[Tensor, np.ndarray]:
    """
    A multi-dim wrapper of `torchvision.ops.clip_boxes_to_image`.

    Args:
        boxes: boxes which will be clipped. The original size could be any format matching :math:`(*shape, 4)`.
        box_range: The width and height used to clip the boxes.

    Returns:
        new_boxes: boxes clipped to match the `box_range`.
    """

    new_boxes = torch.from_numpy(boxes).clone() if isinstance(
        boxes, np.ndarray) else boxes.clone()
    ori_size = new_boxes.size()
    new_boxes = clip_boxes_to_image(new_boxes.reshape(-1, 4),
                                    box_range[::-1]).reshape(ori_size)
    return new_boxes.numpy() if isinstance(boxes, np.ndarray) else new_boxes
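A minimal usage sketch for `multi_dim_boxes_clip` (the box values below are made up for illustration; it assumes the function above is in scope together with its `torch`, `numpy`, and `torchvision.ops.clip_boxes_to_image` imports):

import torch

# Batched xyxy boxes with shape (2, 2, 4); some coordinates fall outside a 100x80 image.
boxes = torch.tensor([[[-5., 0., 20., 30.],
                       [10., 10., 200., 90.]],
                      [[1., 1., 2., 2.],
                       [30., 40., 500., 500.]]])

# box_range is (width, height); the (*shape, 4) layout is preserved.
clipped = multi_dim_boxes_clip(boxes, box_range=(100, 80))
print(clipped.shape)   # torch.Size([2, 2, 4])
print(clipped[0, 1])   # tensor([ 10.,  10., 100.,  80.])

# A numpy input returns a numpy array of the same shape.
clipped_np = multi_dim_boxes_clip(boxes.numpy(), box_range=(100, 80))
print(type(clipped_np))  # <class 'numpy.ndarray'>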
Example no. 7
    def __getitem__(self, idx: int):
        h = random.randint(self.im_size_min, self.im_size_max)
        w = random.randint(self.im_size_min, self.im_size_max)
        img_shape = (3, h, w)
        img = torch.rand(img_shape)

        num_boxes = random.randint(1, self.num_boxes_max)
        labels = torch.randint(self.class_start,
                               self.class_end, (num_boxes, ),
                               dtype=torch.long)

        boxes = torch.tensor(
            [self._random_bbox(img_shape) for _ in range(num_boxes)],
            dtype=torch.float32)
        boxes = ops.clip_boxes_to_image(boxes, (h, w))
        # Passing the same in_fmt and out_fmt is fine; box_convert handles that case
        boxes = ops.box_convert(boxes, in_fmt="xyxy", out_fmt=self.box_fmt)
        if self.normalize:
            boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
        image_id = torch.tensor([idx])
        return img, {"image_id": image_id, "boxes": boxes, "labels": labels}
Example no. 8
    def compare(self, expected, actual, image=None, classifier=None):
        if image is None:
            reproj_threshold = 10
        else:
            h, w = image.shape[1:]
            reproj_threshold = min(h, w) * 0.01

        if not len(actual['boxes']):
            return 0 if len(expected['boxes']) else 1

        ge = expected['graph'] if 'graph' in expected else planograms.build_graph(expected['boxes'], expected['labels'], self.graph_threshold)
        ga = planograms.build_graph(actual['boxes'], actual['labels'], self.graph_threshold)
        matching = planograms.large_common_subgraph(ge, ga) # TODO: Possibility to use Tonioni
        if not len(matching):
            return 0
        found, missing_indices, missing_positions, missing_labels = planograms.finalize_via_ransac(
            matching, expected['boxes'], actual['boxes'], expected['labels'], actual['labels'],
            reproj_threshold=reproj_threshold,
        )
        if found is None: # --> couldn't calculate homography
            return len(matching) / len(expected['boxes'])

        if classifier is not None and image is not None and len(missing_positions):
            missing_positions = tvops.clip_boxes_to_image(missing_positions, image.shape[1:])
            valid_positions = (missing_positions[:,2] - missing_positions[:,0] > 1) & (missing_positions[:,3] - missing_positions[:,1] > 1)
            if not valid_positions.any():
                return found.sum() / len(found) # TODO: Also return which were actually missing

            missing_indices = missing_indices[valid_positions]
            missing_positions = missing_positions[valid_positions]
            missing_labels = [l for l, v in zip(missing_labels, valid_positions) if v]

            missing_imgs = torch.stack([datautils.resize_for_classification(image[:, y1:y2, x1:x2]) for x1, y1, x2, y2 in missing_positions.to(dtype=torch.long)])
            reclass_labels = classifier.classify(missing_imgs)
            for idx, expected_label, actual_label in zip(missing_indices, missing_labels, reclass_labels):
                if expected_label == actual_label[0]:
                    found[idx] = True
        return found.sum() / len(found) # TODO: Also return which were actually missing
Example no. 9
    def _third_stage(
        self, imgs: torch.Tensor, r_bbs: torch.Tensor, r_idxs: torch.Tensor
    ) -> Optional[Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
        _imgs = self._gather_rois(imgs, r_bbs, r_idxs, 48)

        with EvalScope(self.oNet):
            reg, lmk, pro = self.oNet(_imgs)
            mask = torch.ge(pro[:, 1], self.oNetThreshold)

            if not mask.any():
                return None

            reg = reg[mask]
            pro = pro[:, 1][mask]
            b = r_bbs[mask].type(torch.float32)
            i = r_idxs[mask]

            b = self._bb_reg(b, reg)
            j = batched_nms(b, pro, i, self.nmsThreshold)
            b = clip_boxes_to_image(b[j], size=imgs.shape[2:]).int()
            i = i[j]

            return b, i, lmk[j]
Example no. 10
def eval_dihe(encoder,
              sampleset,
              testset,
              batch_size,
              num_workers,
              k=(1, ),
              verbose=True):
    if verbose: print('Preparing classifier...')
    encoder.requires_grad_(False)

    classifier = production.Classifier(encoder,
                                       sampleset,
                                       batch_size=batch_size,
                                       num_workers=num_workers,
                                       k=max(k))

    total = 0
    correct = {knn: 0 for knn in k}
    missed = {}
    misclassification = {}
    total_per_ann = {}

    if verbose: print('Eval start!')
    for i, (img, target_anns, boxes) in enumerate(testset):
        if verbose and i % 10 == 0:
            print(f'{i}...')

        boxes = tvops.clip_boxes_to_image(boxes, (img.shape[1], img.shape[2]))
        imgs = torch.stack([
            datautils.resize_for_classification(img[:, y1:y2, x1:x2])
            for x1, y1, x2, y2 in boxes
        ])
        pred_anns = classifier.classify(imgs)

        total += len(target_anns)
        for a1, a2 in zip(target_anns, pred_anns):
            if a1 not in total_per_ann:
                total_per_ann[a1] = 0
            total_per_ann[a1] += 1

            for knn in k:
                if a1 in a2[:knn]: correct[knn] += 1
            if a1 != a2[0]:
                if a1 not in missed:
                    missed[a1] = 0
                    misclassification[a1] = {}
                if a2[0] not in misclassification[a1]:
                    misclassification[a1][a2[0]] = 0
                missed[a1] += 1
                misclassification[a1][a2[0]] += 1

    del classifier  # maybe this will solve memory problems caused by eval?

    encoder.requires_grad_(True)

    accuracy = {knn: c / total for knn, c in correct.items()}
    if verbose:
        print(
            f'Total annotations: {total}, Correctly guessed: {correct}, Accuracy: {accuracy}'
        )
        most_missed = sorted(
            ((v / total_per_ann[k], v, k) for k, v in missed.items()),
            reverse=True)[:10]
        print(
            f'Most missed: {", ".join(f"{a} ({n}, {p * 100} %)" for p, n, a in most_missed)}'
        )
        for _, n, k in most_missed[:3]:
            common_misclassifications = sorted(
                ((v / n, v, k) for k, v in misclassification[k].items()),
                reverse=True)[:3]
            print(
                f'{k}: Commonly mistaken for {", ".join(f"{a} ({n}, {p * 100} %)" for p, n, a in common_misclassifications)}'
            )
    return accuracy
Example no. 11
def val_one_epoch(model, data_loader, coco_gt, dist_logger, epoch_idx,
                  nms_cfg):
    pred_instances = []
    nms_pre = nms_cfg['nms_pre']
    cls_score_thr = nms_cfg['cls_score_thr']
    iou_thr = nms_cfg['iou_thr']

    model.eval()
    processor = dist_logger.init_processor(data_loader)
    for img, data in processor:
        img = img.cuda(non_blocking=True)
        points = data['points'].cuda(non_blocking=True)
        img_info_list = coco_gt.loadImgs(data['img_id'].numpy())

        class_pred, distance_pred, centerness_pred = model(img)

        class_pred = class_pred.sigmoid()  # [B, num_points, num_classes]
        cls_pred_scores, cls_pred_indexes = class_pred.max(
            dim=-1)  # [B, num_points]
        bbox_pred = bbox_ops.convert_distance_to_bbox(
            points, distance_pred)  # [B, num_points, 4]
        centerness_pred = centerness_pred.sigmoid()  # [B, num_points]

        batch_size, _, num_classes = class_pred.shape
        _, _, ih, iw = img.shape

        for batch_idx in range(batch_size):
            b_cls_pred_scores = cls_pred_scores[batch_idx]  # [num_points]
            b_cls_pred_indexes = cls_pred_indexes[batch_idx]  # [num_points]
            b_centerness_pred = centerness_pred[batch_idx]  # [num_points]
            b_bbox_pred = bbox_pred[batch_idx, :]  # [num_points, 4]

            _, top_idx = (b_cls_pred_scores * b_centerness_pred).topk(
                nms_pre)  # [topk]

            top_class_pred_scores = b_cls_pred_scores[top_idx]  # [topk]
            top_class_pred_indexes = b_cls_pred_indexes[top_idx]  # [topk]
            top_centerness_pred = b_centerness_pred[top_idx]  # [topk]
            nms_scores = top_class_pred_scores * top_centerness_pred  # [topk]

            top_bbox_pred = b_bbox_pred[top_idx, :]  # [topk, 4]
            top_bbox_pred = cv_ops.clip_boxes_to_image(top_bbox_pred,
                                                       size=(ih, iw))

            valid_mask = top_class_pred_scores > cls_score_thr
            valid_class_pred_scores = top_class_pred_scores[valid_mask]
            valid_class_pred_indexes = top_class_pred_indexes[valid_mask]
            valid_nms_scores = nms_scores[valid_mask]
            valid_bbox_pred = top_bbox_pred[valid_mask, :]

            keep_idx = cv_ops.batched_nms(valid_bbox_pred, valid_nms_scores,
                                          valid_class_pred_indexes, iou_thr)
            keep_class_pred_scores = valid_class_pred_scores[keep_idx]
            keep_class_pred_indexes = valid_class_pred_indexes[keep_idx]
            keep_bbox_pred = valid_bbox_pred[keep_idx, :]

            oh, ow = img_info_list[batch_idx]['height'], img_info_list[batch_idx]['width']
            keep_bbox_pred = bbox_ops.recover_bboxes(keep_bbox_pred, oh, ow,
                                                     ih, iw)
            keep_bbox_pred = cv_ops.box_convert(keep_bbox_pred,
                                                in_fmt='xyxy',
                                                out_fmt='xywh')

            for cls_score, cls_idx, bbox in zip(keep_class_pred_scores,
                                                keep_class_pred_indexes,
                                                keep_bbox_pred):
                pred_instances.append({
                    'image_id': int(data['img_id'][batch_idx]),
                    'category_id': int(cls_idx) + 1,
                    'bbox': [float(str('%.1f' % coord)) for coord in bbox.tolist()],
                    'score': float(str('%.1f' % cls_score))
                })

    dist_logger.save_pred_instances_local_rank(pred_instances)
    dist_logger.save_val_file()
    dist_logger.update_tensorboard_val_results(coco_gt, epoch_idx)
Example no. 12
def val_one_epoch(model, data_loader, coco_gt, dist_logger, epoch_idx, nms_cfg):
    pred_instances = []
    nms_pre, cls_score_thr, iou_thr = nms_cfg['nms_pre'], nms_cfg['cls_score_thr'], nms_cfg['iou_thr']
    _, _, label_to_cat_map = tools.get_cat_label_map(coco_gt, tools.COCO_CLASSES)
    # print(label_to_cat_map)

    model.eval()
    processor = tqdm.tqdm(data_loader, disable=not dist_logger.is_master_rank)
    for img, points, img_ids in processor:
        img = img.cuda(non_blocking=True)
        points = points.cuda(non_blocking=True)
        img_info_list = coco_gt.loadImgs(img_ids.numpy())

        pred = model(img, points)
        class_pred = pred['class'].sigmoid()  # [B, num_points, num_classes]
        centerness_pred = pred['centerness'].sigmoid()  # [B, num_points]
        bbox_pred = bbox_ops.convert_distance_to_bbox(points, pred['distance'])  # [B, num_points, 4]
        # instance_mask_pred = pred['instance_mask'].sigmoid()  # [B, num_points, pooler_size, pooler_size]

        # print(class_pred.shape, centerness_pred.shape, bbox_pred.shape, instance_mask_pred.shape)
        # exit(-1)

        cls_pred_scores, cls_pred_indexes = class_pred.max(dim=-1)  # [B, num_points]

        batch_size, _, num_classes = class_pred.shape
        _, _, ih, iw = img.shape

        for batch_idx in range(batch_size):
            b_cls_pred_scores = cls_pred_scores[batch_idx]
            b_cls_pred_indexes = cls_pred_indexes[batch_idx]
            b_centerness_pred = centerness_pred[batch_idx]
            b_bbox_pred = bbox_pred[batch_idx, :]  # [num_points, 4]

            _, top_idx = (b_cls_pred_scores * b_centerness_pred).topk(nms_pre)
            top_class_pred_scores = b_cls_pred_scores[top_idx]
            top_class_pred_indexes = b_cls_pred_indexes[top_idx]
            top_centerness_pred = b_centerness_pred[top_idx]
            top_bbox_pred = b_bbox_pred[top_idx, :]  # [topk, 4]

            nms_scores = top_class_pred_scores * top_centerness_pred
            top_bbox_pred = cv_ops.clip_boxes_to_image(top_bbox_pred, size=(ih, iw))

            valid_mask = top_class_pred_scores > cls_score_thr
            valid_class_pred_scores = top_class_pred_scores[valid_mask]
            valid_class_pred_indexes = top_class_pred_indexes[valid_mask]
            valid_nms_scores = nms_scores[valid_mask]
            valid_bbox_pred = top_bbox_pred[valid_mask, :]

            keep_idx = cv_ops.batched_nms(valid_bbox_pred, valid_nms_scores, valid_class_pred_indexes, iou_thr)
            keep_class_pred_scores = valid_class_pred_scores[keep_idx]
            keep_class_pred_indexes = valid_class_pred_indexes[keep_idx]
            keep_bbox_pred = valid_bbox_pred[keep_idx, :]

            oh, ow = img_info_list[batch_idx]['height'], img_info_list[batch_idx]['width']
            keep_bbox_pred = bbox_ops.recover_bboxes(keep_bbox_pred, oh, ow, ih, iw)
            keep_bbox_pred = cv_ops.box_convert(keep_bbox_pred, in_fmt='xyxy', out_fmt='xywh')

            for cls_score, cls_idx, bbox in zip(keep_class_pred_scores, keep_class_pred_indexes, keep_bbox_pred):
                # poly = coco_mask.frPyObjects(poly.permute(1, 0).reshape(1, -1).detach().cpu().double().numpy(), oh, ow)
                # rle = coco_mask.merge(poly)
                # rle['counts'] = rle['counts'].decode('utf-8')

                pred_instances.append({
                    'image_id': int(img_ids[batch_idx]),
                    'category_id': label_to_cat_map[int(cls_idx) + 1],
                    'bbox': [float(str('%.1f' % coord)) for coord in bbox.tolist()],
                    # 'segmentation': rle,
                    'score': float(str('%.1f' % cls_score))
                })

    dist_logger.save_pred_instances_local_rank(pred_instances)
    dist_logger.save_val_file()
    dist_logger.evaluate(coco_gt)
Example no. 13
def plot_planogram_eval(img_dir, test_imgs, test_annotations, planos, datatype,
                        load_classifier_index, plano_idx, gln_state,
                        dihe_state):
    '''
    Visualize planogram compliance evaluation steps.

    Runs the proposed planogram compliance evaluation procedure step by step,
    plotting a visualization of each step.
    '''
    if datatype == 'gp':
        planoset = datautils.PlanogramTestSet(test_imgs, test_annotations,
                                              planos)
        sampleset = datautils.GroceryProductsDataset(img_dir,
                                                     include_annotations=True)
        rebuildset = datautils.GroceryProductsDataset(img_dir,
                                                      include_annotations=True,
                                                      resize=False)
    else:
        planoset = datautils.InternalPlanoSet(planos)
        sampleset = datautils.InternalTrainSet(img_dir[0],
                                               include_annotations=True)
        rebuildset = datautils.InternalTrainSet(img_dir[0],
                                                include_annotations=True,
                                                resize=False)

    proposal_generator = proposals_eval.load_gln(gln_state, False)
    proposal_generator.requires_grad_(False)

    encoder = classification.macvgg_embedder(model='vgg16',
                                             pretrained=False).cuda()
    enc_state = torch.load(dihe_state)
    encoder.load_state_dict(
        enc_state[classification_training.EMBEDDER_STATE_DICT_KEY])
    encoder.eval()
    encoder.requires_grad_(False)
    del enc_state

    datum = planoset[plano_idx] if plano_idx is not None else random.choice(
        planoset)
    if datatype == 'gp':
        image, _, _, expected = datum
    else:
        image, expected = datum
    generator = production.ProposalGenerator(proposal_generator)
    classifier = production.Classifier(encoder,
                                       sampleset,
                                       batch_size=8,
                                       load=load_classifier_index)

    boxes, images = generator.generate_proposals_and_images(image)
    boxes = boxes.detach().cpu()
    classes = [ann[0] for ann in classifier.classify(images)]
    actual = {'boxes': boxes, 'labels': classes}

    h, w = image.shape[1:]
    reproj_threshold = min(h, w) * 0.01

    maxy = boxes[:, 3].max().item()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(
        12, 12)) if image.shape[2] < image.shape[1] else plt.subplots(
            2, 1, figsize=(12, 12))
    utils.build_fig(image,
                    detections=tvops.box_convert(boxes, 'xyxy', 'xywh'),
                    ax=ax1)
    utils.build_rebuild(boxes, classes, rebuildset, maxy, ax=ax2)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(
        12, 12)) if image.shape[2] < image.shape[1] else plt.subplots(
            2, 1, figsize=(12, 12))
    ge = expected['graph'] if 'graph' in expected else planograms.build_graph(
        expected['boxes'], expected['labels'], 0.5)
    ga = planograms.build_graph(actual['boxes'], actual['labels'], 0.5)
    utils.build_rebuild(expected['boxes'],
                        expected['labels'],
                        rebuildset,
                        ax=ax1)
    utils.draw_planograph(ge, expected['boxes'], ax=ax1, flip_y=True)
    utils.build_rebuild(boxes, classes, rebuildset, maxy, ax=ax2)
    utils.draw_planograph(ga, actual['boxes'], ax=ax2, flip_y=True)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(
        12, 12)) if image.shape[2] < image.shape[1] else plt.subplots(
            2, 1, figsize=(12, 12))
    matching = planograms.large_common_subgraph(ge, ga)
    nodes_e, nodes_a = (list(l)
                        for l in zip(*matching)) if len(matching) else ([], [])
    sge = ge.subgraph(nodes_e)
    sga = ga.subgraph(nodes_a)
    utils.build_rebuild(expected['boxes'],
                        expected['labels'],
                        rebuildset,
                        ax=ax1)
    utils.draw_planograph(sge, expected['boxes'], ax=ax1, flip_y=True)
    utils.build_rebuild(boxes, classes, rebuildset, maxy, ax=ax2)
    utils.draw_planograph(sga, actual['boxes'], ax=ax2, flip_y=True)
    if not len(matching):
        plt.show()
        return

    # fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 12)) if image.shape[2] < image.shape[1] else plt.subplots(2, 1, figsize=(12, 12))
    found, found_actual, missing_indices, missing_positions, missing_labels = planograms.finalize_via_ransac(
        matching,
        expected['boxes'],
        actual['boxes'],
        expected['labels'],
        actual['labels'],
        reproj_threshold=reproj_threshold,
        return_matched_actual=True)
    missing_positions = tvops.clip_boxes_to_image(missing_positions,
                                                  image.shape[1:])
    valid_positions = (missing_positions[:, 2] - missing_positions[:, 0] >
                       1) & (missing_positions[:, 3] - missing_positions[:, 1]
                             > 1)
    missing_indices = missing_indices[valid_positions]
    missing_positions = missing_positions[valid_positions]
    missing_labels = [l for l, v in zip(missing_labels, valid_positions) if v]

    if len(missing_positions) > 0:
        found_round2 = torch.full((len(missing_indices), ), False)
        missing_imgs = torch.stack([
            datautils.resize_for_classification(image[:, y1:y2, x1:x2])
            for x1, y1, x2, y2 in missing_positions.to(dtype=torch.long)
        ])
        reclass_labels = classifier.classify(missing_imgs)
        for idx, (expected_label, actual_label) in enumerate(
                zip(missing_labels, reclass_labels)):
            if expected_label == actual_label[0]:
                found_round2[idx] = True
    utils.build_fig(
        image,
        groundtruth=tvops.box_convert(actual['boxes'][found_actual], 'xyxy',
                                      'xywh'),
        detections=tvops.box_convert(missing_positions, 'xyxy', 'xywh'),
    )
    if len(missing_positions) > 0:
        utils.plot_boxes(tvops.box_convert(missing_positions[found_round2],
                                           'xyxy', 'xywh'),
                         color='yellow',
                         hl_color='orange')

    plt.show()
Example no. 14
def pipeline_demo(gln_state, dihe_state, dataset_folder, image_file,
                  plano_file):
    '''
    Demonstrate the CVPCE pipeline.

    The dataset folder is expected to contain .png, .jpg and .jpeg files,
    one for each class, with the class label set as the filename.

    The planogram file should be a JSON file formatted in a manner such as

    \b
    [
        {
            "label": "class1",
            "box": [0, 0, 5, 5]
        },
        {
            "label": "class2",
            "box": [5, 0, 10, 5]
        }
    ]

    (in this case, the dataset folder should contain files for class1 and class2,
    such as class1.png and class2.jpg)
    '''

    # TODO: This shares a bunch of code with cvpce plot-planogram-eval; will want to refactor a bit at some point

    def double_fig(img):
        return plt.subplots(1, 2, figsize=(
            12, 12)) if img.shape[2] < img.shape[1] else plt.subplots(
                2, 1, figsize=(12, 12))

    dataset = SimpleFolderSet(dataset_folder)
    rebuildset = SimpleFolderSet(dataset_folder, train=False)

    state_dict = torch.load(gln_state)[MODEL_STATE_DICT_KEY]
    gln = proposals.gln().cuda()
    gln.load_state_dict(state_dict)
    gln.eval()
    gln.requires_grad_(False)
    generator = ProposalGenerator(gln)

    img = ttf.to_tensor(pil.Image.open(image_file))
    detections, images = generator.generate_proposals_and_images(img)

    encoder = classification.macvgg_embedder(model='vgg16',
                                             pretrained=False).cuda()
    enc_state = torch.load(dihe_state)
    encoder.load_state_dict(enc_state[EMBEDDER_STATE_DICT_KEY])
    encoder.eval()
    encoder.requires_grad_(False)
    classifier = Classifier(encoder, dataset)

    classes, embedding = classifier.classify(images, return_embedding=True)

    with open(plano_file) as pf:
        plano = json.load(pf)
    expected_boxes = torch.tensor([o['box'] for o in plano], dtype=torch.float)
    expected_labels = [o['label'] for o in plano]
    actual_boxes = detections.detach().cpu()
    actual_labels = [c[0] for c in classes]
    ge = planograms.build_graph(expected_boxes,
                                expected_labels,
                                thresh_size=0.7)
    ga = planograms.build_graph(actual_boxes, actual_labels, thresh_size=0.7)

    matching = planograms.large_common_subgraph(ge, ga)
    nodes_e, nodes_a = (list(l)
                        for l in zip(*matching)) if len(matching) else ([], [])
    sge = ge.subgraph(nodes_e)
    sga = ga.subgraph(nodes_a)

    h, w = img.shape[1:]
    reproj_threshold = min(h, w) * 0.01

    _, found_actual, expected_positions, missing_indices, missing_positions, missing_labels = planograms.finalize_via_ransac(
        matching,
        expected_boxes,
        actual_boxes,
        expected_labels,
        actual_labels,
        reproj_threshold=reproj_threshold,
        return_matched_actual=True,
        return_expected_positions=True)
    missing_positions = tvops.clip_boxes_to_image(missing_positions,
                                                  img.shape[1:])
    valid_positions = (missing_positions[:, 2] - missing_positions[:, 0] >
                       1) & (missing_positions[:, 3] - missing_positions[:, 1]
                             > 1)
    missing_indices = missing_indices[valid_positions]
    missing_positions = missing_positions[valid_positions]
    missing_labels = [l for l, v in zip(missing_labels, valid_positions) if v]

    if len(missing_positions) > 0:
        found_round2 = torch.full((len(missing_indices), ), False)
        missing_imgs = torch.stack([
            resize_for_classification(img[:, y1:y2, x1:x2])
            for x1, y1, x2, y2 in missing_positions.to(dtype=torch.long)
        ])
        reclass_labels = classifier.classify(missing_imgs)
        for idx, (expected_label, actual_label) in enumerate(
                zip(missing_labels, reclass_labels)):
            if expected_label == actual_label[0]:
                found_round2[idx] = True

    _, (ax1, ax2) = double_fig(img)
    utils.build_fig(img, ax=ax1)
    utils.build_rebuild(expected_boxes, expected_labels, rebuildset, ax=ax2)
    ax1.set_title('Image to evaluate')
    ax2.set_title('Planogram')
    plt.show()

    utils.show(
        img, utils.recall_tensor(tvops.box_convert(detections, 'xyxy',
                                                   'xywh')))

    data_imgs = torch.stack([i for i, _, _, _ in dataset])
    utils.show_demo_emb_fig(data_imgs,
                            classifier.embedding,
                            images,
                            embedding,
                            draw_positives=False)

    utils.show_demo_emb_fig(data_imgs, classifier.embedding, images, embedding)

    _, (ax1, ax2) = double_fig(img)
    utils.build_fig(img, ax=ax1)
    utils.build_rebuild(detections, actual_labels, rebuildset, ax=ax2)
    ax1.set_title('Image')
    ax2.set_title('Classified detections = "Observed planogram"')
    plt.show()

    _, (ax1, ax2) = double_fig(img)
    utils.build_rebuild(expected_boxes, expected_labels, rebuildset, ax=ax1)
    utils.draw_planograph(ge, expected_boxes, ax=ax1, flip_y=True)
    utils.build_rebuild(actual_boxes, actual_labels, rebuildset, ax=ax2)
    utils.draw_planograph(ga, actual_boxes, ax=ax2, flip_y=True)
    ax1.set_title('Expected planogram')
    ax2.set_title('Observed planogram')
    plt.show()

    _, (ax1, ax2) = double_fig(img)
    utils.build_rebuild(expected_boxes, expected_labels, rebuildset, ax=ax1)
    utils.draw_planograph(sge, expected_boxes, ax=ax1, flip_y=True)
    utils.build_rebuild(actual_boxes, actual_labels, rebuildset, ax=ax2)
    utils.draw_planograph(sga, actual_boxes, ax=ax2, flip_y=True)
    ax1.set_title('Expected planogram')
    ax2.set_title('Observed planogram')
    plt.show()

    utils.show(img, tvops.box_convert(expected_positions, 'xyxy', 'xywh'))

    utils.build_fig(
        img,
        groundtruth=tvops.box_convert(actual_boxes[found_actual], 'xyxy',
                                      'xywh'),
        detections=tvops.box_convert(missing_positions, 'xyxy', 'xywh'),
    )
    if len(missing_positions) > 0:
        utils.plot_boxes(tvops.box_convert(missing_positions[found_round2],
                                           'xyxy', 'xywh'),
                         color='yellow',
                         hl_color='orange')
    plt.show()