Example #1
def test_relative_and_absolute_anchor():
    gtas = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 2, 5], [5, 0, 7, 7],
                     [0, 10, 8, 100], [10, 11, 15, 20]],
                    dtype=np.float64)

    ancs = np.array([[0, 0, 1, 1], [2, 1, 3, 3], [4, 2, 10, 8], [3, 3, 10, 15],
                     [2, 8, 5, 120], [1, 1, 2, 2]],
                    dtype=np.float64)
    txtytwth = to_relative_coord_np(gtas, ancs)

    # Test to_absolute_coord
    gtas_pred = list()
    for anc, regr in zip(ancs, txtytwth):
        xywh = to_absolute_coord(anc, regr)
        xywh = list(xywh)
        xywh[0] -= xywh[2] / 2.
        xywh[1] -= xywh[3] / 2.
        xywh[2] += xywh[0]
        xywh[3] += xywh[1]
        gtas_pred.append(xywh)

    gtas_pred = np.array(gtas_pred)
    assert np.allclose(gtas_pred, gtas)

    # Test apply_regression_to_rois
    # Convert anchors from (min_x, min_y, max_x, max_y) to (min_x, min_y, w, h)
    mxmywh = ancs.copy()
    mxmywh[:, 2] = mxmywh[:, 2] - mxmywh[:, 0]  # width
    mxmywh[:, 3] = mxmywh[:, 3] - mxmywh[:, 1]  # height
    cxcywh = apply_regression_to_rois(txtytwth, mxmywh).astype(np.float64)
    # Convert the predicted (cx, cy, w, h) boxes back to corner format in place
    anchors = cxcywh
    anchors[:, 0] -= anchors[:, 2] / 2.
    anchors[:, 1] -= anchors[:, 3] / 2.
    anchors[:, 2] += anchors[:, 0]
    anchors[:, 3] += anchors[:, 1]
    assert np.allclose(anchors, gtas)
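The round trip above only holds if to_relative_coord_np and to_absolute_coord are exact inverses. Below is a minimal sketch of the pair, assuming the standard Faster R-CNN box parameterization (tx = (gx - ax) / aw, ty = (gy - ay) / ah, tw = log(gw / aw), th = log(gh / ah)); the repository's actual implementations may differ in detail.

import numpy as np

def to_relative_coord_np(gtas, ancs):
    # Boxes come in as (min_x, min_y, max_x, max_y); convert to centre/size.
    aw, ah = ancs[:, 2] - ancs[:, 0], ancs[:, 3] - ancs[:, 1]
    gw, gh = gtas[:, 2] - gtas[:, 0], gtas[:, 3] - gtas[:, 1]
    acx, acy = ancs[:, 0] + aw / 2., ancs[:, 1] + ah / 2.
    gcx, gcy = gtas[:, 0] + gw / 2., gtas[:, 1] + gh / 2.
    # Centre offsets normalized by anchor size; log-ratio for width/height.
    tx, ty = (gcx - acx) / aw, (gcy - acy) / ah
    tw, th = np.log(gw / aw), np.log(gh / ah)
    return np.stack([tx, ty, tw, th], axis=-1)

def to_absolute_coord(anchor, regr):
    # Inverse mapping for a single anchor; returns (cx, cy, w, h).
    min_x, min_y, max_x, max_y = anchor
    tx, ty, tw, th = regr
    aw, ah = max_x - min_x, max_y - min_y
    acx, acy = min_x + aw / 2., min_y + ah / 2.
    return acx + tx * aw, acy + ty * ah, aw * np.exp(tw), ah * np.exp(th)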
Example #2
    @classmethod
    def debug_next_batch(cls, image, meta, clsf, regr):
        config = singleton_config()

        # Calculate Scales
        scales = calculate_anchor_size()

        height, width, _ = image.shape
        image = denormalize_image(image)

        # Find positive anchors: the first 9 channels of clsf are the validity
        # mask, the last 9 are the class labels (see the packing in Example #4)
        cls_h, cls_w, cls_o = np.where(
            np.logical_and(clsf[0, :, :, :9] == 1, clsf[0, :, :, 9:] == 1))
        regr = regr[0].copy()

        for i in range(len(cls_h)):
            loc_w = cls_w[i]
            loc_h = cls_h[i]
            loc_o = cls_o[i]

            # Anchor centre on the rescaled image
            cw = loc_w * config.anchor_stride[0]
            ch = loc_h * config.anchor_stride[1]

            anc_w, anc_h = scales[loc_o]

            cw = int(cw)
            ch = int(ch)
            # Mark the anchor centre with a small square
            cv2.rectangle(image, (cw, ch), (cw + 5, ch + 5), (255, 255, 0))

            # Anchor corners around the centre
            min_x = int(cw - anc_w / 2)
            min_y = int(ch - anc_h / 2)
            max_x = int(cw + anc_w / 2)
            max_y = int(ch + anc_h / 2)

            # Regression channels start after the 9 * 4 repeated-mask channels,
            # hence the offset of 36
            tx, ty, tw, th = regr[loc_h, loc_w,
                                  (loc_o * 4) + 36:(loc_o * 4) + 4 + 36]
            g_cx, g_cy, g_w, g_h = to_absolute_coord(
                [min_x, min_y, max_x, max_y], [tx, ty, tw, th])
            g_x1 = int(g_cx - g_w / 2)
            g_y1 = int(g_cy - g_h / 2)
            g_x2 = int(g_x1 + g_w)
            g_y2 = int(g_y1 + g_h)

            cv2.rectangle(image, (g_x1, g_y1), (g_x2, g_y2), (255, 255, 0),
                          thickness=3)

        # Visualize GTA
        visualize_gta(image, meta)
        cv2.imwrite('temp/' + meta['filename'], image)
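The offset of 36 in the regr slice follows from the channel layout built by generate_rpn_target (Example #4): with 9 anchors per location, the first 36 channels of y_rpn_regr hold the class target repeated four times (the regression-loss mask) and the last 36 hold the deltas. A sketch of that indexing against a dummy tensor (the feature-map size here is made up):

import numpy as np

n_anchor = 9
h, w = 38, 50                                # hypothetical feature-map size
y_rpn_regr = np.zeros((1, h, w, 8 * n_anchor))

mask = y_rpn_regr[0, :, :, :4 * n_anchor]    # class target repeated 4x
deltas = y_rpn_regr[0, :, :, 4 * n_anchor:]  # (tx, ty, tw, th) per anchor

# Deltas of anchor index o at cell (10, 10), matching the "+ 36" slice above:
o = 2
cell_deltas = y_rpn_regr[0, 10, 10,
                         4 * o + 4 * n_anchor:4 * o + 4 + 4 * n_anchor]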
Example #3
    @staticmethod
    def apply(_image, x_pos, y_pos, anc_scale: int, anc_rat: List[float],
              reg_target):
        config = singleton_config()
        w, h = anc_scale * anc_rat[0], anc_scale * anc_rat[1]
        min_x = int(x_pos * config.anchor_stride[0] - w / 2)
        min_y = int(y_pos * config.anchor_stride[1] - h / 2)
        max_x = int(min_x + w)
        max_y = int(min_y + h)

        g_cx, g_cy, g_w, g_h = to_absolute_coord([min_x, min_y, max_x, max_y],
                                                 reg_target)
        g_x1 = int(g_cx - g_w / 2)
        g_y1 = int(g_cy - g_h / 2)
        g_x2 = int(g_cx + g_w / 2)
        g_y2 = int(g_cy + g_h / 2)

        cv2.rectangle(_image, (g_x1, g_y1), (g_x2, g_y2), (0, 0, 255))
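For concreteness, a worked instance of the corner arithmetic above, assuming a 16-pixel anchor stride (a hypothetical value; the real one comes from singleton_config()):

stride = (16, 16)                        # hypothetical anchor stride
anc_scale, anc_rat = 128, [1.0, 1.0]
x_pos, y_pos = 10, 12
w, h = anc_scale * anc_rat[0], anc_scale * anc_rat[1]
min_x = int(x_pos * stride[0] - w / 2)   # 160 - 64 = 96
min_y = int(y_pos * stride[1] - h / 2)   # 192 - 64 = 128
max_x, max_y = int(min_x + w), int(min_y + h)
print((min_x, min_y, max_x, max_y))      # (96, 128, 224, 256)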
Example #4
    def generate_rpn_target(
            self,
            meta: dict,
            image: np.ndarray = None,
            debug: bool = False) -> Tuple[np.ndarray, np.ndarray]:

        width = meta['width']
        height = meta['height']
        rescaled_width = meta['rescaled_width']
        rescaled_height = meta['rescaled_height']
        n_object = len(meta['objects'])
        n_ratio = len(self.anchor_ratios)
        n_anchor = len(self.anchor_ratios) * len(self.anchor_scales)

        # Calculate output size of Base Network (feature extraction model)
        fen_width, fen_height, _ = cal_fen_output_size(self._net_name,
                                                       rescaled_width,
                                                       rescaled_height)

        # Track the best-matching anchor per ground-truth box
        best_iou_for_box = np.zeros(n_object)
        best_anchor_for_box = -1 * np.ones((n_object, 4), dtype='int')
        best_reg_for_box = np.zeros((n_object, 4), dtype='float32')
        n_pos_anchor_for_box = np.zeros(n_object)

        # Target tensors: class labels, validity mask, and regression deltas
        y_cls_target = np.zeros((fen_height, fen_width, n_anchor))
        y_valid_box = np.zeros((fen_height, fen_width, n_anchor))
        y_regr_targets = np.zeros((fen_height, fen_width, n_anchor * 4))

        _comb = [
            range(fen_height),
            range(fen_width),
            range(len(self.anchor_scales)),
            range(len(self.anchor_ratios)),
            range(n_object)
        ]

        # DEBUG
        if debug:
            _image = denormalize_image(image[0].copy())

        for y_pos, x_pos, anc_scale_idx, anc_rat_idx, idx_obj in itertools.product(
                *_comb):
            anc_scale = self.anchor_scales[anc_scale_idx]
            anc_rat = self.anchor_ratios[anc_rat_idx]

            # ground-truth box coordinates on the rescaled image
            obj_info = meta['objects'][idx_obj]
            gta_coord = self.cal_gta_coordinate(obj_info[1:], width, height,
                                                rescaled_width,
                                                rescaled_height)

            if debug:
                gta_coord = gta_coord.astype('int')
                cv2.rectangle(_image, (gta_coord[0], gta_coord[1]),
                              (gta_coord[2], gta_coord[3]), (0, 0, 255))

            # anchor box coordinates on the rescaled image
            anchor_coord = self.cal_anchor_cooridinate(x_pos, y_pos, anc_scale,
                                                       anc_rat,
                                                       self.anchor_stride)

            # Check if the anchor is within the rescaled image
            _valid_anchor = self.is_anchor_valid(anchor_coord, rescaled_width,
                                                 rescaled_height)
            if not _valid_anchor:
                continue

            # Calculate Intersection Over Union
            iou = cal_iou(gta_coord, anchor_coord)

            # Compute the regression target only when it can be used below,
            # i.e. as the best match so far or as a positive anchor
            if iou > best_iou_for_box[idx_obj] or iou > self.max_overlap:
                # The regression target is relative to the rescaled image
                reg_target = to_relative_coord(gta_coord, anchor_coord)

            # Every ground-truth box must be matched to at least one anchor,
            # so keep track of the best anchor per box.
            if iou > best_iou_for_box[idx_obj]:
                best_iou_for_box[idx_obj] = iou
                best_anchor_for_box[idx_obj] = (y_pos, x_pos, anc_scale_idx,
                                                anc_rat_idx)
                best_reg_for_box[idx_obj] = reg_target

            # An anchor is positive (it matches a ground-truth object) when its
            # IoU exceeds max_overlap (typically 0.5~0.7).
            # is_valid_anchor prevents an anchor already marked valid for another
            # object from being overwritten: once it meets max_overlap or
            # min_overlap, it must not be changed.
            # z_pos: channel index of this (scale, ratio) pair among the anchors
            z_pos = anc_scale_idx + n_ratio * anc_rat_idx
            is_valid_anchor = bool(y_valid_box[y_pos, x_pos, z_pos] == 1)

            if iou > self.max_overlap:  # Positive anchors
                n_pos_anchor_for_box[idx_obj] += 1
                y_valid_box[y_pos, x_pos, z_pos] = 1
                y_cls_target[y_pos, x_pos, z_pos] = 1
                y_regr_targets[y_pos, x_pos,
                               (z_pos * 4):(z_pos * 4) + 4] = reg_target

                if debug:
                    g_cx, g_cy, g_w, g_h = to_absolute_coord(
                        anchor_coord, reg_target)
                    g_x1 = int(g_cx - g_w / 2)
                    g_y1 = int(g_cy - g_h / 2)
                    g_x2 = int(g_x1 + g_w)
                    g_y2 = int(g_y1 + g_h)
                    cv2.rectangle(_image, (g_x1 + 2, g_y1 + 2),
                                  (g_x2 + 2, g_y2 + 2), (255, 255, 0),
                                  thickness=2)

            elif iou < self.min_overlap and not is_valid_anchor:  # Negative anchors
                y_valid_box[y_pos, x_pos, z_pos] = 1
                y_cls_target[y_pos, x_pos, z_pos] = 0

            elif not is_valid_anchor:
                y_valid_box[y_pos, x_pos, z_pos] = 0
                y_cls_target[y_pos, x_pos, z_pos] = 0

        # Cap the number of positive class targets at 256
        pos_locs = np.where(y_cls_target == 1)
        if pos_locs[0].shape[0] > 256:
            val_locs = random.sample(range(len(pos_locs[0])),
                                     len(pos_locs[0]) - 256)
            y_cls_target[pos_locs[0][val_locs], pos_locs[1][val_locs],
                         pos_locs[2][val_locs]] = 0

        assert y_cls_target.sum() <= 256

        # Ensure a ground-truth bounding box is mapped to at least one anchor
        for i in range(n_object):
            if n_pos_anchor_for_box[i] == 0:
                y_pos, x_pos, anc_scale_idx, anc_rat_idx = best_anchor_for_box[
                    i]
                z_pos = anc_scale_idx + n_ratio * anc_rat_idx
                reg_target = best_reg_for_box[i]

                y_valid_box[y_pos, x_pos, z_pos] = 1
                y_cls_target[y_pos, x_pos, z_pos] = 1
                y_regr_targets[y_pos, x_pos,
                               (z_pos * 4):(z_pos * 4) + 4] = reg_target

        # Negative anchors usually far outnumber positive ones, so subsample
        # them to keep the two classes balanced.
        pos_locs = np.where(np.logical_and(y_valid_box == 1,
                                           y_cls_target == 1))
        neg_locs = np.where(np.logical_and(y_valid_box == 1,
                                           y_cls_target == 0))
        n_pos = pos_locs[0].shape[0]
        n_neg = neg_locs[0].shape[0]

        if len(pos_locs[0]) > self.max_anchor // 2:
            val_locs = random.sample(range(len(pos_locs[0])),
                                     len(pos_locs[0]) - self.max_anchor // 2)
            y_valid_box[pos_locs[0][val_locs], pos_locs[1][val_locs],
                        pos_locs[2][val_locs]] = 0
            n_pos = self.max_anchor // 2

        # Disable surplus negatives so that negatives match positives in count
        if n_neg + n_pos > self.max_anchor:
            val_locs = random.sample(range(len(neg_locs[0])), n_neg - n_pos)
            y_valid_box[neg_locs[0][val_locs], neg_locs[1][val_locs],
                        neg_locs[2][val_locs]] = 0

        # Add batch dimension
        y_cls_target = np.expand_dims(y_cls_target, axis=0)
        y_valid_box = np.expand_dims(y_valid_box, axis=0)
        y_regr_targets = np.expand_dims(y_regr_targets, axis=0)

        # Debug
        if debug:
            cv2.imwrite('temp/' + meta['filename'], _image)

        # Final target data.
        # y_rpn_cls packs the validity mask and the class labels; the RPN
        # classification loss uses the mask to select which anchors count.
        # y_rpn_regr packs a per-coordinate positive mask (the class target
        # repeated 4x) and the regression deltas; the regression loss is
        # restricted to positive anchors through that mask.
        y_rpn_cls = np.concatenate([y_valid_box, y_cls_target], axis=-1)
        y_rpn_regr = np.concatenate(
            [np.repeat(y_cls_target, 4, axis=-1), y_regr_targets], axis=-1)

        return np.copy(y_rpn_cls), np.copy(y_rpn_regr)
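The packed tensors are meant to be consumed by the RPN losses: the first n_anchor channels of y_rpn_cls select which anchors contribute to the classification loss, and the first 4 * n_anchor channels of y_rpn_regr restrict the regression loss to positive anchors. A numpy sketch of that consumption, assuming binary cross-entropy and smooth-L1; the repository's actual loss functions (presumably Keras/TensorFlow) may differ.

import numpy as np

def rpn_cls_loss_np(y_rpn_cls, pred_cls, n_anchor=9):
    # First n_anchor channels: validity mask; last n_anchor: class labels.
    valid = y_rpn_cls[..., :n_anchor]
    label = y_rpn_cls[..., n_anchor:]
    eps = 1e-7
    bce = -(label * np.log(pred_cls + eps)
            + (1. - label) * np.log(1. - pred_cls + eps))
    return (valid * bce).sum() / max(valid.sum(), 1.)

def rpn_regr_loss_np(y_rpn_regr, pred_regr, n_anchor=9):
    # First 4*n_anchor channels: positive mask; the rest: delta targets.
    mask = y_rpn_regr[..., :4 * n_anchor]
    target = y_rpn_regr[..., 4 * n_anchor:]
    diff = np.abs(target - pred_regr)
    smooth_l1 = np.where(diff < 1., 0.5 * diff ** 2, diff - 0.5)
    return (mask * smooth_l1).sum() / max(mask.sum(), 1.)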