Example #1
    def __init__(self,
                 size,
                 stride,
                 ratios=None,
                 scales=None,
                 *args,
                 **kwargs):
        self.size = size
        self.stride = stride
        self.ratios = ratios
        self.scales = scales

        if ratios is None:
            self.ratios = np.array([0.5, 1, 2], 'float32')
        elif isinstance(ratios, list):
            self.ratios = np.array(ratios)
        if scales is None:
            self.scales = np.array([2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)],
                                   'float32')
        elif isinstance(scales, list):
            self.scales = np.array(scales)

        self.num_anchors = len(self.ratios) * len(self.scales)
        self.anchors = keras.backend.variable(
            utils_anchors.generate_anchors(
                base_size=self.size,
                ratios=self.ratios,
                scales=self.scales,
            ))

        super(Anchors, self).__init__(*args, **kwargs)
Example #2
    def __init__(self,
                 image_height,
                 image_width,
                 num_classes,
                 feature_map_sizes,
                 scales,
                 aspect_ratios,
                 two_boxes_for_ar1,
                 steps,
                 offsets,
                 clip_boxes,
                 variances,
                 iou_thresh_high,
                 iou_thresh_low,
                 background_id=0):
        self.image_height = image_height
        self.image_width = image_width
        self.num_classes = num_classes + 1
        self.feature_map_sizes = feature_map_sizes
        self.scales = scales
        self.aspect_ratios = aspect_ratios
        self.two_boxes_for_ar1 = two_boxes_for_ar1
        self.steps = steps
        self.offsets = offsets
        self.clip_boxes = clip_boxes
        self.variances = variances
        self.iou_thresh_high = iou_thresh_high
        self.iou_thresh_low = iou_thresh_low
        self.background_id = background_id

        # generate anchors
        self.boxes_list = []
        for i in range(len(self.feature_map_sizes)):
            box_tensor = generate_anchors(
                self.image_height,
                self.image_width,
                self.feature_map_sizes[i][0],
                self.feature_map_sizes[i][1],
                self.scales[i],
                self.scales[i + 1],
                self.steps[i],
                self.aspect_ratios[i],
                two_boxes_for_ar1=self.two_boxes_for_ar1,
                offset=self.offsets[i],
                normalize_coord=True,
                clip_boundary=True)
            # each is of shape (feat_size, feat_size, nbox, 4) in (xmin, ymin, xmax, ymax) form
            self.boxes_list.append(box_tensor)
Example #3
    def forward(self, x, img_width, img_height):
        """
        get the cls and reg head output of the rpn, and select the rois
        Inputs:
            img_widths, img_heights: the image size
        Outputs:
            cls: output of the cls head. [N,2, KHW]
            reg: output of the cls head. [N,KHW, 4]
            rois: selected rois. [N*post_thre, 4]
            roi_inds: batch index of the selected rois. [N*post_thre]
        """
        n_img, n_channel, conv_h, conv_w = x.shape
        x = self.conv1(x)
        x = F.relu(x)

        cls = self.cls(x)
        cls = cls.permute(0, 2, 3, 1).contiguous().view(n_img, -1, 2)
        cls_fg_softmax = F.softmax(cls, dim=-1)[:, :, 1].contiguous()  # foreground scores, [N, KHW]
        cls_fg_softmax = cls_fg_softmax.view(n_img, -1)
        cls = cls.permute(0, 2, 1)  # [N, 2, KHW] for loss calculation

        reg = self.reg(x)  # [N,K*4,H,W]
        reg = reg.permute(0, 2, 3, 1).contiguous().view(n_img, -1,
                                                        4)  # [N,KHW,4]

        # tile the base anchors over the conv5 feature map
        self.anchors = generate_anchors(self.base_anchor,
                                        self.feat_stride,
                                        width=conv_w,
                                        height=conv_h)

        rois = []  # the selected rois
        roi_inds = []  # mark each roi belong to which image
        for img_ind in range(n_img):
            roi = self.proposal_layer(self.anchors, cls_fg_softmax[img_ind],
                                      reg[img_ind], img_width,
                                      img_height)  # [post_thre, 4]
            rois.append(roi)
            roi_inds.append(img_ind * torch.ones(len(roi)))
        rois = torch.cat(rois, dim=0)  # [N*post_thre, 4]
        roi_inds = torch.cat(roi_inds, dim=0)  # [N*post_thre]

        return cls, reg, rois, roi_inds, self.anchors
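
The helper generate_anchors(self.base_anchor, self.feat_stride, width=conv_w, height=conv_h) used above is expected to tile the base anchors over every cell of the conv feature map. A minimal sketch of that tiling, assuming base_anchor is a (K, 4) array in (xmin, ymin, xmax, ymax) form; the actual helper in this codebase is not shown and may differ:

import numpy as np

def tile_base_anchors(base_anchor, feat_stride, width, height):
    # Image-space offset of each feature-map cell.
    shift_x = np.arange(width) * feat_stride
    shift_y = np.arange(height) * feat_stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel()], axis=1)  # (H*W, 4)
    # Broadcast the K base anchors onto every cell offset.
    anchors = shifts[:, None, :] + base_anchor[None, :, :]  # (H*W, K, 4)
    return anchors.reshape(-1, 4)  # (H*W*K, 4)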
Example #4
    def __init__(self,
                 size,
                 stride,
                 ratios=None,
                 scales=None,
                 *args,
                 **kwargs):
        """!@brief
        Initializer for an Anchors layer.

        @param size   : The base size of the anchors to generate.
        @param stride : The stride of the anchors to generate.
        @param ratios : The ratios of the anchors to generate (defaults to
                        AnchorParameters.default.ratios).
        @param scales : The scales of the anchors to generate (defaults to
                        AnchorParameters.default.scales).
        """
        self.size = size
        self.stride = stride
        self.ratios = ratios
        self.scales = scales

        if ratios is None:
            self.ratios = utils_anchors.AnchorParameters.default.ratios
        elif isinstance(ratios, list):
            self.ratios = np.array(ratios)
        if scales is None:
            self.scales = utils_anchors.AnchorParameters.default.scales
        elif isinstance(scales, list):
            self.scales = np.array(scales)

        self.num_anchors = len(self.ratios) * len(self.scales)
        self.anchors = keras.backend.variable(
            utils_anchors.generate_anchors(
                base_size=self.size,
                ratios=self.ratios,
                scales=self.scales,
            ))

        super(Anchors, self).__init__(*args, **kwargs)
Example #5
    def __init__(self,
                 size,
                 stride,
                 ratios=None,
                 scales=None,
                 *args,
                 **kwargs):
        """ Initializer for an Anchors layer.

        Args
            size: The base size of the anchors to generate.
            stride: The stride of the anchors to generate.
            ratios: The ratios of the anchors to generate (defaults to [0.5, 1, 2]).
            scales: The scales of the anchors to generate (defaults to [2^0, 2^(1/3), 2^(2/3)]).
        """
        self.size = size
        self.stride = stride
        self.ratios = ratios
        self.scales = scales

        if ratios is None:
            self.ratios = np.array([0.5, 1, 2], keras.backend.floatx())
        elif isinstance(ratios, list):
            self.ratios = np.array(ratios)
        if scales is None:
            self.scales = np.array([2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)],
                                   keras.backend.floatx())
        elif isinstance(scales, list):
            self.scales = np.array(scales)

        self.num_anchors = len(self.ratios) * len(self.scales)
        self.anchors = keras.backend.variable(
            utils_anchors.generate_anchors(
                base_size=self.size,
                ratios=self.ratios,
                scales=self.scales,
            ))

        super(Anchors, self).__init__(*args, **kwargs)
Example #6
    def __init__(self,
                 size,
                 stride,
                 ratios=None,
                 scales=None,
                 *args,
                 **kwargs):
        """ Initializer for an Anchors layer.
		Args
			size: The base size of the anchors to generate.
			stride: The stride of the anchors to generate.
			ratios: The ratios of the anchors to generate (defaults to AnchorParameters.default.ratios).
			scales: The scales of the anchors to generate (defaults to AnchorParameters.default.scales).
		"""
        self.size = size
        self.stride = stride
        self.ratios = ratios
        self.scales = scales

        if ratios is None:
            self.ratios = utils_anchors.AnchorParameters.default.ratios
        elif isinstance(ratios, list):
            self.ratios = np.array(ratios)
        if scales is None:
            self.scales = utils_anchors.AnchorParameters.default.scales
        elif isinstance(scales, list):
            self.scales = np.array(scales)

        self.num_anchors = len(self.ratios) * len(self.scales)
        self.anchors = utils_anchors.generate_anchors(
            base_size=self.size,
            ratios=self.ratios,
            scales=self.scales,
        ).astype(np.float32)

        super(Anchors, self).__init__(*args, **kwargs)
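
Examples #1, #4, #5, and #6 show variants of the same Anchors layer initializer. A minimal instantiation sketch, assuming one layer per pyramid level as in keras-retinanet-style models; the size and stride values below are illustrative placeholders, not values from the source:

# Hypothetical per-level sizes and strides, for illustration only.
sizes = [32, 64, 128, 256, 512]
strides = [8, 16, 32, 64, 128]
anchor_layers = [
    Anchors(size=s, stride=st, name='anchors_%d' % i)
    for i, (s, st) in enumerate(zip(sizes, strides))
]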
Example #7
def get_pred_boxes(coord):
    anchors = generate_anchors()
    pred_boxes = box_transform_inv(anchors, coord)  # (x, y, w, h)
    return pred_boxes  # (H*W*A, 4)
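
box_transform_inv is used here (and in Example #8) to decode raw coordinate predictions against the anchors; its body is not part of these snippets. A plausible YOLOv2-style decode, written only as an assumption about its behavior (anchors given as (x, y, w, h) with x, y the cell's top-left corner in grid units, deltas as raw (tx, ty, tw, th)):

import torch

def decode_boxes(anchors_xywh, deltas):
    # Assumed decode: center offset through a sigmoid, size through an exponential.
    boxes = torch.empty_like(anchors_xywh)
    boxes[:, 0] = anchors_xywh[:, 0] + torch.sigmoid(deltas[:, 0])  # x center = cell x + offset
    boxes[:, 1] = anchors_xywh[:, 1] + torch.sigmoid(deltas[:, 1])  # y center = cell y + offset
    boxes[:, 2] = anchors_xywh[:, 2] * torch.exp(deltas[:, 2])      # width scaled by the prior
    boxes[:, 3] = anchors_xywh[:, 3] * torch.exp(deltas[:, 3])      # height scaled by the prior
    return boxes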
Example #8
def yolo_loss(output_pred, ground_truth, height, width):
    '''
    Build the YOLO training targets and compute the loss terms.

    :param output_pred: sequence of Variables (coord_pred, conf_pred, cls_pred)
    :param ground_truth: sequence of plain tensors (gt_boxes, gt_classes, num_obj)
    :param height: feature-map (grid) height in cells
    :param width: feature-map (grid) width in cells
    :return: (coord_loss, conf_loss, cls_loss)
    '''

    coord_pred = output_pred[0].data  # (16, 196*5, 4)  data
    conf_pred = output_pred[1].data  # (16, 196*5, 1)
    cls_pred = output_pred[2].data  # (16*196*5, 20)

    gt_boxes = ground_truth[0]  # (16, 6, 4), 6 is the num_obj
    gt_classes = ground_truth[1]  # (16, 6) data
    num_obj = ground_truth[2]  # (16, 1)

    batch_size = coord_pred.size(0)
    anchor_num = len(cfg.ANCHORS)

    cell_anchors_xywh = generate_anchors()  # (196*5, 4)

    anchors_xywh = cell_anchors_xywh.clone()
    anchors_xywh[:, 0:2] = anchors_xywh[:, 0:2] + 0.5  # shift anchor x, y from the cell corner to the cell center

    if cfg.DEBUG:
        print('all cell:', cell_anchors_xywh[:12, :])
        print('all anchors:', anchors_xywh[:12, :])

    anchors_xyxy = xywh2xyxy(anchors_xywh)

    if torch.cuda.is_available():
        cell_anchors_xywh = cell_anchors_xywh.cuda()
        anchors_xyxy = anchors_xyxy.cuda()

    coord_target = coord_pred.new_zeros(
        (batch_size, height * width, anchor_num, 4))
    conf_target = conf_pred.new_zeros(
        (batch_size, height * width, anchor_num, 1))
    cls_target = cls_pred.new_zeros(
        (batch_size, height * width, anchor_num, 1))

    coord_mask = coord_pred.new_zeros(
        (batch_size, height * width, anchor_num, 1))
    conf_mask = conf_pred.new_ones(
        (batch_size, height * width, anchor_num, 1)) * cfg.NO_OBJECT_SCALE
    cls_mask = cls_pred.new_zeros((batch_size, height * width, anchor_num, 1))

    for i in range(batch_size):
        gt_num = num_obj[i].item()
        gt_boxes_xyxy = gt_boxes[i, :gt_num, :]
        gt_class = gt_classes[i, :gt_num]

        # scale gt boxes to feature-map (grid) coordinates
        gt_boxes_xyxy[:, 0::2] = gt_boxes_xyxy[:, 0::2] * width
        gt_boxes_xyxy[:, 1::2] = gt_boxes_xyxy[:, 1::2] * height

        gt_boxes_xywh = xyxy2xywh(gt_boxes_xyxy)

        # 1. calculate the predicted box
        pred_box_xywh = box_transform_inv(cell_anchors_xywh, coord_pred[i])
        pred_box_xyxy = xywh2xyxy(pred_box_xywh)

        # 2. calculate the IOU between each pred_box and gt_boxes
        pred_gt_iou = box_overlaps(
            pred_box_xyxy, gt_boxes_xyxy)  # conf_target (pred_num, gt_num)
        pred_gt_iou = pred_gt_iou.view(-1, anchor_num, gt_num)

        max_iou, _ = torch.max(pred_gt_iou, dim=-1, keepdim=True)

        if cfg.DEBUG:
            print('ious:', pred_gt_iou)

        num_pos = torch.nonzero(max_iou.view(-1) > cfg.THRESH).numel()
        if num_pos > 0:
            conf_mask[i][max_iou >= cfg.THRESH] = 0

        # 3. calculate the IOU between gt_boxes and anchors
        anchors_gt_iou = box_overlaps(anchors_xyxy, gt_boxes_xyxy).view(
            -1, anchor_num,
            gt_num)  # decide which anchor is responsible for the gt_box

        # 4. iterate over each gt_boxes
        for j in range(gt_num):
            gt_box_xywh = gt_boxes_xywh[j, :]
            g_x = torch.floor(gt_box_xywh[0])
            g_y = torch.floor(gt_box_xywh[1])
            # cell_idxth cell is responsible for this gt_box
            cell_idx = (g_y * width + g_x).long()

            best_anchor = torch.argmax(anchors_gt_iou[cell_idx, :, j])

            assigned_cell_anchor = cell_anchors_xywh.view(
                -1, anchor_num, 4)[cell_idx, best_anchor, :].unsqueeze(0)
            gt_box = gt_box_xywh.unsqueeze(0)
            target = box_transform(assigned_cell_anchor, gt_box)

            if cfg.DEBUG:
                print('assigned cell:', assigned_cell_anchor)
                print('gt:', gt_box)
                print('target:', target)

            coord_target[i, cell_idx, best_anchor, :] = target.unsqueeze(0)
            coord_mask[i, cell_idx, best_anchor, :] = 1

            conf_target[i, cell_idx, best_anchor, :] = max_iou[cell_idx,
                                                               best_anchor, :]
            conf_mask[i, cell_idx, best_anchor, :] = cfg.OBJECT_SCALE

            if cfg.DEBUG:
                print('conf_target:', max_iou[cell_idx, best_anchor, :])

            cls_target[i, cell_idx, best_anchor, :] = gt_class[j]
            cls_mask[i, cell_idx, best_anchor, :] = 1

    coord_mask = coord_mask.expand_as(coord_target)

    coord_pred_variable, conf_pred_variable, cls_pred_variable = output_pred[
        0], output_pred[1], output_pred[2]

    coord_target = Variable(coord_target.view(batch_size, -1, 4))
    coord_mask = Variable(coord_mask.view(batch_size, -1, 4))
    conf_target = Variable(conf_target.view(batch_size, -1, 1))
    conf_mask = Variable(conf_mask.view(batch_size, -1, 1))
    cls_target = Variable(cls_target.view(-1).long())
    cls_mask = Variable(cls_mask.view(-1).long())

    keep = cls_mask.nonzero().squeeze(1)
    cls_pred_variable = cls_pred_variable[keep, :]
    cls_target = cls_target[keep] - 1

    # calculate loss
    coord_loss = cfg.COORD_SCALE * F.mse_loss(
        coord_pred_variable * coord_mask,
        coord_target * coord_mask,
        reduction='sum') / batch_size / 2.0
    conf_loss = F.mse_loss(conf_pred_variable * conf_mask,
                           conf_target * conf_mask,
                           reduction='sum') / batch_size / 2.0
    cls_loss = cfg.CLASS_SCALE * F.cross_entropy(
        cls_pred_variable, cls_target, reduction='sum') / batch_size

    return coord_loss, conf_loss, cls_loss
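
Example #8 also depends on xywh2xyxy and xyxy2xywh to switch between center and corner box formats. Those helpers are not included in the snippet; a minimal sketch of the conventional conversions, assuming (cx, cy, w, h) on one side and (xmin, ymin, xmax, ymax) on the other:

import torch

def xywh2xyxy(boxes):
    # (cx, cy, w, h) -> (xmin, ymin, xmax, ymax)
    out = boxes.clone()
    out[:, 0] = boxes[:, 0] - boxes[:, 2] / 2
    out[:, 1] = boxes[:, 1] - boxes[:, 3] / 2
    out[:, 2] = boxes[:, 0] + boxes[:, 2] / 2
    out[:, 3] = boxes[:, 1] + boxes[:, 3] / 2
    return out

def xyxy2xywh(boxes):
    # (xmin, ymin, xmax, ymax) -> (cx, cy, w, h)
    out = boxes.clone()
    out[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2
    out[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2
    out[:, 2] = boxes[:, 2] - boxes[:, 0]
    out[:, 3] = boxes[:, 3] - boxes[:, 1]
    return out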