Example No. 1
    def forward(self, loc_data, conf_data, priors):

        #         loc_data   = prediction[:,:,:4]
        #         conf_data  = prediction[:,:,4:]

        num_priors = priors.shape[0]
        batch_size = loc_data.shape[0]

        output = np.zeros(shape=(batch_size, self.num_classes, self.top_k, 5),
                          dtype=np.float32)

        conf_preds = conf_data.swapaxes(2, 1)

        for i in range(batch_size):
            decoded_boxes = decode(loc=loc_data[i],
                                   priors=priors,
                                   variances=self.variances)

            conf_scores = conf_preds[i].copy()

            for cl in range(1, self.num_classes):
                c_mask = np.greater(conf_scores[cl], self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                scores = scores.astype(np.float32)

                if scores.shape[0] == 0:
                    continue

                l_mask = c_mask.reshape(-1, 1).repeat(4, axis=-1)
                boxes = decoded_boxes[l_mask].reshape(-1, 4).astype(np.float32)
                # the torch nms implementation expects tensors
                boxes_t = torch.from_numpy(boxes).float()
                scores_t = torch.from_numpy(scores).float()

                ids, count = nms(boxes_t, scores_t, self.nms_thresh, self.top_k)

                # alternative pure-numpy path:
                # ids, count = non_maximum_supression(boxes=boxes,
                #                                     scores=scores,
                #                                     overlap=self.nms_thresh,
                #                                     top_k=self.top_k)

                # back to numpy for the final assembly
                ids = ids.numpy().astype(np.int64)
                count = int(count)

                scores = scores[ids[:count]]
                scores = np.expand_dims(scores, axis=1)

                output[i, cl, :count] = np.concatenate(
                    (scores, boxes[ids[:count]]), axis=-1)

        # NOTE: the cross-class top-k filtering step (see the torch versions
        # of this forward pass in the later examples) was left unfinished in
        # this port.

        return output
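
Example 1 round-trips through torch tensors only to call nms and then converts back to NumPy. For a pure-NumPy pipeline, greedy hard-NMS can be written directly; the sketch below is illustrative only (the name nms_numpy is hypothetical, and boxes are assumed to be (x1, y1, x2, y2) corner coordinates, matching the decoded boxes above):

import numpy as np

def nms_numpy(boxes, scores, overlap=0.5, top_k=200):
    """Greedy hard-NMS. boxes: (N, 4) corners; scores: (N,)."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1][:top_k]  # candidate indices, best first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current best box with the remaining candidates
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= overlap]  # drop candidates overlapping too much
    return np.array(keep, dtype=np.int64), len(keep)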
Example No. 2
def run_first_stage(image, net, scale, threshold, gpu_id=0):
    """Run P-Net, generate bounding boxes, and do NMS.

    Arguments:
        image: an instance of PIL.Image.
        net: an instance of pytorch's nn.Module, P-Net.
        scale: a float number,
            scale width and height of the image by this number.
        threshold: a float number,
            threshold on the probability of a face when generating
            bounding boxes from predictions of the net.
        gpu_id: an integer, index of the CUDA device to run the net on.

    Returns:
        a float numpy array of shape [n_boxes, 9],
            bounding boxes with scores and offsets (4 + 1 + 4).
    """

    # scale the image and convert it to a float array
    width, height = image.size
    sw, sh = math.ceil(width * scale), math.ceil(height * scale)
    img = image.resize((sw, sh), Image.BILINEAR)
    img = np.asarray(img, 'float32')
    img = torch.FloatTensor(_preprocess(img)).to('cuda:%d' % gpu_id)
    output = net(img)
    probs = output[1].cpu().data.numpy()[0, 1, :, :]
    offsets = output[0].cpu().data.numpy()
    # probs: probability of a face at each sliding window
    # offsets: transformations to true bounding boxes

    boxes = _generate_bboxes(probs, offsets, scale, threshold)
    if len(boxes) == 0:
        return None

    keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
    return boxes[keep]
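
A typical call, assuming a trained P-Net and the helpers above are importable (the image path, scale, and threshold values below are hypothetical):

from PIL import Image

image = Image.open('face.jpg')  # hypothetical input file
pnet = PNet().to('cuda:0').eval()  # assumes the repo's PNet class
boxes = run_first_stage(image, pnet, scale=0.5, threshold=0.6)
if boxes is not None:
    print(boxes.shape)  # (n_boxes, 9): 4 coords + 1 score + 4 offsets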
Example No. 3
def dofilter(frames, action_index, frame_index, nms_thresh):
    # filter out least likely detections for actions
    scores = frames[frame_index]['scores'][:, action_index]
    pick = np.where(scores > 0.001)[0]
    scores = scores[pick]
    boxes = frames[frame_index]['boxes'][pick, :]
    allscores = frames[frame_index]['scores'][pick, :]
    # sort in descending order
    pick = np.argsort(scores)[::-1]
    # pick at most 50
    to_pick = min(50, len(pick))
    pick = pick[:to_pick]
    scores = scores[pick]
    boxes = boxes[pick, :]
    allscores = allscores[pick, :]
    if len(boxes) == 0 or len(scores) == 0 or len(allscores) == 0:
        return boxes, scores, allscores
    # Perform nms on picked boxes
    pick, counts = nms(torch.from_numpy(boxes), torch.from_numpy(scores),
                       nms_thresh)  # ids kept after nms
    # alternative: pick = nms(np.hstack((boxes, scores[:, np.newaxis])), nms_thresh)
    pick = pick[:counts].cpu().numpy()
    boxes = boxes[pick, :]
    scores = scores[pick]
    allscores = allscores[pick, :]
    return boxes, scores, allscores
Example No. 4
def get_nmsed_box(rpn_rois, confs, locs, class_nums, im_info):
    lod = rpn_rois.lod()[0]
    rpn_rois_v = np.array(rpn_rois)
    variance_v = np.array(cfg.bbox_reg_weights)
    confs_v = np.array(confs)
    locs_v = np.array(locs)
    im_results = [[] for _ in range(len(lod) - 1)]
    new_lod = [0]
    for i in range(len(lod) - 1):
        start = lod[i]
        end = lod[i + 1]
        if start == end:
            continue
        locs_n = locs_v[start:end, :]
        rois_n = rpn_rois_v[start:end, :]
        rois_n = rois_n / im_info[i][2]
        rois_n = box_decoder(locs_n, rois_n, variance_v)
        rois_n = clip_tiled_boxes(rois_n, im_info[i][:2] / im_info[i][2])

        cls_boxes = [[] for _ in range(class_nums)]
        scores_n = confs_v[start:end, :]
        for j in range(1, class_nums):
            inds = np.where(scores_n[:, j] > cfg.TEST.score_thresh)[0]
            scores_j = scores_n[inds, j]
            rois_j = rois_n[inds, j * 4:(j + 1) * 4]
            dets_j = np.hstack(
                (scores_j[:, np.newaxis], rois_j)).astype(np.float32,
                                                          copy=False)
            keep = box_utils.nms(dets_j, cfg.TEST.nms_thresh)
            nms_dets = dets_j[keep, :]
            # prepend labels so each row is [label, score, x1, y1, x2, y2],
            # matching the column indexing used below
            label = np.array([j for _ in range(len(keep))])
            nms_dets = np.hstack(
                (label[:, np.newaxis], nms_dets)).astype(np.float32,
                                                         copy=False)
            cls_boxes[j] = nms_dets
        # Limit to max_per_image detections **over all classes**
        image_scores = np.hstack(
            [cls_boxes[j][:, 1] for j in range(1, class_nums)])
        if len(image_scores) > cfg.TEST.detections_per_im:
            image_thresh = np.sort(image_scores)[-cfg.TEST.detections_per_im]
            for j in range(1, class_nums):
                keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

        im_results_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)])
        im_results[i] = im_results_n
        new_lod.append(len(im_results_n) + new_lod[-1])
        boxes = im_results_n[:, 2:]
        scores = im_results_n[:, 1]
        labels = im_results_n[:, 0]
    im_results = np.vstack([im_results[k] for k in range(len(lod) - 1)])
    return new_lod, im_results
Example No. 5
    def forward(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh,
                loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        self.num_classes = num_classes
        self.background_label = bkg_label
        self.top_k = top_k
        # Parameters used in nms.
        self.nms_thresh = nms_thresh
        if nms_thresh <= 0:
            raise ValueError('nms_thresh must be positive.')
        self.conf_thresh = conf_thresh
        self.variance = cfg['variance']
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()
            # num_det = 0
            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # keep only the top_k highest-scoring detections across all classes;
        # fill_() after advanced indexing wrote to a copy, so assign instead
        flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
        return output
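
The last block implements the cross-class top-k filter with a double argsort: sorting the scores yields idx, and sorting idx yields each slot's rank, so rank < top_k marks exactly the top_k highest-scoring slots. A toy sketch of the same trick:

import torch

scores = torch.tensor([[0.2, 0.9, 0.1, 0.7, 0.4]])
_, idx = scores.sort(1, descending=True)  # slot indices, best to worst
_, rank = idx.sort(1)                     # rank of each original slot
print(rank)                               # tensor([[3, 0, 4, 1, 2]])
top_k = 2
scores[rank >= top_k] = 0                 # zero everything outside the top k
print(scores)                             # tensor([[0.0000, 0.9000, 0.0000, 0.7000, 0.0000]])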
Example No. 6
    def forward(self, loc_data, conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        # create a zero tensor of shape [batch N, 21 classes, top 200, conf + loc]
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        # reorder confidences to [batch N, num classes, num priors]
        conf_preds = conf_data.view(num, num_priors,
                                    self.num_classes).transpose(2, 1)

        # Decode predictions into bboxes.
        for i in range(num):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(1, self.num_classes):
                # drop boxes whose confidence is below the threshold
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                # gather the decoded boxes for this class
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                # remove overlapping boxes with NMS
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)
        flt = output.contiguous().view(num, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # keep only the top_k highest-scoring detections across all classes;
        # fill_() after advanced indexing wrote to a copy, so assign instead
        flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
        return output
Example No. 7
    def forward(self, loc_data, conf_data, prior_data, conf_thresh):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [1,num_priors,4]
        """
        batch_size = loc_data.size(0)
        num_priors = prior_data.size(0)
        output = torch.zeros(batch_size, self.num_classes, self.top_k, 5)
        if loc_data.is_cuda:
            output = output.cuda()
        conf_preds = conf_data.transpose(2, 1)  # group by classes

        # Decode predictions into bboxes.
        for i in range(batch_size):
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()

            for cl in range(self.num_classes):
                c_mask = conf_scores[cl].gt(conf_thresh)
                scores = conf_scores[cl][c_mask]
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                thresholded_boxes = decoded_boxes[l_mask]
                if len(thresholded_boxes) > 0:
                    boxes = thresholded_boxes.view(-1, 4)
                    # idx of highest scoring and non-overlapping boxes per class
                    ids, count = nms(boxes, scores, self.nms_thresh,
                                     self.top_k)
                    output[i, cl, :count] = \
                        torch.cat((scores[ids[:count]].unsqueeze(1),
                                   boxes[ids[:count]]), 1)
        flt = output.contiguous().view(batch_size, -1, 5)
        _, idx = flt[:, :, 0].sort(1, descending=True)
        _, rank = idx.sort(1)
        # keep only the top_k highest-scoring detections across all classes;
        # fill_() after advanced indexing wrote to a copy, so assign instead
        flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
        return output
Example No. 8
    def predict(self, image, top_k=-1, prob_threshold=None):
        cpu_device = torch.device("cpu")
        height, width, _ = image.shape
        image = self.transform(image)
        images = image.unsqueeze(0)
        images = images.to(self.device)
        with torch.no_grad():
            self.timer.start()
            scores, boxes = self.net.forward(images)
            print("Inference time: ", self.timer.end())
        boxes = boxes[0]
        scores = scores[0]
        if not prob_threshold:
            prob_threshold = self.filter_threshold
        # this version of nms is slower on GPU, so we move data to CPU.
        boxes = boxes.to(cpu_device)
        scores = scores.to(cpu_device)
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = box_utils.nms(box_probs, self.nms_method,
                                      score_threshold=prob_threshold,
                                      iou_threshold=self.iou_threshold,
                                      sigma=self.sigma,
                                      top_k=top_k,
                                      candidate_size=self.candidate_size)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
            return torch.tensor([]), torch.tensor([]), torch.tensor([])
        picked_box_probs = torch.cat(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]
Example No. 9
def run_first_stage(image, net, scale, threshold):
    """Run P-Net, generate bounding boxes, and do NMS.

    Arguments:
        image: an instance of PIL.Image.
        net: an instance of pytorch's nn.Module, P-Net.
        scale: a float number,
            scale width and height of the image by this number.
        threshold: a float number,
            threshold on the probability of a face when generating
            bounding boxes from predictions of the net.

    Returns:
        a float numpy array of shape [n_boxes, 9],
            bounding boxes with scores and offsets (4 + 1 + 4).
    """

    # scale the image and convert it to a float array
    width, height = image.size
    sw, sh = math.ceil(width*scale), math.ceil(height*scale)
    img = image.resize((sw, sh), Image.BILINEAR)
    img = np.asarray(img, 'float32')

    with torch.no_grad():
        img = torch.FloatTensor(_preprocess(img))
        output = net(img)
    probs = output[1].data.numpy()[0, 1, :, :]
    offsets = output[0].data.numpy()
    # probs: probability of a face at each sliding window
    # offsets: transformations to true bounding boxes

    boxes = _generate_bboxes(probs, offsets, scale, threshold)
    if len(boxes) == 0:
        return None

    keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
    return boxes[keep]
Example No. 10
def detect_faces(image, min_face_size=20.0,
                 thresholds=[0.6, 0.7, 0.8],
                 nms_thresholds=[0.7, 0.7, 0.7]):
    """
    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number.
        thresholds: a list of length 3.
        nms_thresholds: a list of length 3.
    Returns:
        two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
        bounding boxes and facial landmarks.
    """

    with torch.no_grad():
        # LOAD MODELS
        pnet = PNet().to(device)
        rnet = RNet().to(device)
        onet = ONet().to(device)
        onet.eval()

        # BUILD AN IMAGE PYRAMID
        width, height = image.size
        min_length = min(height, width)

        min_detection_size = 12
        factor = 0.707  # sqrt(0.5)

        # scales for scaling the image
        scales = []

        # scales the image so that
        # minimum size that we can detect equals to
        # minimum face size that we want to detect
        m = min_detection_size / min_face_size
        min_length *= m

        factor_count = 0
        while min_length > min_detection_size:
            scales.append(m * factor ** factor_count)
            min_length *= factor
            factor_count += 1

        # STAGE 1

        # it will be returned
        bounding_boxes = []

        # run P-Net on different scales
        for s in scales:
            boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
            bounding_boxes.append(boxes)

        # collect boxes (and offsets, and scores) from different scales
        bounding_boxes = [i for i in bounding_boxes if i is not None]
        bounding_boxes = np.vstack(bounding_boxes)

        keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
        bounding_boxes = bounding_boxes[keep]

        # use offsets predicted by pnet to transform bounding boxes
        bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
        # shape [n_boxes, 5]

        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 2

        img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = rnet(img_boxes)
        offsets = output[0].data.cpu().numpy()  # shape [n_boxes, 4]
        probs = output[1].data.cpu().numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[1])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]

        keep = nms(bounding_boxes, nms_thresholds[1])
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 3

        img_boxes = get_image_boxes(bounding_boxes, image, size=48)
        if len(img_boxes) == 0:
            return [], []
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = onet(img_boxes)
        landmarks = output[0].data.cpu().numpy()  # shape [n_boxes, 10]
        offsets = output[1].data.cpu().numpy()  # shape [n_boxes, 4]
        probs = output[2].data.cpu().numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[2])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
        offsets = offsets[keep]
        landmarks = landmarks[keep]

        # compute landmark points
        width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
        height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
        xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
        landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
        landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]

        bounding_boxes = calibrate_box(bounding_boxes, offsets)
        keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
        bounding_boxes = bounding_boxes[keep]
        landmarks = landmarks[keep]

        return bounding_boxes, landmarks
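
Usage is a single call on a PIL image (the file name below is hypothetical):

from PIL import Image

image = Image.open('group.jpg')  # hypothetical input file
bounding_boxes, landmarks = detect_faces(image)
# each row of bounding_boxes is [x1, y1, x2, y2, score]
for box in bounding_boxes:
    print('face at (%.0f, %.0f)-(%.0f, %.0f), score %.3f' % tuple(box[:5]))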
Example No. 11
def detect_faces(image, min_face_size=20.0,
                 thresholds=[0.6, 0.7, 0.8],
                 nms_thresholds=[0.7, 0.7, 0.7]):
    """
    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number.
        thresholds: a list of length 3.
        nms_thresholds: a list of length 3.

    Returns:
        two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
        bounding boxes and facial landmarks.
    """

    # LOAD MODELS
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    onet.eval()

    # BUILD AN IMAGE PYRAMID
    width, height = image.size
    min_length = min(height, width)

    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)

    # scales for scaling the image
    scales = []

    # scales the image so that
    # minimum size that we can detect equals to
    # minimum face size that we want to detect
    m = min_detection_size/min_face_size
    min_length *= m

    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m*factor**factor_count)
        min_length *= factor
        factor_count += 1

    # STAGE 1

    # it will be returned
    bounding_boxes = []

    # run P-Net on different scales
    for s in scales:
        boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
        bounding_boxes.append(boxes)

    # collect boxes (and offsets, and scores) from different scales
    bounding_boxes = [i for i in bounding_boxes if i is not None]
    bounding_boxes = np.vstack(bounding_boxes)

    keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
    bounding_boxes = bounding_boxes[keep]

    # use offsets predicted by pnet to transform bounding boxes
    bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
    # shape [n_boxes, 5]

    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

    # STAGE 2

    img_boxes = get_image_boxes(bounding_boxes, image, size=24)
    with torch.no_grad():
        img_boxes = torch.FloatTensor(img_boxes)
        output = rnet(img_boxes)
    offsets = output[0].data.numpy()  # shape [n_boxes, 4]
    probs = output[1].data.numpy()  # shape [n_boxes, 2]

    keep = np.where(probs[:, 1] > thresholds[1])[0]
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
    offsets = offsets[keep]

    keep = nms(bounding_boxes, nms_thresholds[1])
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

    # STAGE 3

    img_boxes = get_image_boxes(bounding_boxes, image, size=48)
    if len(img_boxes) == 0:
        return [], []
    with torch.no_grad():
        img_boxes = torch.FloatTensor(img_boxes)
        output = onet(img_boxes)
    landmarks = output[0].data.numpy()  # shape [n_boxes, 10]
    offsets = output[1].data.numpy()  # shape [n_boxes, 4]
    probs = output[2].data.numpy()  # shape [n_boxes, 2]

    keep = np.where(probs[:, 1] > thresholds[2])[0]
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
    offsets = offsets[keep]
    landmarks = landmarks[keep]

    # compute landmark points
    width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
    height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
    xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
    landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5]
    landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10]

    bounding_boxes = calibrate_box(bounding_boxes, offsets)
    keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
    bounding_boxes = bounding_boxes[keep]
    landmarks = landmarks[keep]

    return bounding_boxes, landmarks
Example No. 12
    def _forward_test(self, cnn_features):
        arg = easydict.EasyDict({
            'clip_boxes': self.test_clip_boxes,
            'nms_thresh': self.test_nms_thresh,
            'max_proposals': self.test_max_proposals
        })

        # Make sure that setImageSize has been called
        assert self.image_height and self.image_width and not self._called_forward_size, \
            'Must call setImageSize before each forward pass'
        self._called_forward_size = True

        rpn_out, act_reg = self.rpn.forward(cnn_features)
        rpn_boxes, rpn_anchors, rpn_trans, rpn_scores = rpn_out
        num_boxes = rpn_boxes.size(1)

        # Maybe clip boxes to image boundary
        if arg.clip_boxes:
            bounds = {
                'x_min': 1,
                'y_min': 1,
                'x_max': self.image_width,
                'y_max': self.image_height
            }
            rpn_boxes, valid = box_utils.clip_boxes(rpn_boxes, bounds,
                                                    'xcycwh')

            # print('%d/%d boxes are predicted valid'
            #       % (torch.sum(valid), valid.numel()))

            # Clamp parallel arrays only to valid boxes (not oob of the image)
            rpn_boxes = self.clamp_data(rpn_boxes, valid)
            rpn_anchors = self.clamp_data(rpn_anchors, valid)
            rpn_trans = self.clamp_data(rpn_trans, valid)
            rpn_scores = self.clamp_data(rpn_scores, valid)
            num_boxes = rpn_boxes.size(1)

        # Convert rpn boxes from (xc, yc, w, h) format to (x1, y1, x2, y2)
        rpn_boxes_x1y1x2y2 = box_utils.xcycwh_to_x1y1x2y2(rpn_boxes[0])

        # Convert objectness positive / negative scores to probabilities
        rpn_scores_exp = torch.exp(rpn_scores)
        pos_exp = rpn_scores_exp[0, :, 0]
        neg_exp = rpn_scores_exp[0, :, 1]
        scores = (pos_exp + neg_exp).pow(-1) * pos_exp

        verbose = False
        if verbose:
            print('in LocalizationLayer forward_test')
            print('Before NMS there are %d boxes' % num_boxes)
            print('Using NMS threshold %f' % arg.nms_thresh)

        # Run NMS and sort by objectness score
        boxes_scores = torch.cat((rpn_boxes_x1y1x2y2, scores.view(-1, 1)),
                                 dim=1)

        if arg.max_proposals == -1:
            idx = box_utils.nms(boxes_scores.data, arg.nms_thresh)
        else:
            idx = box_utils.nms(boxes_scores.data, arg.nms_thresh,
                                arg.max_proposals)

        rpn_boxes_nms = torch.squeeze(rpn_boxes)[idx]

        if verbose:
            print('After NMS there are %d boxes' % rpn_boxes_nms.size(0))

        output = rpn_boxes_nms
        return output
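
The pos_exp/neg_exp arithmetic above is a two-class softmax over the objectness logits written out by hand; assuming the same [N, 2] (positive, negative) score layout, the equivalence can be checked directly:

import torch
import torch.nn.functional as F

rpn_scores = torch.randn(1, 10, 2)  # [batch, boxes, (pos, neg)] logits
exp = torch.exp(rpn_scores)
manual = exp[0, :, 0] / (exp[0, :, 0] + exp[0, :, 1])
softmax = F.softmax(rpn_scores[0], dim=1)[:, 0]
print(torch.allclose(manual, softmax))  # True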
Example No. 13
                                        priors=priors)

sample_np = data_np[0]
images_np, targets_np = sample_np

loc_data = targets_np[:, :, :4]
conf_data = targets_np[:, :, 4:]

a = loc_data[0, :, :]

decoded_np = decode_np(loc=a, priors=priors, variances=[0.1, 0.2])

a_ = torch.from_numpy(a).float()
priors_ = torch.from_numpy(priors).float()

decoded_th = decode_th(loc=a_, priors=priors_, variances=[0.1, 0.2])
# exact float equality is brittle; count near-equal entries instead
c = np.isclose(decoded_th.numpy(), decoded_np)
print(np.sum(c))

scores = np.random.rand(8732, )

scores_ = torch.from_numpy(scores).float()

nms_np = non_maximum_supression(boxes=decoded_np,
                                scores=scores,
                                top_k=200,
                                overlap=0.5)
nms_th = nms(boxes=decoded_th, scores=scores_, overlap=0.5, top_k=200)

print(nms_th[0].numpy())
print(nms_np[0])
Example No. 14
    def forward(self, arm_loc_data, arm_conf_data, odm_loc_data, odm_conf_data, prior_data):
        """
        Args:
            loc_data: (tensor) Loc preds from loc layers
                Shape: [batch,num_priors*4]
            conf_data: (tensor) Conf preds from conf layers
                Shape: [batch*num_priors,num_classes]
            prior_data: (tensor) Prior boxes and variances from priorbox layers
                Shape: [num_priors,4]
        """
        loc_data = odm_loc_data
        conf_data = F.softmax(odm_conf_data, dim=2)
        arm_conf_data = F.softmax(arm_conf_data, dim=2)

        arm_object_conf = arm_conf_data.data[:, :, 1:]
        no_object_index = arm_object_conf <= self.objectness_thre
        conf_data[no_object_index.expand_as(conf_data)] = 0

        num = loc_data.size(0)  # batch size
        num_priors = prior_data.size(0)
        output = torch.zeros(num, self.num_classes, self.top_k, 5)
        conf_preds = conf_data.view(num, num_priors,
                                   self.num_classes).transpose(2, 1)
        #conf_preds = conf_data.view(num,num_priors,self.num_classes)
        # Decode predictions into bboxes.
        if torch.cuda.is_available():
            prior_data = prior_data.cuda()
        for i in range(num):
            default = decode(arm_loc_data[i], prior_data, self.variance)
            default = center_size(default)
            decoded_boxes = decode(loc_data[i], default, self.variance)
            # For each class, perform nms
            conf_scores = conf_preds[i].clone()
            '''
            prior_conf_max,prior_conf_idx = conf_scores.max(1,keepdim=True)
            cls_mask = prior_conf_idx.gt(0)
            prior_conf_max = prior_conf_max[cls_mask]
            prior_conf_idx = prior_conf_idx[cls_mask]
            decoded_boxes = decoded_boxes[cls_mask]
            conf_mask = prior_conf_max.gt(self.conf_thresh)
            prior_conf_max = prior_conf_max[conf_mask]
            prior_conf_idx = prior_conf_idx[conf_mask]
            decoded_boxes = decoded_boxes[conf_mask]
            '''
            #print(decoded_boxes, conf_scores)
            for cl in range(1, self.num_classes):
                c_mask = conf_scores[cl].gt(self.conf_thresh)
                scores = conf_scores[cl][c_mask]
                #print(scores.dim())
                if scores.size(0) == 0:
                    continue
                l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
                boxes = decoded_boxes[l_mask].view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                #print(boxes.size(), scores.size())
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                ids = ids.long()
                if count == 0:
                    continue
                #print(count,ids[:count],torch.gather(scores,0,ids).data)
                #print(boxes[ids[:count]])
                #print('debug',scores[ids[:count]].size(),boxes[ids[:count]].size())
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].view(-1,1),
                               boxes[ids[:count]].view(-1,4)), 1)
        #flt = output.contiguous().view(num, -1, 5)
        #_, idx = flt[:, :, 0].sort(1, descending=True)
        #_, rank = idx.sort(1)
        #flt[(rank < self.keep_top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
        return output