示例#1
0
    def detect_pnet(self, im):
        """Propose face candidates by running pnet over an image pyramid

        The image is rescaled repeatedly, starting at
        ``config.PNET_SIZE / self.min_face_size`` and multiplying the scale
        by ``self.scale_factor`` after every pass, for as long as the shorter
        side of the rescaled image stays larger than ``config.PNET_SIZE``.
        Candidates of each scale are pruned with NMS (threshold 0.5), the
        survivors of all scales are merged, pruned again (threshold 0.7) and
        finally calibrated.

        Parameters:
        -----------
        im: numpy array
            input image array; its shape is unpacked into three values, so
            it has to be 3-dimensional

        Returns:
        --------
        bboxes_align: numpy array
            bboxes after calibration, passed through
            ``utils.convert_to_square`` and with the first four columns
            rounded; None when no scale produced a candidate
        """
        # The unpacking doubles as a check that the input is 3-dimensional.
        _height, _width, _channels = im.shape
        pnet_size = config.PNET_SIZE

        # initial scale of the pyramid
        scale = float(pnet_size) / self.min_face_size
        scaled_im = self.resize_image(im, scale)
        cur_h, cur_w, _ = scaled_im.shape

        # one array of candidates per pyramid level that found something
        per_scale_bboxes = []
        while min(cur_h, cur_w) > pnet_size:
            batch = utils.convert_image_to_tensor(scaled_im).unsqueeze(0)
            batch = batch.to(self.device)

            cls_map, reg_map = self.pnet_detector(batch)
            candidates = self.generate_bounding_box(
                utils.convert_chwTensor_to_hwcNumpy(cls_map.cpu()),
                utils.convert_chwTensor_to_hwcNumpy(reg_map.cpu()),
                scale, self.thresh[0])

            # Advance to the next pyramid level first, so that an empty
            # result at this level cannot stall the loop.
            scale *= self.scale_factor
            scaled_im = self.resize_image(im, scale)
            cur_h, cur_w, _ = scaled_im.shape

            if candidates.size != 0:
                kept = utils.nms(candidates[:, :5], 0.5, 'Union')
                per_scale_bboxes.append(candidates[kept])

        if not per_scale_bboxes:
            return None

        # merge every level and suppress overlaps across scales
        merged = np.vstack(per_scale_bboxes)
        merged = merged[utils.nms(merged[:, 0:5], 0.7, 'Union')]

        # Columns 0-4 are handed over as the boxes (presumably x1, y1, x2,
        # y2, score - TODO confirm against generate_bounding_box), columns
        # 5 and up as the regression offsets.
        bboxes_align = utils.calibrate_box(merged[:, 0:5], merged[:, 5:])
        bboxes_align = utils.convert_to_square(bboxes_align)
        bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])

        return bboxes_align
示例#2
0
    def detect_onet(self, im, bboxes):
        """Get face candidates using onet

        Every rnet proposal is cut out of ``im`` with the index arrays
        returned by ``utils.correct_bboxes``, pasted into a zero-filled
        patch, resized to ``config.ONET_SIZE`` and scored by
        ``self.onet_detector``.  Proposals whose score (column 1 of the
        classifier output) exceeds ``self.thresh[2]`` are calibrated, pruned
        with NMS and converted to squares.

        Parameters:
        ----------
        im: numpy array
            input image array
        bboxes: numpy array
            detection results of rnet

        Returns:
        -------
        bboxes_align: numpy array
            bboxes after calibration; None when ``bboxes`` is None, when no
            proposal could be cropped, when no crop passes the score
            threshold or when NMS keeps nothing
        """
        net_size = config.ONET_SIZE
        h, w, _ = im.shape
        if bboxes is None:
            return None

        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = utils.correct_bboxes(bboxes, w, h)
        num_bboxes = bboxes.shape[0]

        # crop face using rnet proposal
        cropped_ims_tensors = []
        # Rows of `bboxes` that actually produced a crop.  Proposals can be
        # skipped below, so the network output must be mapped back through
        # this list instead of being indexed against `bboxes` directly.
        valid_inds = []
        for i in range(num_bboxes):
            try:
                if tmph[i] > 0 and tmpw[i] > 0:
                    tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                    tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i],
                                                            x[i]:ex[i], :]
                    crop_im = cv2.resize(tmp, (net_size, net_size))
                    crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                    cropped_ims_tensors.append(crop_im_tensor)
                    valid_inds.append(i)
            except ValueError as e:
                print(e)

        # torch.stack rejects an empty list; nothing to score means no faces.
        if not cropped_ims_tensors:
            return None

        # Keep only the proposals that were fed to the network so that row k
        # of the network output describes row k of `bboxes`.
        bboxes = bboxes[valid_inds]

        feed_imgs = torch.stack(cropped_ims_tensors)
        feed_imgs = feed_imgs.to(self.device)

        # Inference only: skip autograd bookkeeping to save memory.
        with torch.no_grad():
            cls, reg = self.onet_detector(feed_imgs)
        cls = cls.detach().cpu().numpy()
        reg = reg.detach().cpu().numpy()

        keep_inds = np.where(cls[:, 1] > self.thresh[2])[0]
        if len(keep_inds) == 0:
            return None

        keep_bboxes = bboxes[keep_inds]
        keep_reg = reg[keep_inds]
        # using classifier output column 1 as the score of the box
        keep_bboxes[:, 4] = cls[keep_inds, 1]

        bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
        keep = utils.nms(bboxes_align, 0.7, mode='Minimum')

        if len(keep) == 0:
            return None

        bboxes_align = bboxes_align[keep]
        bboxes_align = utils.convert_to_square(bboxes_align)
        return bboxes_align
示例#3
0
    def detect_rnet(self, im, bboxes):
        """Get face candidates using rnet

        Every pnet proposal is cut out of ``im`` with the index arrays
        returned by ``utils.correct_bboxes``, pasted into a zero-filled
        patch, resized to ``config.RNET_SIZE`` and scored by
        ``self.rnet_detector``.  Proposals whose score (column 1 of the
        classifier output) exceeds ``self.thresh[1]`` are pruned with NMS,
        calibrated, converted to squares and rounded.

        Parameters:
        ----------
        im: numpy array
            input image array
        bboxes: numpy array
            detection results of pnet

        Returns:
        -------
        bboxes_align: numpy array
            bboxes after calibration; None when ``bboxes`` is None, when no
            proposal could be cropped, when no crop passes the score
            threshold or when NMS keeps nothing
        """
        net_size = config.RNET_SIZE
        h, w, _ = im.shape
        if bboxes is None:
            return None

        num_bboxes = bboxes.shape[0]

        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = utils.correct_bboxes(bboxes, w, h)

        # crop face using pnet proposals
        cropped_ims_tensors = []
        # Rows of `bboxes` that actually produced a crop.  Proposals can be
        # skipped below, so the network output must be mapped back through
        # this list instead of being indexed against `bboxes` directly.
        valid_inds = []
        for i in range(num_bboxes):
            try:
                if tmph[i] > 0 and tmpw[i] > 0:
                    tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                    tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i],
                                                            x[i]:ex[i], :]
                    crop_im = cv2.resize(tmp, (net_size, net_size))
                    crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                    cropped_ims_tensors.append(crop_im_tensor)
                    valid_inds.append(i)
            except ValueError as e:
                print('dy: {}, edy: {}, dx: {}, edx: {}'.format(
                    dy[i], edy[i], dx[i], edx[i]))
                print('y: {}, ey: {}, x: {}, ex: {}'.format(
                    y[i], ey[i], x[i], ex[i]))
                print(e)

        # torch.stack rejects an empty list; nothing to score means no faces.
        if not cropped_ims_tensors:
            return None

        # Keep only the proposals that were fed to the network so that row k
        # of the network output describes row k of `bboxes`.
        bboxes = bboxes[valid_inds]

        # provide input tensor, if there are too many proposals in PNet
        # there might be OOM
        feed_imgs = torch.stack(cropped_ims_tensors)
        feed_imgs = feed_imgs.to(self.device)

        # Inference only: skip autograd bookkeeping to save memory.
        with torch.no_grad():
            cls, reg = self.rnet_detector(feed_imgs)
        cls = cls.detach().cpu().numpy()
        reg = reg.detach().cpu().numpy()

        keep_inds = np.where(cls[:, 1] > self.thresh[1])[0]
        if len(keep_inds) == 0:
            return None

        keep_bboxes = bboxes[keep_inds]
        keep_reg = reg[keep_inds]
        # using softmax 1 as cls score
        keep_bboxes[:, 4] = cls[keep_inds, 1]

        keep = utils.nms(keep_bboxes, 0.7)
        if len(keep) == 0:
            return None

        keep_bboxes = keep_bboxes[keep]
        keep_reg = keep_reg[keep]

        bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
        bboxes_align = utils.convert_to_square(bboxes_align)
        bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])

        return bboxes_align
    def detect_rnet(self, im, dets):
        """Get face candidates using rnet

        The pnet detections are converted to squares and rounded, cut out of
        ``im`` with the index arrays returned by ``self.pad``, pasted into a
        zero-filled patch, resized to 24x24 and scored by
        ``self.rnet_detector``.  Detections whose score exceeds
        ``self.thresh[1]`` are pruned with NMS and returned both as they are
        and shifted by the regression output scaled with the box size.

        Parameters:
        ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of pnet

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration; columns are the four box
            coordinates followed by the rnet score
        boxes_align: numpy array
            boxes after calibration, same column layout

        Both values are None when ``dets`` is None, when no detection could
        be cropped, when no crop passes the score threshold or when NMS
        keeps nothing.
        """
        h, w, _ = im.shape
        if dets is None:
            return None, None

        dets = utils.convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        # Rows of `dets` that actually produced a crop.  Detections can be
        # skipped below, so the network output must be mapped back through
        # this list instead of being indexed against `dets` directly.
        valid_inds = []
        for i in range(num_boxes):
            try:
                if tmph[i] > 0 and tmpw[i] > 0:
                    tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                    tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i],
                                                            x[i]:ex[i], :]
                    # 24x24 is the input size fed to rnet here
                    crop_im = cv2.resize(tmp, (24, 24))
                    crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                    cropped_ims_tensors.append(crop_im_tensor)
                    valid_inds.append(i)
            except ValueError as e:
                print('dy: {}, edy: {}, dx: {}, edx: {}'.format(
                    dy[i], edy[i], dx[i], edx[i]))
                print('y: {}, ey: {}, x: {}, ex: {}'.format(
                    y[i], ey[i], x[i], ex[i]))
                print(e)

        # torch.stack rejects an empty list; nothing to score means no faces.
        if not cropped_ims_tensors:
            return None, None

        # Keep only the detections that were fed to the network so that row
        # k of the network output describes row k of `dets`.
        dets = dets[valid_inds]

        feed_imgs = torch.stack(cropped_ims_tensors)
        feed_imgs = feed_imgs.to(self.device)

        # Inference only: skip autograd bookkeeping to save memory.  The
        # third output of the network is not used at this stage.
        with torch.no_grad():
            cls_map, reg, _ = self.rnet_detector(feed_imgs)
        cls_map = cls_map.detach().cpu().numpy()
        reg = reg.detach().cpu().numpy()

        # row indices of the crops whose score passes the threshold
        keep_inds = np.where(cls_map > self.thresh[1])[0]
        if len(keep_inds) == 0:
            return None, None

        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]

        keep = utils.nms(boxes, 0.7)
        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]
        bw = keep_boxes[:, 2] - keep_boxes[:, 0]
        bh = keep_boxes[:, 3] - keep_boxes[:, 1]

        # uncalibrated boxes, scored with the rnet output
        boxes = np.vstack([
            keep_boxes[:, 0], keep_boxes[:, 1], keep_boxes[:, 2],
            keep_boxes[:, 3], keep_cls[:, 0]
        ])

        # each coordinate is moved by its regression value times the box
        # width (x coordinates) or height (y coordinates)
        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        boxes_align = np.vstack([
            align_topx, align_topy, align_bottomx, align_bottomy,
            keep_cls[:, 0]
        ])

        # one row per box instead of one row per column
        boxes = boxes.T
        boxes_align = boxes_align.T

        return boxes, boxes_align
    def detect_pnet(self, im):
        """Propose face candidates by running pnet over an image pyramid

        The image is rescaled repeatedly, starting at
        ``12 / self.min_face_size`` and multiplying the scale by
        ``self.scale_factor`` after every pass, for as long as the shorter
        side of the rescaled image stays larger than 12.  Candidates of each
        scale are pruned with NMS (threshold 0.5), the survivors of all
        scales are merged and pruned again (threshold 0.7).

        Parameters:
        ----------
        im: numpy array
            input image array; its shape is unpacked into three values, so
            it has to be 3-dimensional

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration (columns 0-4 of the merged
            candidates)
        boxes_align: numpy array
            boxes after calibration: columns 0-3 shifted by columns 5-8
            scaled with the box width/height, followed by column 4

        Both values are None when no scale produced a candidate.
        """
        # The unpacking doubles as a check that the input is 3-dimensional.
        _height, _width, _channels = im.shape
        pnet_size = 12

        # initial scale of the pyramid
        scale = float(pnet_size) / self.min_face_size
        scaled_im = self.resize_image(im, scale)
        cur_h, cur_w, _ = scaled_im.shape

        # one array of candidates per pyramid level that found something
        per_scale_boxes = []
        while min(cur_h, cur_w) > pnet_size:
            batch = utils.convert_image_to_tensor(scaled_im).unsqueeze(0)
            batch = batch.to(self.device)

            cls_map, reg = self.pnet_detector(batch)
            cls_np = utils.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
            reg_np = utils.convert_chwTensor_to_hwcNumpy(reg.cpu())

            found = self.generate_bounding_box(cls_np[0, :, :], reg_np,
                                               scale, self.thresh[0])

            # Advance to the next pyramid level first, so that an empty
            # result at this level cannot stall the loop.
            scale *= self.scale_factor
            scaled_im = self.resize_image(im, scale)
            cur_h, cur_w, _ = scaled_im.shape

            if found.size != 0:
                kept = utils.nms(found[:, :5], 0.5, 'Union')
                per_scale_boxes.append(found[kept])

        if not per_scale_boxes:
            return None, None

        # merge the detections of every level and suppress overlaps
        merged = np.vstack(per_scale_boxes)
        merged = merged[utils.nms(merged[:, 0:5], 0.7, 'Union')]

        box_w = merged[:, 2] - merged[:, 0]
        box_h = merged[:, 3] - merged[:, 1]

        # uncalibrated boxes: the first five columns, one row per box
        boxes = np.vstack([merged[:, col] for col in range(5)]).T

        # Refine the boxes: coordinate k is moved by column k + 5 times the
        # box width (x coordinates) or height (y coordinates).
        extents = (box_w, box_h, box_w, box_h)
        aligned_columns = [
            merged[:, k] + merged[:, k + 5] * extents[k] for k in range(4)
        ]
        aligned_columns.append(merged[:, 4])
        boxes_align = np.vstack(aligned_columns).T

        return boxes, boxes_align