Exemplo n.º 1
0
    def detect_onet(self, im, dets):
        """Refine RNet candidates with ONet and predict facial landmarks.

        Parameters:
        ----------
        im: numpy array
            input image array, indexed below as (row, column, channel)
        dets: numpy array or None
            detection results of rnet, one row per candidate with the box
            corners in columns 0-3

        Returns:
        -------
        boxes_align: numpy array
            boxes after calibration, one row per face:
            (x1, y1, x2, y2, score)
        landmarks_align: numpy array
            landmarks after calibration, one row per face: ten values as
            interleaved x / y coordinates

        Both values are None when ``dets`` is None or empty, when no
        candidate scores above ``self.thresh[2]``, or when NMS keeps nothing.
        """
        h, w, c = im.shape

        # An empty candidate set would otherwise reach torch.stack([]),
        # which raises on an empty list.
        if dets is None or len(dets) == 0:
            return None, None

        dets = self.square_bbox(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        for i in range(num_boxes):
            # Paste the part of the box that pad() reports as inside the
            # image onto a zero canvas of the full box size, then resize
            # the canvas to the 48x48 crop fed to ONet.
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            crop_im = cv2.resize(tmp, (48, 48))
            cropped_ims_tensors.append(
                image_tools.convert_image_to_tensor(crop_im))
        feed_imgs = Variable(torch.stack(cropped_ims_tensors))

        # Follow the device flag of the network that is actually run here;
        # fall back to the RNet flag (the historical behaviour) when the
        # ONet object does not expose one.
        use_cuda = getattr(self.onet_detector, "use_cuda", None)
        if use_cuda is None:
            use_cuda = self.rnet_detector.use_cuda
        if use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg, landmark = self.onet_detector(feed_imgs)

        cls_map = cls_map.cpu().data.numpy()
        reg = reg.cpu().data.numpy()
        landmark = landmark.cpu().data.numpy()

        # Row indices of the candidates scoring above the ONet threshold.
        keep_inds = np.where(cls_map > self.thresh[2])[0]
        if len(keep_inds) == 0:
            return None, None

        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]

        keep = utils.nms(boxes, 0.7, mode="Minimum")
        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]
        keep_landmark = landmark[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        # Regression outputs are offsets scaled by the box width / height.
        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        boxes_align = np.vstack([
            align_topx,
            align_topy,
            align_bottomx,
            align_bottomy,
            keep_cls[:, 0],
        ]).T

        # Landmarks are offset from the top-left corner of the
        # *uncalibrated* box: even columns scale with the width, odd
        # columns with the height.
        align_landmark_topx = keep_boxes[:, 0]
        align_landmark_topy = keep_boxes[:, 1]

        landmark_rows = []
        for k in range(0, 10, 2):
            landmark_rows.append(
                align_landmark_topx + keep_landmark[:, k] * bw)
            landmark_rows.append(
                align_landmark_topy + keep_landmark[:, k + 1] * bh)
        landmark_align = np.vstack(landmark_rows).T

        return boxes_align, landmark_align
Exemplo n.º 2
0
    def detect_rnet(self, im, dets):
        """Filter and calibrate PNet candidates with RNet.

        Parameters:
        ----------
        im: numpy array
            input image array, indexed below as (row, column, channel)
        dets: numpy array or None
            detection results of pnet, one row per candidate with the box
            corners in columns 0-3

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration, one row per survivor:
            (x1, y1, x2, y2, score)
        boxes_align: numpy array
            boxes after calibration, same row layout

        Both values are None when ``dets`` is None or empty, when no
        candidate scores above ``self.thresh[1]``, or when NMS keeps nothing.
        """
        h, w, c = im.shape

        # An empty candidate set would otherwise reach torch.stack([]),
        # which raises on an empty list.
        if dets is None or len(dets) == 0:
            return None, None

        dets = self.square_bbox(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        for i in range(num_boxes):
            # Paste the part of the box that pad() reports as inside the
            # image onto a zero canvas of the full box size, then resize
            # the canvas to the 24x24 crop fed to RNet.
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            crop_im = cv2.resize(tmp, (24, 24))
            cropped_ims_tensors.append(
                image_tools.convert_image_to_tensor(crop_im))
        feed_imgs = Variable(torch.stack(cropped_ims_tensors))

        if self.rnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg = self.rnet_detector(feed_imgs)

        cls_map = cls_map.cpu().data.numpy()
        reg = reg.cpu().data.numpy()

        # Row indices of the candidates scoring above the RNet threshold.
        keep_inds = np.where(cls_map > self.thresh[1])[0]
        if len(keep_inds) == 0:
            return None, None

        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]

        keep = utils.nms(boxes, 0.7)
        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        # Uncalibrated survivors paired with their RNet score.
        boxes = np.vstack([
            keep_boxes[:, 0],
            keep_boxes[:, 1],
            keep_boxes[:, 2],
            keep_boxes[:, 3],
            keep_cls[:, 0],
        ]).T

        # Regression outputs are offsets scaled by the box width / height.
        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        boxes_align = np.vstack([
            align_topx,
            align_topy,
            align_bottomx,
            align_bottomy,
            keep_cls[:, 0],
        ]).T

        return boxes, boxes_align
Exemplo n.º 3
0
    def detect_pnet(self, im):
        """Propose face candidates by running PNet over an image pyramid.

        Parameters:
        ----------
        im: numpy array
            input image array with three dimensions

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration, one row per candidate:
            columns 0-4 of the merged detections
        boxes_align: numpy array
            boxes after calibration, same row layout with the corners
            shifted by the regression offsets

        Both values are None when no pyramid level yields a candidate.
        """
        # Unpacking rejects anything that is not a three-dimensional array.
        _img_h, _img_w, _img_c = im.shape
        net_size = 12

        # Start at the scale that maps min_face_size onto the net size.
        scale = float(net_size) / self.min_face_size
        scaled_im = self.resize_image(im, scale)
        cur_h, cur_w, _ = scaled_im.shape

        per_scale_boxes = []
        while min(cur_h, cur_w) > net_size:
            # Single-image batch for the fully convolutional pass.
            batch = Variable(torch.stack(
                [image_tools.convert_image_to_tensor(scaled_im)]))
            if self.pnet_detector.use_cuda:
                batch = batch.cuda()

            cls_map, reg = self.pnet_detector(batch)

            cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(
                cls_map.cpu())
            reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

            candidates = self.generate_bounding_box(
                cls_map_np[0, :, :], reg_np, scale, self.thresh[0])

            # Step to the next pyramid level before filtering, so a level
            # without candidates still advances the loop.
            scale *= self.scale_factor
            scaled_im = self.resize_image(im, scale)
            cur_h, cur_w, _ = scaled_im.shape

            if candidates.size != 0:
                level_keep = utils.nms(candidates[:, :5], 0.5, 'Union')
                per_scale_boxes.append(candidates[level_keep])

        if not per_scale_boxes:
            return None, None

        # Merge the detections of all levels and suppress across scales.
        merged = np.vstack(per_scale_boxes)
        merged = merged[utils.nms(merged[:, 0:5], 0.7, 'Union')]

        box_w = merged[:, 2] - merged[:, 0] + 1
        box_h = merged[:, 3] - merged[:, 1] + 1

        boxes = np.vstack([merged[:, col] for col in range(5)]).T

        # Columns 5-8 hold the regression offsets, scaled by box size.
        refined_x1 = merged[:, 0] + merged[:, 5] * box_w
        refined_y1 = merged[:, 1] + merged[:, 6] * box_h
        refined_x2 = merged[:, 2] + merged[:, 7] * box_w
        refined_y2 = merged[:, 3] + merged[:, 8] * box_h

        boxes_align = np.vstack([
            refined_x1,
            refined_y1,
            refined_x2,
            refined_y2,
            merged[:, 4],
        ]).T

        return boxes, boxes_align
Exemplo n.º 4
0
    def detect_onet(self, im, dets):
        """Refine RNet candidates with ONet and predict facial landmarks.

        Parameters:
        ----------
        im: torch Tensor
            input image with four dimensions; the last two are read as
            height and width
        dets: torch Tensor or None
            detection results of rnet, one row per candidate with the box
            corners in columns 0-3

        Returns:
        -------
        boxes_align: torch Tensor
            boxes after calibration, one row per face:
            (x1, y1, x2, y2, score)
        landmarks_align: torch Tensor
            landmarks after calibration, one row per face: ten values as
            interleaved x / y coordinates

        Both values are None when ``dets`` is None or empty, when no
        candidate scores above ``self.thresh[2]``, or when NMS keeps nothing.
        """
        _, _, h, w = im.shape

        # An empty candidate set would otherwise reach torch.cat([]),
        # which raises on an empty list.
        if dets is None or len(dets) == 0:
            return None, None

        dets = self.square_bbox(dets)
        dets[:, 0:4] = torch.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        for i in range(num_boxes):
            # Paste the part of the box that pad() reports as inside the
            # image onto a zero canvas of the full box size, then resize
            # the canvas to the 48x48 crop fed to ONet.
            tmp = self.tensortype.FloatTensor(1, 3, tmph[i], tmpw[i]).fill_(0)
            tmp[..., dy[i]:edy[i] + 1,
                dx[i]:edx[i] + 1] = im[..., y[i]:ey[i] + 1, x[i]:ex[i] + 1]
            cropped_ims_tensors.append(F.interpolate(tmp, size=(48, 48)))
        feed_imgs = torch.cat(cropped_ims_tensors)

        # Follow the device flag of the network that is actually run here;
        # fall back to the RNet flag (the historical behaviour) when the
        # ONet object does not expose one.
        use_cuda = getattr(self.onet_detector, "use_cuda", None)
        if use_cuda is None:
            use_cuda = self.rnet_detector.use_cuda
        if use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg, landmark = self.onet_detector(feed_imgs)

        # Row indices of the candidates above the ONet threshold.  Taking
        # column 0 of nonzero() always yields a 1-D index tensor; the
        # previous squeeze() turned a single hit into a 0-d tensor and the
        # lone detection was then discarded.
        keep_inds = torch.nonzero(cls_map > self.thresh[2])[:, 0]
        if keep_inds.numel() == 0:
            return None, None

        boxes = dets[keep_inds]
        _cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]

        keep = utils.nms(boxes, 0.7, mode="Minimum")
        if len(keep) == 0:
            return None, None

        keep_cls = _cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]
        keep_landmark = landmark[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        # Regression outputs are offsets scaled by the box width / height.
        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        boxes_align = torch.stack([
            align_topx,
            align_topy,
            align_bottomx,
            align_bottomy,
            keep_cls[:, 0],
        ], dim=-1)

        # Landmarks are offset from the top-left corner of the
        # *uncalibrated* box: even columns scale with the width, odd
        # columns with the height.
        align_landmark_topx = keep_boxes[:, 0]
        align_landmark_topy = keep_boxes[:, 1]

        landmark_cols = []
        for k in range(0, 10, 2):
            landmark_cols.append(
                align_landmark_topx + keep_landmark[:, k] * bw)
            landmark_cols.append(
                align_landmark_topy + keep_landmark[:, k + 1] * bh)
        landmark = torch.stack(landmark_cols, dim=-1)

        return boxes_align, landmark
Exemplo n.º 5
0
    def detect_rnet(self, im, dets):
        """Filter and calibrate PNet candidates with RNet.

        Parameters:
        ----------
        im: torch Tensor 1x3xHxW
            input image array
        dets: torch Tensor or None
            detection results of pnet, one row per candidate with the box
            corners in columns 0-3

        Returns:
        -------
        boxes: torch Tensor
            detected boxes before calibration, one row per survivor:
            (x1, y1, x2, y2, score)
        boxes_align: torch Tensor
            boxes after calibration, same row layout

        Both values are None when ``dets`` is None or empty, when no
        candidate scores above ``self.thresh[1]``, or when NMS keeps nothing.
        """
        _, _, h, w = im.shape

        # Bail out before any box arithmetic when there is nothing to refine.
        if dets is None or len(dets) == 0:
            return None, None

        dets = self.square_bbox(dets)
        dets[:, 0:4] = torch.round(dets[:, 0:4])
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]
        # Original post-pad check, kept so torch.cat() never sees an empty
        # list.
        if num_boxes == 0:
            return None, None

        cropped_ims_tensors = []
        for i in range(num_boxes):
            # Paste the part of the box that pad() reports as inside the
            # image onto a zero canvas of the full box size, then resize
            # the canvas to the 24x24 crop fed to RNet.
            tmp = self.tensortype.FloatTensor(1, 3, tmph[i], tmpw[i]).fill_(0)
            tmp[..., dy[i]:edy[i] + 1,
                dx[i]:edx[i] + 1] = im[..., y[i]:ey[i] + 1, x[i]:ex[i] + 1]
            cropped_ims_tensors.append(F.interpolate(tmp, size=(24, 24)))
        feed_imgs = torch.cat(cropped_ims_tensors)

        if self.rnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg = self.rnet_detector(feed_imgs)

        # Row indices of the candidates above the RNet threshold.  Taking
        # column 0 of nonzero() always yields a 1-D index tensor; the
        # previous squeeze() turned a single hit into a 0-d tensor and the
        # lone detection was then discarded.
        keep_inds = torch.nonzero(cls_map > self.thresh[1])[:, 0]
        if keep_inds.numel() == 0:
            return None, None

        boxes = dets[keep_inds]
        _cls = cls_map[keep_inds]
        reg = reg[keep_inds]

        keep = utils.nms(boxes, 0.7)
        if len(keep) == 0:
            return None, None

        keep_cls = _cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
        bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

        # Uncalibrated survivors paired with their RNet score.
        boxes = torch.cat([keep_boxes[:, 0:4], keep_cls[:, 0:1]], dim=-1)

        # Regression outputs are offsets scaled by the box width / height.
        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        boxes_align = torch.stack([
            align_topx,
            align_topy,
            align_bottomx,
            align_bottomy,
            keep_cls[:, 0],
        ], dim=-1)

        return boxes, boxes_align
Exemplo n.º 6
0
    def detect_pnet(self, im):
        """Propose face candidates by running PNet over an image pyramid.

        Parameters:
        ----------
        im: torch Tensor
            input image; resized copies are read as four-dimensional with
            height and width last

        Returns:
        -------
        boxes: torch Tensor
            detected boxes before calibration: the first five columns of
            the merged detections
        boxes_align: torch Tensor
            boxes after calibration, one row per candidate with the
            corners shifted by the regression offsets and the score last

        Both values are None when no pyramid level yields a candidate.
        """
        net_size = 12

        # Start at the scale that maps min_face_size onto the net size.
        scale = float(net_size) / self.min_face_size
        scaled_im = self.resize_image(im, scale)
        _, _, cur_h, cur_w = scaled_im.shape

        per_scale_boxes = []
        while min(cur_h, cur_w) > net_size:
            batch = scaled_im
            if self.pnet_detector.use_cuda:
                batch = batch.cuda()

            cls_map, reg = self.pnet_detector(batch.float())

            candidates = self.generate_bounding_box(
                cls_map[0, :, :], reg, scale, self.thresh[0])

            # Step to the next pyramid level before filtering, so a level
            # without candidates still advances the loop.
            scale *= self.scale_factor
            scaled_im = self.resize_image(im, scale)
            _, _, cur_h, cur_w = scaled_im.shape

            if candidates.nelement() != 0:
                level_keep = utils.nms(candidates[:, :5], 0.5, 'Union')
                per_scale_boxes.append(candidates[level_keep])

        if not per_scale_boxes:
            return None, None

        # Merge the detections of all levels and suppress across scales.
        merged = torch.cat(per_scale_boxes)
        merged = merged[utils.nms(merged[:, 0:5], 0.7, 'Union')]

        box_w = merged[:, 2] - merged[:, 0] + 1
        box_h = merged[:, 3] - merged[:, 1] + 1

        boxes = merged[:, :5]

        # Columns 5-8 hold the regression offsets, scaled by box size.
        refined_x1 = merged[:, 0] + merged[:, 5] * box_w
        refined_y1 = merged[:, 1] + merged[:, 6] * box_h
        refined_x2 = merged[:, 2] + merged[:, 7] * box_w
        refined_y2 = merged[:, 3] + merged[:, 8] * box_h

        boxes_align = torch.stack(
            [refined_x1, refined_y1, refined_x2, refined_y2, merged[:, 4]],
            dim=-1)

        return boxes, boxes_align