Example #1
    def detect_pnet(self, im):
        # build the image pyramid
        h, w, c = im.shape
        net_size = 12
        minl = np.min((w, h))
        base_scale = net_size / float(self.min_face_size)
        scales = []
        face_count = 0
        while minl > net_size:
            s = base_scale * self.scalor ** face_count
            if np.floor(minl * s) <= 12:
                break
            scales += [s]
            face_count += 1

        # run PNet on every pyramid scale
        total_boxes = []
        for scale in scales:
            hs = np.ceil(h * scale)
            ws = np.ceil(w * scale)
            hs = int(hs)
            ws = int(ws)
            im_data = cv2.resize(im, (ws, hs))
            input = Image2Tensor(im_data, (127.5,127.5,127.5))
            input = input.to(self.device)
            self.pnet.eval()
            with torch.no_grad():
                output_cls, output_reg = self.pnet(input)
            output_cls = output_cls.squeeze_(0).detach().cpu().numpy()
            output_reg = output_reg.squeeze_(0).detach().cpu().numpy()

            bboxes = self.generate_bbox(output_cls, output_reg, scale, self.threshold[0])

            # NMS within this scale (cross-scale NMS happens below)
            if len(bboxes) <= 0:
                continue
            keep = py_nms(bboxes, 0.5, 'Union')

            if len(keep) <= 0:
                continue

            bboxes = bboxes[keep]
            total_boxes.extend(bboxes)

        # after all pyramid levels are processed, run one more NMS across scales
        if len(total_boxes) <= 0:
            return None
        total_boxes = np.array(total_boxes)
        keep = py_nms(total_boxes, 0.7, 'Union')
        if len(keep) <= 0:
            return None
        return total_boxes[keep]
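
The helper py_nms is not shown in these snippets. Below is a minimal sketch of what such a function might look like, assuming boxes are given as [x, y, w, h, score] rows (the format composed in detect_ronet in Examples #3 and #4) and that the mode argument switches the overlap denominator between the union of the two areas ('Union') and the smaller of the two ('Minimum'). The repository's actual implementation may differ.

import numpy as np

def py_nms(boxes, overlap_threshold, mode='Union'):
    """Greedy non-maximum suppression over [x, y, w, h, score] rows.

    Returns the indices of the boxes that are kept. This is a sketch of the
    helper used above, not the repository's actual implementation.
    """
    if len(boxes) == 0:
        return []
    boxes = np.asarray(boxes, dtype=np.float32)
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 0] + boxes[:, 2]
    y2 = boxes[:, 1] + boxes[:, 3]
    scores = boxes[:, 4]
    areas = boxes[:, 2] * boxes[:, 3]
    order = scores.argsort()[::-1]          # highest score first

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current best box with the remaining ones
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h
        if mode == 'Minimum':
            ovr = inter / np.minimum(areas[i], areas[order[1:]])
        else:  # 'Union'
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes whose overlap with the current best is low enough
        order = order[np.where(ovr <= overlap_threshold)[0] + 1]
    return keep

The returned index list is used directly for fancy indexing (bboxes[keep]) and length checks (len(keep)), matching how the snippets above consume it.
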
Example #2
    def detect_pnet(self, im):
        # build the image pyramid
        h, w, c = im.shape
        net_size = 12
        minl = np.min((w, h))
        base_scale = net_size / float(self.min_face_size)
        scales = []
        face_count = 0
        while minl > net_size:
            s = base_scale * self.scalor ** face_count
            if np.floor(minl * s) <= 12:
                break
            scales += [s]
            face_count += 1

        # run PNet on every pyramid scale
        total_boxes = []
        for scale in scales:
            hs = np.ceil(h * scale)
            ws = np.ceil(w * scale)
            hs = int(hs)
            ws = int(ws)

            im_data = cv2.resize(im, (ws, hs))
            input = Image2NArray(im_data, [127.5,127.5,127.5] )
            input = input.as_in_context(self.ctx)
            
            output_cls, output_reg = self.pnet(input)
            
            output_cls = output_cls.asnumpy().squeeze(axis=0)
            output_reg = output_reg.asnumpy().squeeze(axis=0)

            bboxes = self.generate_bbox(output_cls, output_reg, scale, self.threshold[0])

            if len(bboxes) <= 0:
                continue
            keep = py_nms(bboxes, 0.5, 'Union')

            if len(keep) <= 0:
                continue

            bboxes = bboxes[keep]
            total_boxes.extend(bboxes)

        # after all pyramid levels are processed, run one more NMS across scales
        if len(total_boxes) <= 0:
            return None
        total_boxes = np.array(total_boxes)
        keep = py_nms(total_boxes, 0.7, 'Union')
        if len(keep) <= 0:
            return None
        return total_boxes[keep]
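
Both PNet variants rely on generate_bbox to turn the network's output maps into candidate boxes at the original image resolution. The sketch below shows one plausible shape for that helper, assuming the classification map is (2, H, W) with the face probability in channel 1, the regression map is (4, H, W), and PNet has the standard MTCNN stride of 2 and a 12x12 receptive field; the box format [x, y, w, h, score, ...] is an assumption chosen to stay consistent with the py_nms sketch above. The repository's actual helper may decode the regression offsets differently.

import numpy as np

def generate_bbox(cls_map, reg_map, scale, threshold, stride=2, cell_size=12):
    """Map PNet output cells above `threshold` back to boxes in the original image.

    Sketch only: assumes cls_map has shape (2, H, W), reg_map has shape (4, H, W),
    and returns rows [x, y, w, h, score, rx, ry, rw, rh].
    """
    face_prob = cls_map[1, :, :]
    t_index = np.where(face_prob > threshold)
    if t_index[0].size == 0:
        return np.array([])
    # top-left corner and size of each 12x12 window, mapped back through the pyramid scale
    x = np.round(stride * t_index[1] / scale)
    y = np.round(stride * t_index[0] / scale)
    w = np.full_like(x, cell_size / scale)
    h = np.full_like(y, cell_size / scale)
    score = face_prob[t_index]
    # raw per-cell regression offsets, attached so a later stage can decode them
    reg = reg_map[:, t_index[0], t_index[1]].T
    boxes = np.hstack([np.vstack([x, y, w, h, score]).T, reg])
    return boxes
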
Example #3
    def detect_ronet(self, img, bboxes, image_size):
        H, W, C = img.shape
        IMAGE_SIZE = image_size
        # 1. convert each candidate bbox to a square
        sb = []
        for i in range(bboxes.shape[0]):
            box = bboxes[i, :]
            sq = square_bbox(box)
            sb += [sq]

        # 2. pad / crop each square box out of the image
        crops = []
        origin_bbox = []
        for i in sb:
            size = i[2]
            sx0, sy0, sx1, sy1, dx0, dy0, dx1, dy1 = pad_bbox(i, W, H)
            crop = np.zeros((size, size, 3), dtype=np.uint8)
            if sx0 < 0 or sy0 < 0 or dx0 < 0 or dy0 < 0 or sx1 > W or sy1 > H or dx1 > size or dy1 > size:
                continue
            if sx0 > W or sy0 > H or dx0 > size or dy0 > size or sx1 < 0 or sy1 < 0 or dx1 < 0 or dy1 < 0:
                continue
            crop[dy0:dy1, dx0:dx1, :] = img[sy0:sy1, sx0:sx1, :]
            out = cv2.resize(crop, (IMAGE_SIZE, IMAGE_SIZE))
            out = out.astype(np.float32) - np.array([127.5,127.5,127.5], dtype=np.float32)
            out = out / 128
            out = out.swapaxes(1, 2).swapaxes(0, 1)
            crops += [out]
            origin_bbox += [i]

        # 3. run the detector: RNet for 24x24 input, ONet for 48x48 input
        origin_bbox = np.array(origin_bbox)
        crops = np.array(crops)
        input = torch.from_numpy(crops).to(self.device)
        detector = self.rnet
        threshold = self.threshold[1]
        if image_size == 48:
            detector = self.onet
            threshold = self.threshold[2]
        detector.eval()
        with torch.no_grad():
            out = detector(input)

        # 4. map predictions back to original image coordinates
        ## out[0] -> N * 2 (face classification)
        ## out[1] -> N * 4 (bbox regression)
        ## out[2] -> N * 10 (5 landmark points)
        cls_map = out[0].detach().cpu().numpy()
        reg = out[1].detach().cpu().numpy()
        landmark = out[2].detach().cpu().numpy()
        face_map = cls_map[:, 1]
        t_index = np.where(face_map > threshold)
        if t_index[0].shape[0] <= 0:
            return None

        origin_bbox = origin_bbox[t_index]
        score = face_map[t_index]
        reg_map = reg[t_index]
        landmark_map = landmark[t_index]
        dx = reg_map[:, 0]
        dy = reg_map[:, 1]
        dw = reg_map[:, 2]
        dh = reg_map[:, 3]

        # decode the RCNN-style regression targets (inverse of the smooth-L1 training transform)
        dx *= IMAGE_SIZE
        dy *= IMAGE_SIZE
        dw = np.exp(dw) * IMAGE_SIZE
        dh = np.exp(dh) * IMAGE_SIZE
        landmark_map *= IMAGE_SIZE
        # add Gx and Gy: shift the decoded offsets back to the original box origin
        G = origin_bbox
        G = G.astype(np.float32)
        dx = dx / (float(IMAGE_SIZE) / G[:, 2]) + G[:, 0]
        dy = dy / (float(IMAGE_SIZE) / G[:, 3]) + G[:, 1]
        dw = dw / (float(IMAGE_SIZE) / G[:, 2])
        dh = dh / (float(IMAGE_SIZE) / G[:, 3])
        for i in range(5):
            landmark_map[:,i*2] = landmark_map[:,i*2] / (float(IMAGE_SIZE) / G[:, 2]) + G[:, 0]
            landmark_map[:,1+i*2] = landmark_map[:,1+i*2] / (float(IMAGE_SIZE) / G[:, 3]) + G[:, 1]
        # compose final rows: [x, y, w, h, score] followed by the 10 landmark coordinates
        bbox = np.vstack([dx, dy, dw, dh, score])
        bbox = bbox.T
        bbox = np.hstack([bbox,landmark_map])
        # do nms
        if image_size == 24:
            keep = py_nms(bbox, 0.6, "Union")
            if len(keep) <= 0:
                return None
            return bbox[keep]

        if image_size == 48:
            keep = py_nms(bbox, 0.6, "Minimum")
            if len(keep) <= 0:
                return None
            return bbox[keep]
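
detect_ronet depends on two helpers that are not shown: square_bbox, which turns an [x, y, w, h] box into a square with side max(w, h) centred on the original box, and pad_bbox, which intersects that square with the image and returns both the source coordinates in the image and the destination coordinates inside a size x size crop. Here is a minimal sketch under those assumptions; the repository's actual helpers may differ in rounding and clamping details.

import numpy as np

def square_bbox(box):
    """Turn an [x, y, w, h, ...] box into a square [x, y, size, size] box
    with side max(w, h), centred on the original box. Sketch only."""
    x, y, w, h = box[0], box[1], box[2], box[3]
    size = int(np.maximum(w, h))
    sx = int(x + w * 0.5 - size * 0.5)
    sy = int(y + h * 0.5 - size * 0.5)
    return np.array([sx, sy, size, size], dtype=np.int32)

def pad_bbox(box, img_w, img_h):
    """Intersect a square box with the image and return
    (sx0, sy0, sx1, sy1, dx0, dy0, dx1, dy1): source coordinates in the image
    and destination coordinates inside a size x size crop. Sketch only."""
    x, y, size = int(box[0]), int(box[1]), int(box[2])
    # source region clipped to the image bounds
    sx0, sy0 = max(x, 0), max(y, 0)
    sx1, sy1 = min(x + size, img_w), min(y + size, img_h)
    # destination region inside the crop, shifted by how much was clipped away
    dx0, dy0 = sx0 - x, sy0 - y
    dx1, dy1 = dx0 + (sx1 - sx0), dy0 + (sy1 - sy0)
    return sx0, sy0, sx1, sy1, dx0, dy0, dx1, dy1

With these definitions, crop[dy0:dy1, dx0:dx1, :] = img[sy0:sy1, sx0:sx1, :] copies regions of matching shape, which is how the snippets above use the returned coordinates.
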
Example #4
    def detect_ronet(self, img, bboxes, image_size):
        H, W, C = img.shape
        IMAGE_SIZE = image_size
      
        # convert each candidate bbox to a square
        sb = []
        for i in range(bboxes.shape[0]):
            box = bboxes[i, :]
            sq = square_bbox(box)
            sb += [sq]

        # pad / crop each square box out of the image
        crops = []
        origin_bbox = []
        for i in sb:
            size = i[2]
            sx0, sy0, sx1, sy1, dx0, dy0, dx1, dy1 = pad_bbox(i, W, H)
            crop = np.zeros((size, size, 3), dtype=np.uint8)
            if sx0 < 0 or sy0 < 0 or dx0 < 0 or dy0 < 0 or sx1 > W or sy1 > H or dx1 > size or dy1 > size:
                continue
            crop[dy0:dy1, dx0:dx1, :] = img[sy0:sy1, sx0:sx1, :]
            out = cv2.resize(crop, (IMAGE_SIZE, IMAGE_SIZE))
            out = out.astype(np.float32) - np.array([127.5,127.5,127.5], dtype=np.float32)
            out = out.swapaxes(1, 2).swapaxes(0, 1)
            crops += [out]
            origin_bbox += [i]

        origin_bbox = np.array(origin_bbox)
        crops = nd.array(crops)
        input =  crops.as_in_context(self.ctx)
        detector = self.rnet
        threshold = self.threshold[1]
        if image_size == 48:
            detector = self.onet
            threshold = self.threshold[2]

        out = detector(input)

        cls_map = out[0].asnumpy()
        reg = out[1].asnumpy()
        face_map = cls_map[:, 1]
        t_index = np.where(face_map > threshold)
        if t_index[0].shape[0] <= 0:
            return None

        origin_bbox = origin_bbox[t_index]
        score = face_map[t_index]
        reg_map = reg[t_index]

        dx = reg_map[:, 0]
        dy = reg_map[:, 1]
        dw = reg_map[:, 2]
        dh = reg_map[:, 3]

        dx *= IMAGE_SIZE
        dy *= IMAGE_SIZE
        dw = np.exp(dw) * IMAGE_SIZE
        dh = np.exp(dh) * IMAGE_SIZE

        # add Gx and Gy: shift the decoded offsets back to the original box origin
        G = origin_bbox
        G = G.astype(np.float32)

        dx = dx / (float(IMAGE_SIZE) / G[:, 2]) + G[:, 0]
        dy = dy / (float(IMAGE_SIZE) / G[:, 3]) + G[:, 1]
        dw = dw / (float(IMAGE_SIZE) / G[:, 2])
        dh = dh / (float(IMAGE_SIZE) / G[:, 3])

        # compose final rows: [x, y, w, h, score]
        bbox = np.vstack([dx, dy, dw, dh, score])
        bbox = bbox.T

        # do nms
        if image_size == 24:
            keep = py_nms(bbox, 0.7, "Union")
            if len(keep) <= 0:
                return None
            return bbox[keep]

        if image_size == 48:
            keep = py_nms(bbox, 0.7, "Minimum")
            if len(keep) <= 0:
                return None
            return bbox[keep]
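
All four snippets come from an MTCNN-style cascade in which detect_pnet produces the initial proposals and detect_ronet refines them twice, first with RNet at 24x24 and then with ONet at 48x48. A hypothetical driver for the PyTorch variant might chain the stages as shown below; the detector object and its attributes are assumptions for illustration, not the repository's actual API.

import cv2

def detect_faces(detector, image_path):
    """Run the three-stage cascade sketched above on a single image.

    `detector` is assumed to expose detect_pnet and detect_ronet as in
    Example #1 and Example #3; every stage may return None when no candidate
    survives its threshold and NMS.
    """
    im = cv2.imread(image_path)
    if im is None:
        return None

    boxes = detector.detect_pnet(im)                # stage 1: PNet proposals
    if boxes is None:
        return None

    boxes = detector.detect_ronet(im, boxes, 24)    # stage 2: RNet refinement
    if boxes is None:
        return None

    boxes = detector.detect_ronet(im, boxes, 48)    # stage 3: ONet refinement + landmarks
    return boxes
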