Example #1
    def oNetDetect(self, imge, rNetBoxes):
        imgDataset = []

        # return an empty result if there are no boxes
        if len(rNetBoxes) == 0:
            return np.array([])

        rNetBoxes = utils.convert_to_square(rNetBoxes)

        for box in rNetBoxes:
            imgeCrop = imge.crop(
                (int(box[0]), int(box[1]), int(box[2]), int(box[3])))
            imgeCrop = imgeCrop.resize((48, 48))

            imgData = self.imgTransform(imgeCrop)
            imgDataset.append(imgData)

        imgDataset = torch.stack(imgDataset)
        imgDataset = imgDataset.to(self.device)
        cons, offsets, landmark = self.oNet(imgDataset)

        return self.boxDetect(rNetBoxes,
                              cons,
                              offsets,
                              conMax=self.oCon,
                              nmsMax=self.oNms,
                              iouMode='min',
                              landMark=landmark)
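Every example on this page calls utils.convert_to_square before cropping, but the helper itself is not reproduced here. The sketch below shows the usual MTCNN version (an assumption, not this page's code): each box is grown around its center so both sides equal the longer side, which keeps the crops square before they are resized to the 24x24 or 48x48 network inputs.

import numpy as np

def convert_to_square(bbox):
    # bbox: (N, 5) array of [x1, y1, x2, y2, score] rows (assumed layout)
    square_bbox = bbox.copy()
    w = bbox[:, 2] - bbox[:, 0]
    h = bbox[:, 3] - bbox[:, 1]
    max_side = np.maximum(w, h)
    # keep the center fixed and grow the shorter side to max_side
    square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
    square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
    square_bbox[:, 2] = square_bbox[:, 0] + max_side
    square_bbox[:, 3] = square_bbox[:, 1] + max_side
    return square_bbox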
Example #2
    def rNetDetect(self, imge, pNetBoxes):

        # return an empty result if there are no boxes
        if len(pNetBoxes) == 0:
            return []

        imgDataSets = []
        pNetBoxes = utils.convert_to_square(pNetBoxes)

        # crop the patches proposed by the P-Net boxes
        for pBox in pNetBoxes:

            imgeCrop = imge.crop(
                (int(pBox[0]), int(pBox[1]), int(pBox[2]), int(pBox[3])))
            imgeCrop = imgeCrop.resize((24, 24))

            imgData = self.imgTransform(imgeCrop)
            imgDataSets.append(imgData)

        # convert the list of images into one batched tensor
        imgDataSets = torch.stack(imgDataSets)
        imgDataSets = imgDataSets.to(self.device)

        cons, offsets, landMark = self.rNet(imgDataSets)

        return self.boxDetect(pNetBoxes,
                              cons,
                              offsets,
                              conMax=self.rCon,
                              nmsMax=self.rNms,
                              landMark=landMark)
Example #3
    def __onet_detect(self, image, rnet_boxes):

        _img_dataset = []
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((48, 48))
            img_data = self.__image_transform(img)
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)  # torch.autograd.Variable is deprecated; plain tensors suffice
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset, landmark = self.onet(img_dataset)

        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()
        landmark = landmark.cpu().data.numpy()

        boxes = []
        idxs, _ = np.where(cls > 0.7)
        for idx in idxs:
            _box = _rnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            ow = _x2 - _x1
            oh = _y2 - _y1

            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            px1 = _x1 + ow * landmark[idx][0]
            py1 = _y1 + oh * landmark[idx][1]
            px2 = _x1 + ow * landmark[idx][2]
            py2 = _y1 + oh * landmark[idx][3]
            px3 = _x1 + ow * landmark[idx][4]
            py3 = _y1 + oh * landmark[idx][5]
            px4 = _x1 + ow * landmark[idx][6]
            py4 = _y1 + oh * landmark[idx][7]
            px5 = _x1 + ow * landmark[idx][8]
            py5 = _y1 + oh * landmark[idx][9]

            boxes.append([
                x1, y1, x2, y2, cls[idx][0], px1, py1, px2, py2, px3, py3, px4,
                py4, px5, py5
            ])

        return utils.nms(np.array(boxes), 0.7, isMin=True)
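utils.nms with the isMin flag appears throughout these examples but is defined elsewhere. A minimal sketch consistent with how it is called (boxes as an (N, 5) array, a threshold, and isMin switching the denominator from the union to the smaller area, which suppresses small boxes nested inside large ones):

import numpy as np

def nms(boxes, thresh, isMin=False):
    if boxes.shape[0] == 0:
        return np.array([])
    _boxes = boxes[(-boxes[:, 4]).argsort()]  # sort by confidence, highest first
    keep = []
    while _boxes.shape[0] > 1:
        a, rest = _boxes[0], _boxes[1:]
        keep.append(a)
        xx1 = np.maximum(a[0], rest[:, 0])
        yy1 = np.maximum(a[1], rest[:, 1])
        xx2 = np.minimum(a[2], rest[:, 2])
        yy2 = np.minimum(a[3], rest[:, 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_r = (rest[:, 2] - rest[:, 0]) * (rest[:, 3] - rest[:, 1])
        # isMin divides by the smaller area instead of the union
        denom = np.minimum(area_a, area_r) if isMin else area_a + area_r - inter
        _boxes = rest[inter / denom < thresh]
    if _boxes.shape[0] > 0:
        keep.append(_boxes[0])
    return np.stack(keep)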
Example #4
    def __onet_detect(self, image, rnet_boxes):
        boxes = []
        _img_dataset = []
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((48, 48))
            img_data = self.__image_transform(img)
            _img_dataset.append(img_data)

        # img_dataset = torch.stack(_img_dataset)
        img_dataset = tuple(_img_dataset)
        img_dataset = torch.cat(img_dataset, 2)
        img_dataset = torch.unsqueeze(img_dataset, 0)
        # print(img_dataset.size())
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.onet(img_dataset)
        _cls = _cls.view(1, -1)
        _cls = _cls.permute(1, 0)
        _offset = _offset.view(4, -1)
        _offset = _offset.permute(1, 0)
        # cls = _cls.cpu().detach().numpy()
        # offset = _offset.cpu().detach().numpy()
        cls = []
        offset = []
        for j in range(len(rnet_boxes)):
            cls.append(_cls[j * 6:j * 6 + 1])
            offset.append(_offset[j * 6:j * 6 + 1])
        cls = tuple(cls)
        cls = torch.cat(cls, 0).cpu().detach().numpy()
        offset = tuple(offset)
        offset = torch.cat(offset, 0).cpu().detach().numpy()

        idxs, _ = np.where(cls > 0.999)
        for idx in idxs:
            _box = rnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            ow = _x2 - _x1
            oh = _y2 - _y1

            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            boxes.append([x1, y1, x2, y2, cls[idx][0]])
        # print(len(boxes))
        return utils.nms(np.array(boxes), 0.2, isMin=True)
Example #5
    def __rnet_detect(self, image, pnet_boxes):
        _img_dataset = []
        _pnet_boxes = utils.convert_to_square(pnet_boxes)
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((24, 24))
            img_data = self.__image_transform(img)
            # img_data = torch.Tensor(np.array(img) / 255. - 0.5)
            # img_data = img_data.permute(2, 0, 1)
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.rnet(img_dataset)

        cls = _cls.cpu().data
        offset = _offset.cpu().data

        # for full convolution
        cls = cls.view(-1, 1)
        offset = offset.view(-1, 4)

        # idxs, _ = np.where(cls > 0.9)
        idxs = torch.nonzero(cls > 0.99)[:, 0]
        # idxs = torch.nonzero((cls > 0.90) & (cls < 0.95))[:, 0]

        _boxs = torch.tensor(_pnet_boxes)[idxs]
        # _x1 = np.array(_boxs[:, 0], dtype=np.int32)
        # _y1 = np.array(_boxs[:, 1], dtype=np.int32)
        # _x2 = np.array(_boxs[:, 2], dtype=np.int32)
        # _y2 = np.array(_boxs[:, 3], dtype=np.int32)
        _x1 = _boxs[:, 0]
        _y1 = _boxs[:, 1]
        _x2 = _boxs[:, 2]
        _y2 = _boxs[:, 3]

        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = _x1 + ow * offset[idxs][:, 0]
        y1 = _y1 + oh * offset[idxs][:, 1]
        x2 = _x2 + ow * offset[idxs][:, 2]
        y2 = _y2 + oh * offset[idxs][:, 3]

        # boxes = np.stack((x1, y1, x2, y2, cls[idxs][:, 0]), axis=1)
        boxes = torch.stack((x1, y1, x2, y2, cls[idxs][:, 0]), dim=1)

        return utils.nms(np.array(boxes), 0.1)
Example #6
    def __rnet_detect(self, image, pnet_boxes):

        _img_dataset = []
        _pnet_boxes = utils.convert_to_square(pnet_boxes)

        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((24, 24))
            img_data = self._image_transform(img)

            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.rnet(img_dataset)
        cls = _cls.cpu().data.numpy()

        offset = _offset.cpu().data.numpy()

        boxes = []
        idxs, _ = np.where(cls > 0.7)  # typical values: 0.6, 0.7, 0.8

        _box = _pnet_boxes[idxs]
        # print(_box.shape)  # (16, 5)
        _x1 = np.array(_box[:, 0])
        _y1 = np.array(_box[:, 1])
        _x2 = np.array(_box[:, 2])
        _y2 = np.array(_box[:, 3])

        ow = _x2 - _x1
        oh = _y2 - _y1

        offset = offset[idxs].T
        cls = cls[idxs].T

        x1 = _x1 + ow * offset[0, :]  # map the predicted offsets back to real-box coordinates
        y1 = _y1 + oh * offset[1, :]
        x2 = _x2 + ow * offset[2, :]
        y2 = _y2 + oh * offset[3, :]

        out_boxes = np.dstack((x1, y1, x2, y2, cls))
        # print(out_boxes.shape)  # (1, 16, 5)
        out_boxes = np.squeeze(out_boxes, 0)
        # print(out_boxes.shape)  # (16, 5)

        return utils.nms(np.array(out_boxes), 0.5)  # 0.5
Example #7
    def __onet_detect(self, image, rnet_boxes):

        _img_dataset = []
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((48, 48))
            img_data = self.__image_transform(img)
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset, _alli = self.onet(img_dataset)
        cls = _cls.cpu().data.numpy()       # (1, 1)
        offset = _offset.cpu().data.numpy() # (1, 4)
        alli = _alli.cpu().data.numpy()

        idxs, _ = np.where(cls > o_cls)
        _box = _rnet_boxes[idxs]
        _x1 = _box[:, 0].astype('int32')
        _y1 = _box[:, 1].astype('int32')
        _x2 = _box[:, 2].astype('int32')
        _y2 = _box[:, 3].astype('int32')

        ow = _x2 - _x1
        oh = _y2 - _y1

        x1 = _x1 + ow * offset[idxs, 0]
        y1 = _y1 + oh * offset[idxs, 1]
        x2 = _x2 + ow * offset[idxs, 2]
        y2 = _y2 + oh * offset[idxs, 3]
        # all five landmarks are decoded from the box's top-left corner, scaled by its size
        le_x = _x1 + ow * alli[idxs, 0]
        le_y = _y1 + oh * alli[idxs, 1]
        re_x = _x1 + ow * alli[idxs, 2]
        re_y = _y1 + oh * alli[idxs, 3]
        n_x = _x1 + ow * alli[idxs, 4]
        n_y = _y1 + oh * alli[idxs, 5]
        lm_x = _x1 + ow * alli[idxs, 6]
        lm_y = _y1 + oh * alli[idxs, 7]
        rm_x = _x1 + ow * alli[idxs, 8]
        rm_y = _y1 + oh * alli[idxs, 9]

        boxes = np.stack((x1, y1, x2, y2, cls[idxs].T[0], le_x, le_y, re_x, re_y, n_x, n_y, lm_x, lm_y, rm_x, rm_y)).T

        return utils.nms(np.array(boxes), o_nms, isMin=True)  # IoU uses the smaller area instead of the union
Example #8
    def _rnet_detect(self, inputs, boxes):
        """
        通过PNet结果修正人脸框后送入RNet网络
        :param boxes:
        :return:
        """
        # 将pnet的box变成包含它的正方形,可以避免信息损失
        convert_to_square(boxes)

        rnet_box = Detector._pad(inputs, boxes, 24)

        cls, reg = self.rnet.predict(rnet_box)

        keep = np.where(cls[:, 1] > self._r_pro)
        if keep[0].size == 0:
            return np.empty((0, 4)), np.empty((0, 1))

        scores = cls[:, 1][keep]

        boxes = Detector._get_reg_box(boxes[keep], reg[keep])
        keep = nms(boxes, scores, 0.7, 'union')
        return boxes[keep], scores[keep]
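Detector._get_reg_box is not shown on this page. Given how the loop-based examples above decode offsets (corner plus offset times box width/height), a plausible vectorized sketch, under that assumption, is:

import numpy as np

def _get_reg_box(boxes, reg):
    # boxes: (N, 4+) [x1, y1, x2, y2, ...]; reg: (N, 4) offsets scaled by box size (assumed)
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    out = boxes[:, :4].copy()
    out[:, 0] += reg[:, 0] * w
    out[:, 1] += reg[:, 1] * h
    out[:, 2] += reg[:, 2] * w
    out[:, 3] += reg[:, 3] * h
    return out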
Example #9
    def __rnet_detect(self, image, pnet_boxes):
        boxes = []
        _img_dataset = []
        _pnet_boxes = utils.convert_to_square(pnet_boxes)
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((24, 24))
            img_data = self.__image_transform(img)
            _img_dataset.append(img_data)

        # img_dataset = torch.stack(_img_dataset)

        img_dataset = tuple(_img_dataset)
        img_dataset = torch.cat(img_dataset, 2)
        img_dataset = torch.unsqueeze(img_dataset, 0)
        # print(img_dataset.size())
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.rnet(img_dataset)
        _cls = _cls.view(1, -1)
        _cls = _cls.permute(1, 0)
        _offset = _offset.view(4, -1)
        _offset = _offset.permute(1, 0)
        cls = _cls.cpu().detach().numpy()
        offset = _offset.cpu().detach().numpy()
        idxs, _ = np.where(cls > 0.99)
        for idx in idxs:
            _box = _pnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            ow = _x2 - _x1
            oh = _y2 - _y1

            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            boxes.append([x1, y1, x2, y2, cls[idx][0]])

        return utils.nms(np.array(boxes), 0.4)
Example #10
    def _onet_detect(self, inputs, boxes):
        convert_to_square(boxes)

        onet_box = Detector._pad(inputs, boxes, 48)

        cls, reg, marks = self.onet.predict(onet_box)

        keep = np.where(cls[:, 1] > self._o_pro)
        if keep[0].size == 0:
            return np.empty((0, 4)), np.empty((0, 1)), np.empty((0, 10))

        scores = cls[:, 1][keep]
        marks = marks[keep]
        boxes = boxes[keep]

        marks = np.reshape(marks, (-1, 5, 2))
        marks[:, :, 0] = marks[:, :, 0] * (boxes[:, 2:3] - boxes[:, 0:1]) + boxes[:, 0:1]
        marks[:, :, 1] = marks[:, :, 1] * (boxes[:, 3:4] - boxes[:, 1:2]) + boxes[:, 1:2]
        marks = np.int32(marks)

        boxes = Detector._get_reg_box(boxes, reg[keep])
        keep = nms(boxes, scores, 0.7, 'min')
        return boxes[keep], scores[keep], marks[keep]
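Detector._pad, used by both stages above, is also external. A sketch of the usual idea, assuming inputs is an HWC image and boxes may extend past its borders (any normalization step is omitted): copy the visible part of each square box into a zero-padded patch, then resize to the network input size.

import cv2
import numpy as np

def _pad(img, boxes, size):
    h, w = img.shape[:2]
    out = np.zeros((boxes.shape[0], size, size, 3), dtype=np.float32)
    for i, box in enumerate(boxes.astype(int)):
        x1, y1, x2, y2 = box[:4]
        bw, bh = x2 - x1, y2 - y1
        if bw <= 0 or bh <= 0:
            continue  # degenerate box; leave the patch all-zero
        patch = np.zeros((bh, bw, 3), dtype=img.dtype)
        sx1, sy1 = max(x1, 0), max(y1, 0)  # visible region inside the image
        sx2, sy2 = min(x2, w), min(y2, h)
        patch[sy1 - y1:sy2 - y1, sx1 - x1:sx2 - x1] = img[sy1:sy2, sx1:sx2]
        out[i] = cv2.resize(patch, (size, size))
    return out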
Example #11
    def __rnet_detect(self, image, pnet_boxes):

        _img_dataset = []
        _pnet_boxes = utils.convert_to_square(pnet_boxes)

        # make sure we have a PIL Image before cropping (hoisted out of the loop)
        image = Image.fromarray(np.array(image))

        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((24, 24))
            img_data = self._image_transform(img)
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.rnet(img_dataset)
        cls = _cls.cpu().data.numpy()

        offset = _offset.cpu().data.numpy()

        boxes = []
        idxs, _ = np.where(cls > 0.6)  # typical range: 0.7-0.8
        for idx in idxs:
            _box = _pnet_boxes[idx]  # fetch the crop box by index so the offsets can be mapped back
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            ow = _x2 - _x1
            oh = _y2 - _y1

            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            # print(cls[idx][0])  # scalar confidence

            boxes.append([x1, y1, x2, y2, cls[idx][0]])

        return utils.nms(np.array(boxes), 0.5)
Example #12
    def __rnet_detect(self, image, pnet_boxes):
        # empty list to hold the cropped patches
        _img_dataset = []
        # expand each P-Net box around its center into a square before cropping
        _pnet_boxes = utils.convert_to_square(pnet_boxes)
        # for each box: take its 4 coordinates, crop, resize, convert, append
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            img = image.crop((_x1, _y1, _x2, _y2))  # crop by the 4 coordinates
            img = img.resize((24, 24))  # resize to the fixed input size
            img_data = self.data_for(img)  # convert the image array to a tensor
            _img_dataset.append(img_data)
        # stack along dim 0: list of CHW tensors -> NCHW batch
        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()
        # run through R-Net
        _cond, _offset_face = self.rnet(img_dataset)
        cond = _cond.cpu().data.numpy()  # move from GPU to CPU, then convert to a numpy array
        offset_face = _offset_face.cpu().data.numpy()
        # the original confidence threshold of 0.6 is on the low side (many boxes are useless;
        # print them to check) and can be raised; idxs holds the row indices where cond > r_cls
        idxs, _ = np.where(cond > r_cls)
        # gather the selected boxes
        _box = _pnet_boxes[idxs]
        # the two corner points of each box
        _x1 = _box[:, 0].astype(np.int32)
        _y1 = _box[:, 1].astype(np.int32)
        _x2 = _box[:, 2].astype(np.int32)
        _y2 = _box[:, 3].astype(np.int32)
        # box width and height
        ow = _x2 - _x1
        oh = _y2 - _y1
        # coordinates of the refined (real) boxes
        x1 = _x1 + ow * offset_face[idxs][:, 0]
        y1 = _y1 + oh * offset_face[idxs][:, 1]
        x2 = _x2 + ow * offset_face[idxs][:, 2]
        y2 = _y2 + oh * offset_face[idxs][:, 3]
        cls = cond[idxs][:, 0]
        boxes_ = np.array([x1, y1, x2, y2, cls], dtype=np.float64)
        boxes_out = np.swapaxes(boxes_, 1, 0)  # (5, N) -> (N, 5)
        # the original r_nms of 0.5 should be lowered (and the 0.6 above raised);
        # boxes with IoU below the threshold are kept
        return utils.nms(boxes_out, r_nms)
Example #13
    def __onet_detect(self, image, rnet_boxes):

        _img_dataset = []  # list to hold the cropped patches
        _rnet_boxes = utils.convert_to_square(
            rnet_boxes)  # expand each R-Net box around its center into a square
        for _box in _rnet_boxes:  # for each box kept by R-Net: compute coordinates, crop, resize, convert, append
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))  # crop by the coordinates
            img = img.resize((48, 48))
            img_data = self.data_for(img)  # convert the cropped patch to a tensor
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)  # stack: list of CHW tensors -> NCHW batch
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset, _offset_facial = self.onet(img_dataset)
        cls = _cls.cpu().data.numpy()  # (N, 1)
        offset = _offset.cpu().data.numpy()  # (N, 4)

        boxes = []  # holds the O-Net results
        # the original o_cls of 0.97 is on the low side; to keep only faces the confidence
        # should approach 0.99999 (e.g. 0.99998); keep boxes with confidence above the threshold
        idxs, _ = np.where(cls > o_cls)
        for idx in idxs:  # iterate over the boxes that passed the confidence threshold
            _box = _rnet_boxes[idx]  # use the R-Net box as the anchor box
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            ow = _x2 - _x1  # anchor width; the box is square, so ow == oh
            oh = _y2 - _y1

            # final O-Net box coordinates; the offset was encoded as δ = (x1 - _x1) / side_len
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            boxes.append([x1, y1, x2, y2, cls[idx][0]])  # append the 4 coordinates and the confidence

        # NMS with IoU over the smaller area; boxes with IoU below o_nms (originally 0.7) are kept
        return utils.nms(np.array(boxes), o_nms, isMin=True)
Example #14
    def __rnet_detect(self, image, pnet_boxes):
        _img_dataset = []
        _pnet_boxes = utils.convert_to_square(pnet_boxes)
        pool = mp.Pool(5)
        for _box in _pnet_boxes:
            _img_dataset.append(
                pool.apply_async(self.__crop_box, args=(
                    _box,
                    image,
                )))
        pool.close()
        pool.join()
        _img_dataset = [_img_data.get() for _img_data in _img_dataset]
        img_dataset = torch.stack(tuple(_img_dataset))
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.rnet(img_dataset)

        cls = _cls.cpu().data
        offset = _offset.cpu().data

        # idxs, _ = np.where(cls > 0.9)
        idxs = torch.nonzero(cls > 0.9)[:, 0]

        _boxs = torch.tensor(_pnet_boxes)[idxs]

        # _x1 = np.array(_boxs[:, 0], dtype=np.int32)
        # _y1 = np.array(_boxs[:, 1], dtype=np.int32)
        # _x2 = np.array(_boxs[:, 2], dtype=np.int32)
        # _y2 = np.array(_boxs[:, 3], dtype=np.int32)
        _x1 = _boxs[:, 0]
        _y1 = _boxs[:, 1]
        _x2 = _boxs[:, 2]
        _y2 = _boxs[:, 3]

        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = _x1 + ow * offset[idxs][:, 0]
        y1 = _y1 + oh * offset[idxs][:, 1]
        x2 = _x2 + ow * offset[idxs][:, 2]
        y2 = _y2 + oh * offset[idxs][:, 3]

        # boxes = np.stack((x1, y1, x2, y2, cls[idxs][:, 0]), axis=1)
        boxes = torch.stack((x1, y1, x2, y2, cls[idxs][:, 0]), dim=1)

        return utils.nms(np.array(boxes), 0.4)
Example #15
    def __onet_detect(self, image, rnet_boxes):
        _img_dataset = []
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((48, 48))
            img_data = self.__image_transform(img)
            # img_data = torch.Tensor(np.array(img) / 255. - 0.5)
            # img_data = img_data.permute(2, 0, 1)
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.onet(img_dataset)
        cls = _cls.cpu().data
        offset = _offset.cpu().data

        # for full convolution
        cls = cls.view(-1, 1)
        offset = offset.view(-1, 4)

        idxs = torch.nonzero(cls > 0.9999)[:, 0]

        _boxs = torch.tensor(rnet_boxes)[idxs]
        _x1 = _boxs[:, 0]
        _y1 = _boxs[:, 1]
        _x2 = _boxs[:, 2]
        _y2 = _boxs[:, 3]

        ow = _x2 - _x1
        oh = _y2 - _y1

        x1 = _x1 + ow * offset[idxs][:, 0]
        y1 = _y1 + oh * offset[idxs][:, 1]
        x2 = _x2 + ow * offset[idxs][:, 2]
        y2 = _y2 + oh * offset[idxs][:, 3]

        boxes = torch.stack((x1, y1, x2, y2, cls[idxs][:, 0]), dim=1)
        # print(len(boxes))
        return utils.nms(np.array(boxes), 0.2, isMin=True)
Example #16
    def __onet_detect(self, image, rnet_boxes):
        _img_dataset = []  # list to hold the cropped patches
        _rnet_boxes = utils.convert_to_square(
            rnet_boxes)  # expand each R-Net box around its center into a square
        for _box in _rnet_boxes:  # for each box kept by R-Net: compute coordinates, crop, resize, convert, append
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            img = image.crop((_x1, _y1, _x2, _y2))  # crop by the coordinates
            img = img.resize((48, 48))
            img_data = self.data_for(img)  # convert the cropped patch to a tensor
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)  # stack: list of CHW tensors -> NCHW batch
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.onet(img_dataset)
        cls = _cls.cpu().data.numpy()  # (N, 1)
        offset = _offset.cpu().data.numpy()  # (N, 4)

        # the original o_cls of 0.97 is on the low side; to keep only faces the confidence
        # should approach 0.99999 (e.g. 0.99998); keep boxes with confidence above the threshold
        idxs, _ = np.where(cls > o_cls)

        # gather the selected boxes
        _box = _rnet_boxes[idxs]
        # the two corner points of each box
        _x1 = _box[:, 0].astype(np.int32)
        _y1 = _box[:, 1].astype(np.int32)
        _x2 = _box[:, 2].astype(np.int32)
        _y2 = _box[:, 3].astype(np.int32)
        # box width and height
        ow = _x2 - _x1
        oh = _y2 - _y1
        # coordinates of the refined (real) boxes
        x1 = _x1 + ow * offset[idxs][:, 0]
        y1 = _y1 + oh * offset[idxs][:, 1]
        x2 = _x2 + ow * offset[idxs][:, 2]
        y2 = _y2 + oh * offset[idxs][:, 3]
        cls = cls[idxs][:, 0]
        boxes_ = np.array([x1, y1, x2, y2, cls], dtype=np.float64)
        boxes_out = np.swapaxes(boxes_, 1, 0)  # (5, N) -> (N, 5)
        # NMS with IoU over the smaller area; boxes with IoU below o_nms (originally 0.7) are kept
        return utils.nms(boxes_out, o_nms, isMin=True)
Example #17
    def __onet_detect(self, image, rnet_boxes):

        _img_dataset = []
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((48, 48))
            img_data = self._image_transform(img)
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.onet(img_dataset)

        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()

        boxes = []
        idxs, _ = np.where(cls > 0.95)  # usually 0.99
        for idx in idxs:
            _box = _rnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            ow = _x2 - _x1
            oh = _y2 - _y1
            # the crop coordinates are already known at input time, so only the predicted
            # offsets need to be mapped back to the real box
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            boxes.append([x1, y1, x2, y2, cls[idx][0]])

        return utils.nms(np.array(boxes), 0.7,
                         isMin=True)  # threshold 0.7; boxes with IoU below 0.7 are kept
Example #18
    def __rnet_detect(self, image, pnet_boxes):
        _img_dataset = []
        _pnet_boxes = utils.convert_to_square(pnet_boxes)
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((24, 24))
            img_data = self.__image_transform(img)
            # img_data = torch.Tensor(np.array(img) / 255. - 0.5)
            # img_data = img_data.permute(2, 0, 1)
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.rnet(img_dataset)

        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()

        boxes = []
        idxs, _ = np.where(cls > 0.9)
        for idx in idxs:
            _box = _pnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            ow = _x2 - _x1
            oh = _y2 - _y1

            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            boxes.append([x1, y1, x2, y2, cls[idx][0]])

        return utils.nms(np.array(boxes), 0.4)
Example #19
    def __rnet_detect(self, image, pnet_boxes):

        _img_dataset = []
        _pnet_boxes = utils.convert_to_square(pnet_boxes) # convert box to square
        for _box in _pnet_boxes: # for loop is slow
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop((_x1, _y1, _x2, _y2)) # crop
            img = img.resize((24, 24)) # resize
            img_data = self.__image_transform(img)
            _img_dataset.append(img_data)

        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset, _ = self.rnet(img_dataset)

        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()

        idxs, _ = np.where(cls > r_cls)
        _box = _pnet_boxes[idxs]
        _x1 = _box[:,0].astype('int32')
        _y1 = _box[:,1].astype('int32')
        _x2 = _box[:,2].astype('int32')
        _y2 = _box[:,3].astype('int32')

        ow = _x2 - _x1
        oh = _y2 - _y1

        x1 = _x1 + ow * offset[idxs,0]
        y1 = _y1 + oh * offset[idxs,1]
        x2 = _x2 + ow * offset[idxs,2]
        y2 = _y2 + oh * offset[idxs,3]

        boxes = np.stack((x1,y1,x2,y2,cls[idxs].T[0])).T

        return utils.nms(np.array(boxes), r_nms)
Example #20
    def __detect_rnet(self, image, pnet_boxes):
        _img_dataset = []
        _pnet_boxes = convert_to_square(pnet_boxes, image)
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            img = image.crop([_x1, _y1, _x2, _y2])
            img = img.resize((24, 24))

            img_data = self.__image_transform(img).to(self.device)
            _img_dataset.append(img_data)
        img_dataset = torch.stack(_img_dataset)
        _cls, _offset, _landmark = self.rnet(img_dataset)

        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()
        landmark = _landmark.cpu().data.numpy()

        index, _ = np.where(cls > 0.9)
        if len(index) == 0:
            return np.array([])
        boxes_ = pnet_boxes[index]
        cls = cls[index].reshape(-1, 1)
        offset = offset[index]
        landmark = landmark[index]
        wh = np.array([boxes_[:, i + 3] - boxes_[:, i + 1] for i in range(2)]).transpose(1, 0)
        center = np.array([0.5 * (boxes_[:, i + 1] + boxes_[:, i + 3]) for i in range(2)]).transpose(1, 0)
        # decode against the box center: offsets/landmarks are scaled by w/h and shifted by the center
        offset_boxes = offset * np.tile(wh, (1, 2)) + np.tile(center, (1, 2))
        landmark_boxes = landmark * np.tile(wh, (1, 5)) + np.tile(center, (1, 5))

        boxes = np.concatenate([cls, offset_boxes, landmark_boxes], axis=1)

        return nms(boxes, 0.6)
Example #21
def save_hard_example(net, data, save_path):
    # load ground truth from annotation file
    # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image

    im_idx_list = data['images']
    # print(images[0])
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)

    print("processing %d images in total" % num_of_images)


    # save files
    neg_label_file = "../DATA/no_LM%d/neg_%d.txt" % (net, image_size)
    neg_file = open(neg_label_file, 'w+')

    pos_label_file = "../DATA/no_LM%d/pos_%d.txt" % (net, image_size)
    pos_file = open(pos_label_file, 'w+')

    part_label_file = "../DATA/no_LM%d/part_%d.txt" % (net, image_size)
    part_file = open(part_label_file, 'w+')
    #read detect result
    # test=os.path.join(save_path, 'detections.pkl')

    # if not os.path.exists(save_path):
    #     os.mkdir(save_path)
    # print(save_path)
    # print(os.path.join(save_path, 'detections.pkl'))
    # with open(os.path.join(save_path, 'detections.pkl'), 'w+b') as f:
    #     print("after opening it")
    #     det_boxes = pickle.load(f)
    #     f.close()
    det_boxes = pickle.load(open(os.path.join(save_path, 'detections.pkl'), 'rb'))
    # print(len(det_boxes), num_of_images)
    print(len(det_boxes))
    print(num_of_images)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # index of neg, pos and part face, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    #im_idx_list image index(list)
    #det_boxes detect result(list)
    #gt_boxes_list gt(list)
    for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
        gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
        if image_done % 100 == 0:
            print("%d images done" % image_done)
        image_done += 1

        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        #change to square
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])
        neg_num = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom, _ = box.astype(int)
            width = x_right - x_left + 1
            height = y_bottom - y_top + 1

            # ignore box that is too small or beyond image border
            if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                continue

            # compute intersection over union(IoU) between current box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)

            # save negative images and write label
            # Iou with all gts must below 0.3
            if np.max(Iou) < 0.3 and neg_num < 60:
                #save the examples
                save_file = get_path(neg_dir, "%s.jpg" % n_idx)
                # print(save_file)
                neg_file.write(save_file + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1
            else:
                # find gt_box with the highest iou
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                # save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    save_file = get_path(pos_dir, "%s.jpg" % p_idx)
                    pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1

                elif np.max(Iou) >= 0.4:
                    save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                    part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
    neg_file.close()
    part_file.close()
    pos_file.close()
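The IoU helper above scores one detection against all ground-truth boxes but is defined elsewhere. A standard sketch that matches the +1 width/height convention this function uses:

import numpy as np

def IoU(box, gts):
    # box: [x1, y1, x2, y2, ...]; gts: (N, 4) ground-truth boxes (assumed layout)
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    gt_area = (gts[:, 2] - gts[:, 0] + 1) * (gts[:, 3] - gts[:, 1] + 1)
    xx1 = np.maximum(box[0], gts[:, 0])
    yy1 = np.maximum(box[1], gts[:, 1])
    xx2 = np.minimum(box[2], gts[:, 2])
    yy2 = np.minimum(box[3], gts[:, 3])
    inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
    return inter / (box_area + gt_area - inter)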
Example #22
def inference():
    sess = tf.Session()
    with gfile.FastGFile(os.path.dirname(__file__) + '/model.pb', 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')

        # ops = tf.get_default_graph().get_operations()
        # print(ops)

    sess.run(tf.global_variables_initializer())
    input_image = sess.graph.get_tensor_by_name('input_image:0')
    landmark = sess.graph.get_tensor_by_name('ONet/landmark_fc/BiasAdd:0')

    data_file = "/mnt/data/changshuang/data/flickr/"
    anno_file = "/mnt/data/changshuang/data/aflw_anno.txt"
    # data: {'images': images, 'bboxes': bboxes, 'landmarks':landmarks}
    data = read_annotation(data_file, anno_file)
    img_data = list(zip(data["images"], data["bboxes"], data["landmarks"]))
    for img_path, img_bbox, img_landmarks in img_data:
        img = cv.imread(img_path)
        bbox = np.array(img_bbox)

        dets = convert_to_square(bbox)
        dets[:, 0:4] = np.round(dets[:, 0:4])
        h, w, c = img.shape
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)
        num_boxes = dets.shape[0]
        cropped_ims = np.zeros((num_boxes, 48, 48, 3), dtype=np.float32)
        for i in range(num_boxes):  # 17
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            cropped_ims[i, :, :, :] = (cv.resize(tmp, (48, 48)) - 127.5) / 128

        t1 = time()
        # batch_size = 16
        # minibatch = []
        # cur = 0
        # n = cropped_ims.shape[0]
        # while cur < n:
        #     minibatch.append(cropped_ims[cur: min(cur + batch_size, n), :, :, :])
        #     cur += batch_size
        
        # landmark_pred_list = []
        # for data in minibatch:
        #     m = data.shape[0]
        #     real_size = batch_size
        #     # pad the last group when it does not fill a whole batch; m is its real size
        #     if m < batch_size:
        #         keep_inds = np.arange(m)  # m = 5 keep_inds = [0,1,2,3,4]
        #         gap = batch_size - m  # batch_size = 7, gap = 2
        #         while gap >= len(keep_inds):
        #             gap -= len(keep_inds)  # -3
        #             keep_inds = np.concatenate((keep_inds, keep_inds))
        #         if gap != 0:
        #             keep_inds = np.concatenate((keep_inds, keep_inds[:gap]))
        #         data = data[keep_inds]
        #         real_size = m
        #     pre_landmarks = sess.run(landmark, feed_dict={input_image: data})
        #     landmark_pred_list.append(pre_landmarks[:real_size])
        # if len(landmark_pred_list) == 0:
        #     continue
        # else:
        #     pre_landmarks = np.concatenate(landmark_pred_list, axis=0)
        pre_landmarks = sess.run(landmark, feed_dict={input_image: cropped_ims})
        print(time() - t1)

        w = bbox[:, 2] - bbox[:, 0] + 1
        h = bbox[:, 3] - bbox[:, 1] + 1
        pre_landmarks[:, 0::2] = (np.tile(w, (5, 1)) * pre_landmarks[:, 0::2].T + np.tile(bbox[:, 0], (5, 1)) - 1).T
        pre_landmarks[:, 1::2] = (np.tile(h, (5, 1)) * pre_landmarks[:, 1::2].T + np.tile(bbox[:, 1], (5, 1)) - 1).T

        for i in range(bbox.shape[0]):
            box_gt = bbox[i, :4]
            corpbbox_gt = [int(box_gt[0]), int(box_gt[1]), int(box_gt[2]), int(box_gt[3])]
            # draw the ground-truth face boxes
            cv.rectangle(img, (corpbbox_gt[0], corpbbox_gt[1]), (corpbbox_gt[2], corpbbox_gt[3]), (0, 225, 255), 2)
        # draw predicted (red) and ground-truth (yellow) landmarks
        for i in range(pre_landmarks.shape[0]):
            for j in range(len(pre_landmarks[i]) // 2):
                cv.circle(img, (int(pre_landmarks[i][2 * j]), int(pre_landmarks[i][2 * j + 1])), 3, (0, 0, 255), -1)
                cv.circle(img, (int(img_landmarks[i][2 * j]), int(img_landmarks[i][2 * j + 1])), 3, (0, 255, 255), -1)
        cv.imshow('show image', img)
        k = cv.waitKey(0) & 0xFF
        if k == ord('q'):
            break
    cv.destroyAllWindows()
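The pad helper that produces the ten index arrays [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] is not listed on this page either. A sketch of the conventional MTCNN version (an assumption): for each box it returns the source slice inside the image (y:ey, x:ex) and the destination slice inside the zero-padded tmp patch (dy:edy, dx:edx), so boxes that stick out past the image borders can still be cropped.

import numpy as np

def pad(bboxes, w, h):
    tmpw = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32)
    tmph = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32)
    n = bboxes.shape[0]
    dx, dy = np.zeros(n, np.int32), np.zeros(n, np.int32)
    edx, edy = tmpw - 1, tmph - 1
    x, y = bboxes[:, 0].astype(np.int32), bboxes[:, 1].astype(np.int32)
    ex, ey = bboxes[:, 2].astype(np.int32), bboxes[:, 3].astype(np.int32)

    idx = np.where(ex > w - 1)  # box sticks out on the right
    edx[idx] = tmpw[idx] + w - 2 - ex[idx]
    ex[idx] = w - 1
    idx = np.where(ey > h - 1)  # box sticks out at the bottom
    edy[idx] = tmph[idx] + h - 2 - ey[idx]
    ey[idx] = h - 1
    idx = np.where(x < 0)  # box sticks out on the left
    dx[idx] = -x[idx]
    x[idx] = 0
    idx = np.where(y < 0)  # box sticks out at the top
    dy[idx] = -y[idx]
    y[idx] = 0
    return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]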
Example #23
def gen_onet_data(data_dir, anno_file, p_model_path, r_model_path, prefix=''):
    '''
    :param data_dir: train dataset dir
    :param anno_file: annotation file
    :param p_model_path: pnet model file
    :param r_model_path: rnet model file
    :param prefix: origin image root dir
    :return:
    '''
    neg_save_dir = os.path.join(data_dir, '48_train/negative')
    pos_save_dir = os.path.join(data_dir, '48_train/positive')
    part_save_dir = os.path.join(data_dir, '48_train/part')

    neg_save_dir_val = os.path.join(data_dir, '48_val/negative')
    pos_save_dir_val = os.path.join(data_dir, '48_val/positive')
    part_save_dir_val = os.path.join(data_dir, '48_val/part')

    neg_save_dir_test = os.path.join(data_dir, '48_test/negative')
    pos_save_dir_test = os.path.join(data_dir, '48_test/positive')
    part_save_dir_test = os.path.join(data_dir, '48_test/part')

    per_train = 0.7
    per_val = 0.2
    per_test = 0.1

    image_size = 48

    for dir_path in [
            neg_save_dir, pos_save_dir, part_save_dir, neg_save_dir_val,
            pos_save_dir_val, part_save_dir_val, neg_save_dir_test,
            pos_save_dir_test, part_save_dir_test
    ]:
        if not os.path.exists(dir_path):
            os.mkdir(dir_path)

    post_save_file = os.path.join(config.ANNO_STORE_DIR,
                                  config.ONET_POSITIVE_ANNO_FILENAME)
    neg_save_file = os.path.join(config.ANNO_STORE_DIR,
                                 config.ONET_NEGATIVE_ANNO_FILENAME)
    part_save_file = os.path.join(config.ANNO_STORE_DIR,
                                  config.ONET_PART_ANNO_FILENAME)

    post_save_test_file = os.path.join(config.ANNO_STORE_DIR,
                                       config.ONET_POSITIVE_TEST_ANNO_FILENAME)
    neg_save_test_file = os.path.join(config.ANNO_STORE_DIR,
                                      config.ONET_NEGATIVE_TEST_ANNO_FILENAME)
    part_save_test_file = os.path.join(config.ANNO_STORE_DIR,
                                       config.ONET_PART_TEST_ANNO_FILENAME)

    post_save_val_file = os.path.join(config.ANNO_STORE_DIR,
                                      config.ONET_POSITIVE_VALID_ANNO_FILENAME)
    neg_save_val_file = os.path.join(config.ANNO_STORE_DIR,
                                     config.ONET_NEGATIVE_VALID_ANNO_FILENAME)
    part_save_val_file = os.path.join(config.ANNO_STORE_DIR,
                                      config.ONET_PART_VALID_ANNO_FILENAME)

    f1 = open(post_save_file, 'w')
    f2 = open(neg_save_file, 'w')
    f3 = open(part_save_file, 'w')

    f1_test = open(post_save_test_file, 'w')
    f2_test = open(neg_save_test_file, 'w')
    f3_test = open(part_save_test_file, 'w')

    f1_val = open(post_save_val_file, 'w')
    f2_val = open(neg_save_val_file, 'w')
    f3_val = open(part_save_val_file, 'w')

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
        random.shuffle(annotations)

    num = len(annotations)

    pnet, rnet = creat_prnet(p_model_path, r_model_path, 'cuda:1')
    prnetDetector = PRnetDetector(pnet=pnet, rnet=rnet, min_face_size=12)

    p_idx = 0
    n_idx = 0
    d_idx = 0
    image_done = 0

    all_boxes = list()
    for annotation in annotations[:10000]:
        try:
            annotation = annotation.strip().split(' ')
            path = os.path.join(prefix, annotation[0])
            bbox = list(map(float, annotation[1:]))  # generate boxes randomly to get negative images
            boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            per = random.randint(0, 10000)
            img = cv2.imread(path)

            b, boxes_align = prnetDetector.detect_face(img)
            if isinstance(boxes_align, tuple):
                continue
            if boxes_align is None:
                continue
            if boxes_align.shape[0] == 0:
                continue
            all_boxes.append(boxes_align)
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1
            dets = convert_to_square(boxes_align)
            dets[:, 0:4] = np.round(dets[:, 0:4])
            for box in dets:
                x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[
                        1] - 1 or y_bottom > img.shape[0] - 1:
                    continue

                Iou = IoU(box, boxes)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)

                # save negative images and write label
                if np.max(Iou) < 0.3:
                    # Iou with all gts must below 0.3
                    if per < 1000:
                        save_file = os.path.join(neg_save_dir_test,
                                                 "%s.jpg" % n_idx)
                        f2_test.write(save_file + ' 0\n')
                        cv2.imwrite(save_file, resized_im)
                    elif per < 3000:
                        save_file = os.path.join(neg_save_dir_val,
                                                 "%s.jpg" % n_idx)
                        f2_val.write(save_file + ' 0\n')
                        cv2.imwrite(save_file, resized_im)
                    else:
                        save_file = os.path.join(neg_save_dir,
                                                 "%s.jpg" % n_idx)
                        f2.write(save_file + ' 0\n')
                        cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                else:
                    # find gt_box with the highest iou
                    idx = np.argmax(Iou)
                    assigned_gt = boxes[idx]
                    x1, y1, x2, y2 = assigned_gt

                    # compute bbox reg label
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)

                    # save positive and part-face images and write labels
                    if np.max(Iou) >= 0.65:
                        if per < 1000:
                            save_file = os.path.join(pos_save_dir_test,
                                                     "%s.jpg" % p_idx)
                            f1_test.write(
                                save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                                (offset_x1, offset_y1, offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                        elif per < 3000:
                            save_file = os.path.join(pos_save_dir_val,
                                                     "%s.jpg" % p_idx)
                            f1_val.write(
                                save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                                (offset_x1, offset_y1, offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                        else:
                            save_file = os.path.join(pos_save_dir,
                                                     "%s.jpg" % p_idx)
                            f1.write(
                                save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                                (offset_x1, offset_y1, offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                        p_idx += 1

                    elif np.max(Iou) >= 0.4:
                        # part faces are indexed with d_idx (the original reused p_idx here,
                        # which would overwrite positive-sample filenames)
                        if per < 1000:
                            save_file = os.path.join(part_save_dir_test,
                                                     "%s.jpg" % d_idx)
                            f3_test.write(
                                save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                                (offset_x1, offset_y1, offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                        elif per < 3000:
                            save_file = os.path.join(part_save_dir_val,
                                                     "%s.jpg" % d_idx)
                            f3_val.write(
                                save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                                (offset_x1, offset_y1, offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                        else:
                            save_file = os.path.join(part_save_dir,
                                                     "%s.jpg" % d_idx)
                            f3.write(
                                save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                                (offset_x1, offset_y1, offset_x2, offset_y2))
                            cv2.imwrite(save_file, resized_im)
                        d_idx += 1
        except RuntimeError as e:
            if 'out of memory' in str(e):
                print('| WARNING: ran out of memory')
                if hasattr(torch.cuda, 'empty_cache'):
                    torch.cuda.empty_cache()
            else:
                raise e

    f1.close()
    f2.close()
    f3.close()
    f1_val.close()
    f2_val.close()
    f3_val.close()
    f1_test.close()
    f2_test.close()
    f3_test.close()
Example #24
def save_hard_example(target, data, save_path):
    # load ground truth from annotation file
    # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image
    image_size = cfg.resize[target]
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)

    neg_file = open(join(cfg.path_output_txt, '%s_neg.txt' % target), 'w')
    pos_file = open(join(cfg.path_output_txt, '%s_pos.txt' % target), 'w')
    part_file = open(join(cfg.path_output_txt, '%s_part.txt' % target), 'w')

    dirs = ['neg', 'part', 'pos']
    dirs = [join(cfg.path_output_files, '%s_%s' % (target, d)) for d in dirs]
    for d in dirs:
        if not os.path.exists(d): os.makedirs(d)

    det_boxes = pickle.load(open(save_path, 'rb'))

    assert len(
        det_boxes) == num_of_images, "incorrect detections or ground truths"

    # index of neg, pos and part face, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    # image_done = 0
    #im_idx_list image index(list)
    #det_boxes detect result(list)
    #gt_boxes_list gt(list)
    for im_idx, dets, gts in tqdm(zip(im_idx_list, det_boxes, gt_boxes_list),
                                  total=num_of_images):
        gts = np.array(gts, dtype=np.float32).reshape(-1, 4)

        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        #change to square
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])
        neg_num = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom, _ = box.astype(int)
            width = x_right - x_left + 1
            height = y_bottom - y_top + 1

            # ignore box that is too small or beyond image border
            if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[
                    1] - 1 or y_bottom > img.shape[0] - 1:
                continue

            # compute intersection over union(IoU) between current box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)

            # save negative images and write label
            # Iou with all gts must below 0.3

            if np.max(Iou) < 0.3 and neg_num < 60:
                content = '0\n'
                SaveTxt(neg_file, target, n_idx, content, 'neg')
                SaveImg(resized_im, target, n_idx, 'neg')

                n_idx += 1
                neg_num += 1
            else:
                # find gt_box with the highest iou
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                # save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    content = '1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2)
                    SaveTxt(pos_file, target, p_idx, content, 'pos')
                    SaveImg(resized_im, target, p_idx, 'pos')
                    p_idx += 1

                elif np.max(Iou) >= 0.4:
                    content = '-1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2)
                    SaveTxt(part_file, target, d_idx, content, 'part')
                    SaveImg(resized_im, target, d_idx, 'part')
                    d_idx += 1
    neg_file.close()
    part_file.close()
    pos_file.close()
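SaveTxt and SaveImg are project-local helpers that are not reproduced on this page. Based on the directories created above (join(cfg.path_output_files, '%s_%s' % (target, kind))), they might look something like the following hypothetical sketch; the names, path layout, and cfg fields simply mirror the calling code, and join/cv2/cfg are assumed to be the module-level imports this file already uses.

# hypothetical helpers; path layout mirrors the directory setup in save_hard_example
def SaveTxt(label_file, target, idx, content, kind):
    img_path = join(cfg.path_output_files, '%s_%s' % (target, kind), '%s.jpg' % idx)
    label_file.write(img_path + ' ' + content)

def SaveImg(img, target, idx, kind):
    img_path = join(cfg.path_output_files, '%s_%s' % (target, kind), '%s.jpg' % idx)
    cv2.imwrite(img_path, img)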
Example #25
    def __onet_detect(self, image, rnet_boxes):
        # list to hold the cropped patches
        _img_dataset = []

        # expand each box from the R-Net stage around its center into a square
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        # for each box kept by R-Net: compute coordinates, crop, resize, convert, append, stack
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            # crop by the coordinates
            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((48, 48))

            # convert the cropped patch to a tensor
            img_data = self.__image_transform(img)
            _img_dataset.append(img_data)

        # stack: list of CHW tensors -> one NCHW batch
        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        _cls, _offset = self.onet(img_dataset)
        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()

        # holds the O-Net results
        boxes = []
        # the original o_cls of 0.97 is on the low side; to keep only faces the confidence
        # should approach 0.99999 (e.g. 0.99998). idxs holds the row indices where cls > o_cls.
        idxs, _ = np.where(cls > o_cls)

        # iterate over the boxes that passed the confidence threshold
        for idx in idxs:
            # use the R-Net box as the anchor box
            _box = _rnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            # anchor width/height; the box is square, so ow == oh
            ow = _x2 - _x1
            oh = _y2 - _y1

            # final O-Net box coordinates; the offset was encoded as δ = (x1 - _x1) / side_len
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            # append the 4 coordinates and the confidence
            boxes.append([x1, y1, x2, y2, cls[idx][0]])

        # NMS with IoU over the smaller area; boxes with IoU below o_nms (originally 0.7) are kept
        return utils.nms(np.array(boxes), o_nms, isMin=True)
Example #26
    def __rnet_detect(self, image, pnet_boxes):
        # empty list to hold the cropped patches
        _img_dataset = []

        # expand each P-Net box around its center into a square before cropping
        _pnet_boxes = utils.convert_to_square(pnet_boxes)

        # for each box: take its 4 coordinates, crop, resize, convert, append
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            # crop by the 4 coordinates
            img = image.crop((_x1, _y1, _x2, _y2))

            # resize to the fixed input size
            img = img.resize((24, 24))

            # convert the image to a tensor
            img_data = self.__image_transform(img)

            _img_dataset.append(img_data)

        # stack along dim 0: list of CHW tensors -> NCHW batch
        img_dataset = torch.stack(_img_dataset)

        if self.isCuda:
            img_dataset = img_dataset.cuda()

        # run the 24x24 crops through R-Net for another round of filtering
        _cls, _offset = self.rnet(img_dataset)

        # move the data from GPU to CPU, then convert to numpy arrays
        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()

        # boxes kept by R-Net
        boxes = []
        # the original confidence threshold of 0.6 is on the low side (many of the boxes are
        # useless; print them to check) and can be raised; idxs holds the row indices where cls > r_cls
        idxs, _ = np.where(cls > r_cls)

        # iterate over the boxes that passed the confidence threshold
        for idx in idxs:
            _box = _pnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            # anchor box width and height
            ow = _x2 - _x1
            oh = _y2 - _y1

            # coordinates of the refined box
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            # append the 4 coordinates and the confidence
            boxes.append([x1, y1, x2, y2, cls[idx][0]])

        # the original r_nms of 0.5 should be lowered (and the 0.6 above raised);
        # boxes with IoU below the threshold are kept
        return utils.nms(np.array(boxes), r_nms)
Example #27
n_idx = 0
p_idx = 0
d_idx = 0
counter = 0

for image, boxes in tqdm(zip(im_idx_list, gt_boxes_list)):
    # NOTE: the original used dtype=np.float32; np.int is deprecated, so use np.int32
    boxes = np.array(boxes, dtype=np.int32).reshape(-1, 4)
    # print(image)
    img = cv2.imread(image)
    dets, _ = mtcnn_detector.detect(img)

    if dets.shape[0] == 0:
        continue  # skip images for which no boxes were generated

    dets = convert_to_square(dets)
    dets = dets.astype(np.int32)

    neg_num = 0
    pos_num = 0
    part_num = 0

    for det in dets:
        x1, y1, x2, y2, _ = det
        wd = x2 - x1 + 1
        ht = y2 - y1 + 1

        if wd < 24 or ht < 24 or x1 < 0 or y1 < 0 or\
          x2 > img.shape[1] or y2 > img.shape[0]:
            continue
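
convert_to_square, applied above before cropping, pads each box around its center out to the length of its longer side so the crop matches the square network inputs. A minimal sketch of the idea, assuming [x1, y1, x2, y2, score] rows (an illustration, not necessarily the project's exact helper):

import numpy as np

def convert_to_square_sketch(boxes):
    square = boxes.copy()
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    side = np.maximum(w, h)                            # longer side of each box
    square[:, 0] = boxes[:, 0] + w * 0.5 - side * 0.5  # shift x1 so the center stays put
    square[:, 1] = boxes[:, 1] + h * 0.5 - side * 0.5  # shift y1 likewise
    square[:, 2] = square[:, 0] + side                 # x2 = x1 + side
    square[:, 3] = square[:, 1] + side                 # y2 = y1 + side
    return square
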
Пример #28
0
    def __rnet_detect(self, image, pnet_boxes):
        # Create an empty list to hold the crops
        _img_dataset = []
        # For each box output by the P-network, find the center, expand the longer side into a square, then crop
        _pnet_boxes = utils.convert_to_square(pnet_boxes)
        # For each box (4 corner coordinates): crop, resize, convert the data type, append to the list
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            img = image.crop((_x1, _y1, _x2, _y2))  # crop using the 4 corner coordinates
            img = img.resize((24, 24))  # resize to the fixed input size
            img_data = self.data_for(img)  # convert the image array to a tensor
            _img_dataset.append(img_data)
        # torch.stack stacks along dim 0 by default; converts a list of CHW tensors into an NCHW batch
        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()
        # forward pass through the R-network
        _cond, _offset_face, _offset_facial = self.rnet(img_dataset)
        cond = _cond.cpu().data.numpy()  # move the data from the GPU to the CPU, then convert to a NumPy array
        offset_face = _offset_face.cpu().data.numpy()
        offset_facial = _offset_facial.cpu().data.numpy()
        boxes = []
        # The original confidence of 0.6 is on the low side; many boxes are then useless
        # (print them to check), so it can be raised somewhat. idxs holds the indices of
        # boxes whose confidence exceeds 0.6.
        # ★ np.where returns idxs (axis-0 indices, e.g. [0, 1]) and _ (axis-1 indices,
        # e.g. [0, 0]); together they locate each element (see example 3)
        idxs, _ = np.where(cond > r_cls)
        # # Vectorized alternative, kept for reference:
        # # get the box data
        # _box = _pnet_boxes[idxs]
        # # the two corner points of each box
        # _x1 = _box[:, 0].astype(np.int32)
        # _y1 = _box[:, 1].astype(np.int32)
        # _x2 = _box[:, 2].astype(np.int32)
        # _y2 = _box[:, 3].astype(np.int32)
        # # box widths and heights
        # ow = _x2 - _x1
        # oh = _y2 - _y1
        # # corner coordinates of the refined boxes
        # x1 = _x1 + ow * offset_face[idxs][:, 0]
        # y1 = _y1 + oh * offset_face[idxs][:, 1]
        # x2 = _x2 + ow * offset_face[idxs][:, 2]
        # y2 = _y2 + oh * offset_face[idxs][:, 3]
        # cls = cond[idxs][:, 0]
        # boxes_ = np.array([x1, y1, x2, y2, cls], dtype=np.float64)
        # boxes_stacked = np.swapaxes(boxes_, 1, 0)

        print(idxs, _)  # debug output
        for idx in idxs:  # iterate over the qualifying boxes by index (the axis-1 indices are not needed here)
            _box = _pnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            ow = _x2 - _x1  # width of the base box
            oh = _y2 - _y1  # height of the base box

            x1 = _x1 + ow * offset_face[idx][0]  # corner coordinates of the refined box
            y1 = _y1 + oh * offset_face[idx][1]
            x2 = _x2 + ow * offset_face[idx][2]
            y2 = _y2 + oh * offset_face[idx][3]

            # lefteye_x = _x1 + offset_facial[0]  # left eye x
            # lefteye_y = _y1 + offset_facial[1]  # left eye y
            # righteye_x = _x1 + offset_facial[2]  # right eye x
            # righteye_y = _y1 + offset_facial[3]  # right eye y
            # nose_x = _x1 + offset_facial[4]  # nose x
            # nose_y = _y1 + offset_facial[5]  # nose y
            # leftmouth_x = _x1 + offset_facial[6]  # left mouth corner x
            # leftmouth_y = _y1 + offset_facial[7]  # left mouth corner y
            # rightmouth_x = _x1 + offset_facial[8]  # right mouth corner x
            # rightmouth_y = _y1 + offset_facial[9]  # right mouth corner y

            boxes.append([x1, y1, x2, y2, cond[idx][0]])  # 4 corner coordinates plus the confidence
        return utils.nms(np.array(boxes),
                         r_nms)  # original r_nms is 0.5 (tune it downward; the 0.6 above goes upward); boxes with IoU below 0.5 are kept
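
The commented-out block above sketches a vectorized alternative to the per-box loop. Under the same assumptions (square base boxes, an N x 4 offset array, an N x 1 confidence array), the loop body can be replaced by array arithmetic along these lines (a hypothetical helper, not part of the original code):

import numpy as np

def decode_boxes(idxs, base_boxes, offsets, confidences):
    base = base_boxes[idxs]
    ow = base[:, 2] - base[:, 0]             # base box widths
    oh = base[:, 3] - base[:, 1]             # base box heights
    x1 = base[:, 0] + ow * offsets[idxs, 0]  # apply the regression offsets
    y1 = base[:, 1] + oh * offsets[idxs, 1]
    x2 = base[:, 2] + ow * offsets[idxs, 2]
    y2 = base[:, 3] + oh * offsets[idxs, 3]
    return np.stack([x1, y1, x2, y2, confidences[idxs, 0]], axis=1)  # N x 5 rows
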
Пример #29
0
def save_hard_example(save_size, data, neg_dir, pos_dir, part_dir, detectors):
  '''Crop the original images with the boxes detected by the network, producing input for the next network'''

  im_idx_list = data['images']

  gt_boxes_list = data['bboxes']
  num_of_images = len(im_idx_list)

  # save files
  neg_label_file = "data/%d/neg_%d.txt" % (save_size, save_size)
  neg_file = open(neg_label_file, 'w')

  pos_label_file = "data/%d/pos_%d.txt" % (save_size, save_size)
  pos_file = open(pos_label_file, 'w')

  part_label_file = "data/%d/part_%d.txt" % (save_size, save_size)
  part_file = open(part_label_file, 'w')
  # read detection results
  det_boxes = detectors
  # print(len(det_boxes), num_of_images)

  assert len(det_boxes) == num_of_images, "number of detection results must match number of images"

  n_idx = 0
  p_idx = 0
  d_idx = 0
  image_done = 0

  for im_idx, dets, gts in tqdm(zip(im_idx_list, det_boxes, gt_boxes_list)):
    gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
    image_done += 1

    if dets.shape[0] == 0:
      continue
    img = cv2.imread(im_idx)
    # convert boxes to squares
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    neg_num = 0
    for box in dets:
      x_left, y_top, x_right, y_bottom, _ = box.astype(int)
      width = x_right - x_left + 1
      height = y_bottom - y_top + 1

      # skip boxes that are too small or fall outside the image
      if width < 24 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
        continue

      Iou = iou(box, gts)
      cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
      resized_im = cv2.resize(cropped_im, (save_size, save_size),
                              interpolation=cv2.INTER_LINEAR)

      # categorize the crop by IoU with the ground truth
      if np.max(Iou) < 0.3 and neg_num < 60:

        save_file = os.path.join(neg_dir, "%s.jpg" % n_idx)

        neg_file.write(save_file + ' 0\n')
        cv2.imwrite(save_file, resized_im)
        n_idx += 1
        neg_num += 1
      else:
        idx = np.argmax(Iou)
        assigned_gt = gts[idx]
        x1, y1, x2, y2 = assigned_gt

        # regression offsets, normalized by the detected box size
        offset_x1 = (x1 - x_left) / float(width)
        offset_y1 = (y1 - y_top) / float(height)
        offset_x2 = (x2 - x_right) / float(width)
        offset_y2 = (y2 - y_bottom) / float(height)

        # positive and part samples
        if np.max(Iou) >= 0.65:
          save_file = os.path.join(pos_dir, "%s.jpg" % p_idx)
          pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
              offset_x1, offset_y1, offset_x2, offset_y2))
          cv2.imwrite(save_file, resized_im)
          p_idx += 1

        elif np.max(Iou) >= 0.4:
          save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
          part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
              offset_x1, offset_y1, offset_x2, offset_y2))
          cv2.imwrite(save_file, resized_im)
          d_idx += 1
  neg_file.close()
  part_file.close()
  pos_file.close()
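
The offsets written above are the ground-truth corner displacements normalized by the detected box's width and height, which makes the regression targets scale-invariant. A worked example with hypothetical numbers:

# Detected box (x_left, y_top, x_right, y_bottom) = (10, 10, 33, 33); width = height = 24
# Assigned ground truth (x1, y1, x2, y2) = (12, 8, 36, 30)
offset_x1 = (12 - 10) / 24.0  # +0.083: the gt left edge lies right of the detection
offset_y1 = (8 - 10) / 24.0   # -0.083
offset_x2 = (36 - 33) / 24.0  # +0.125
offset_y2 = (30 - 33) / 24.0  # -0.125
# If this crop's best IoU were >= 0.65 it would be saved as a positive sample,
# between 0.4 and 0.65 as a part sample, and below 0.3 as a negative.
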
Пример #30
0
def rotate(image):
    # Rotate landscape images to portrait; without the final return, portrait images would come back as None
    w, h = image.size
    if w > h:
        return image.rotate(-90, expand=True)
    return image

if __name__ == '__main__':
    image_path = r"test_images"
    output_path = r"output"
    for i in os.listdir(image_path):
        detector = Detector()
        with Image.open(os.path.join(image_path,i)) as im:
            print(i)
            print("----------------------------")
            im.load()
            im = rotate(im)
            boxes = detector.detect(im)
            boxes = utils.convert_to_square(boxes)
            print("size:",im.size)
            imDraw = ImageDraw.Draw(im)
            cx = im.size[0]/2
            cy = im.size[1]/2
            for box in boxes:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                le_x = int(box[5])
                le_y = int(box[6])
                re_x = int(box[7])
                re_y = int(box[8])
                n_x = int(box[9])
                n_y = int(box[10])