예제 #1
0
파일: mtcnn.py 프로젝트: a3979873/SDD
    def detectFace(self, img, threshold):
        """Run the Pnet -> Rnet -> Onet cascade on a single image.

        Args:
            img: Image array. It must support ``copy()`` and arithmetic, and
                its normalised form must have a three-element ``shape`` whose
                first two entries are treated as height and width.
            threshold: Indexable holding three values; entries 0, 1 and 2 are
                forwarded to the Pnet, Rnet and Onet decoding helpers in
                ``utils`` respectively.

        Returns:
            The value produced by ``utils.filter_face_48net``. If an earlier
            stage leaves no candidates, that stage's empty result
            (``utils.NMS`` or ``utils.filter_face_24net``) is returned as is.
        """
        # Normalise: centre on 127.5 and divide by 127.5 (0..255 -> -1..1).
        copy_img = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = copy_img.shape

        def crop_resized(box, side):
            # Cut box (x1, y1, x2, y2, ...) out of the normalised image and
            # resize the patch to side x side.
            patch = copy_img[int(box[1]):int(box[3]), int(box[0]):int(box[2])]
            return cv2.resize(patch, (side, side))

        # Scale factors of the image pyramid built from the original input.
        scales = utils.calculateScales(img)

        # ---- Stage 1: Pnet, coarse proposals on every pyramid level ----
        pnet_results = []
        for factor in scales:
            # cv2.resize takes the target size as (width, height).
            target_size = (int(origin_w * factor), int(origin_h * factor))
            level = cv2.resize(copy_img, target_size)
            # Prepend a batch axis of length 1 before predicting.
            batch = level.reshape((1,) + level.shape)
            pnet_results.append(self.Pnet.predict(batch))

        candidates = []
        for factor, prediction in zip(scales, pnet_results):
            # Channel 1 of the first output is the face probability map.
            face_prob = prediction[0][0][:, :, 1]
            # Second output: box regression for the same positions.
            offsets = prediction[1][0]
            rows, cols = face_prob.shape
            longest_side = max(rows, cols)
            # Decode the maps into boxes in original-image coordinates.
            decoded = utils.detect_face_12net(face_prob, offsets, longest_side,
                                              1 / factor, origin_w, origin_h,
                                              threshold[0])
            candidates.extend(decoded)

        # Non-maximum suppression over the proposals of all levels.
        candidates = utils.NMS(candidates, 0.7)
        if len(candidates) == 0:
            return candidates

        # ---- Stage 2: Rnet, refine the proposals on 24x24 crops ----
        rnet_batch = np.array([crop_resized(box, 24) for box in candidates])
        rnet_out = self.Rnet.predict(rnet_batch)
        # First output: confidence; second output: per-box adjustment.
        face_prob = np.array(rnet_out[0])
        offsets = np.array(rnet_out[1])
        candidates = utils.filter_face_24net(face_prob, offsets, candidates,
                                             origin_w, origin_h, threshold[1])
        if len(candidates) == 0:
            return candidates

        # ---- Stage 3: Onet, final boxes from 48x48 crops ----
        onet_batch = np.array([crop_resized(box, 48) for box in candidates])
        onet_out = self.Onet.predict(onet_batch)
        return utils.filter_face_48net(onet_out[0], onet_out[1], onet_out[2],
                                       candidates, origin_w, origin_h,
                                       threshold[2])
예제 #2
0
    def detectFace(self, img, threshold):
        """Detect faces in ``img`` with the three cascaded networks.

        Args:
            img: Image array supporting ``copy()`` and arithmetic; after
                normalisation its ``shape`` must unpack into three values,
                the first two being used as height and width.
            threshold: Indexable with three entries, one per stage
                (index 0 for Pnet, 1 for Rnet, 2 for Onet).

        Returns:
            The result of ``utils.filter_face_48net``, or the empty result of
            ``utils.NMS`` / ``utils.filter_face_24net`` when that stage
            leaves nothing to process.
        """
        # Normalisation: subtract 127.5, then divide by 127.5.
        normalized = (img.copy() - 127.5) / 127.5
        full_h, full_w, _ = normalized.shape

        # Zoom factors applied to the input image for the pyramid.
        pyramid_scales = utils.calculateScales(img)

        def pnet_forward(ratio):
            # Resize the normalised image by ratio and run Pnet on it as a
            # batch containing that single image.
            new_h = int(full_h * ratio)
            new_w = int(full_w * ratio)
            resized = cv2.resize(normalized, (new_w, new_h))
            return self.Pnet.predict(resized[np.newaxis, ...])

        # Pnet part: one prediction per zoom factor.
        stage_one = [pnet_forward(ratio) for ratio in pyramid_scales]

        boxes = []
        for idx, raw in enumerate(stage_one):
            # Face probability of every position on this level.
            scores = raw[0][0][:, :, 1]
            # Box position information for the same positions.
            regress = raw[1][0]
            # Size of the prediction map of the zoomed image.
            grid_h, grid_w = scores.shape
            # Map the predictions back onto the original image size.
            boxes.extend(
                utils.detect_face_12net(scores, regress, max(grid_h, grid_w),
                                        1 / pyramid_scales[idx], full_w,
                                        full_h, threshold[0]))

        # Non-maximum suppression.
        boxes = utils.NMS(boxes, 0.7)
        if len(boxes) == 0:
            return boxes

        # Rnet part: more accurate positions from 24x24 crops.
        patches_24 = []
        for box in boxes:
            top, bottom = int(box[1]), int(box[3])
            left, right = int(box[0]), int(box[2])
            window = normalized[top:bottom, left:right]
            patches_24.append(cv2.resize(window, (24, 24)))

        rnet_result = self.Rnet.predict(np.array(patches_24))
        # Confidence of every crop.
        confidence = np.array(rnet_result[0])
        # Rectangle adjustment of every crop.
        adjustment = np.array(rnet_result[1])
        boxes = utils.filter_face_24net(confidence, adjustment, boxes, full_w,
                                        full_h, threshold[1])
        if len(boxes) == 0:
            return boxes

        # Onet part: final filtering on 48x48 crops.
        patches_48 = []
        for box in boxes:
            top, bottom = int(box[1]), int(box[3])
            left, right = int(box[0]), int(box[2])
            window = normalized[top:bottom, left:right]
            patches_48.append(cv2.resize(window, (48, 48)))

        onet_result = self.Onet.predict(np.array(patches_48))
        confidence = onet_result[0]
        adjustment = onet_result[1]
        landmarks = onet_result[2]

        boxes = utils.filter_face_48net(confidence, adjustment, landmarks,
                                        boxes, full_w, full_h, threshold[2])
        return boxes
예제 #3
0
    def detectFace(self, img, threshold):
        """Locate faces in one image using Pnet, Rnet and Onet in sequence.

        Args:
            img: Image array supporting ``copy()`` and arithmetic; the
                normalised array's ``shape`` must unpack into three values,
                of which the first two are used as height and width.
            threshold: Indexable with one entry per stage: index 0 is passed
                to the Pnet decoder, 1 to the Rnet filter, 2 to the Onet
                filter.

        Returns:
            The result of ``utils.filter_face_48net``. If NMS yields no
            boxes, the empty array built from the NMS result is returned;
            if the Rnet filter yields none, its empty result is returned.
        """
        # Normalise the pixel values: (x - 127.5) / 127.5.
        copy_img = (img.copy() - 127.5) / 127.5
        origin_h, origin_w, _ = copy_img.shape

        # Scale factor of every level of the image pyramid.
        scales = utils.calculateScales(img)

        # ---------------- Pnet: coarse face boxes ---------------- #
        per_level = []
        for factor in scales:
            level_w = int(origin_w * factor)
            level_h = int(origin_h * factor)
            level_img = cv2.resize(copy_img, (level_w, level_h))
            prediction = self.Pnet.predict(np.expand_dims(level_img, 0))
            # Each request carries exactly one pyramid level, so the batch
            # axis of both outputs is dropped right here.
            per_level.append((prediction[0][0], prediction[1][0]))

        # Walk over the per-level predictions and decode each of them.
        proposals = []
        for factor, (cls_map, roi) in zip(scales, per_level):
            # The batch axis is already gone; channel 1 is selected as the
            # class probability map.
            cls_prob = cls_map[:, :, 1]
            # Height and width of this level's prediction map.
            map_h, map_w = cls_prob.shape
            longest = max(map_h, map_w)
            # Decoding step.
            proposals.extend(
                utils.detect_face_12net(cls_prob, roi, longest, 1 / factor,
                                        origin_w, origin_h, threshold[0]))

        # Non-maximum suppression.
        rectangles = np.array(utils.NMS(proposals, 0.7))
        if len(rectangles) == 0:
            return rectangles

        # ---------------- Rnet: refined face boxes ---------------- #
        # Crop every coarse box from the normalised image and resize the
        # crop to 24x24.
        rnet_input = np.array([
            cv2.resize(
                copy_img[int(box[1]):int(box[3]),
                         int(box[0]):int(box[2])], (24, 24))
            for box in rectangles
        ])
        cls_prob, roi_prob = self.Rnet.predict(rnet_input)

        # Decoding step.
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h, threshold[1])
        if len(rectangles) == 0:
            return rectangles

        # ---------------- Onet: final face boxes ---------------- #
        # Same cropping as above, this time resized to 48x48.
        onet_input = np.array([
            cv2.resize(
                copy_img[int(box[1]):int(box[3]),
                         int(box[0]):int(box[2])], (48, 48))
            for box in rectangles
        ])
        cls_prob, roi_prob, pts_prob = self.Onet.predict(onet_input)

        # Decoding step.
        return utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                       rectangles, origin_w, origin_h,
                                       threshold[2])
예제 #4
0
    def detectFace(self, img, threshold):
        """Detect faces with the Pnet/Rnet/Onet cascade served through Triton.

        The three networks are queried via ``self.triton_client`` under the
        model names ``"pnet"``, ``"rnet"`` and ``"onet"``; tensor names come
        from ``self.Pnet_inputs`` / ``self.Pnet_outputs`` and the matching
        Rnet/Onet attributes.

        Args:
            img: Image array supporting arithmetic; the normalised array's
                ``shape`` must unpack into three values, the first two being
                used as height and width.
            threshold: Indexable with one entry per stage: index 0 is passed
                to the Pnet decoder, 1 to the Rnet filter, 2 to the Onet
                filter.

        Returns:
            The result of ``utils.filter_face_48net``. If NMS yields no
            boxes, the empty array built from the NMS result is returned;
            if the Rnet filter yields none, its empty result is returned.
        """
        # Normalise the pixel values. The subtraction already allocates a new
        # array, so the input is left untouched without an explicit copy.
        copy_img = (img - 127.5) / 127.5
        origin_h, origin_w, _ = copy_img.shape

        # Scale factor of every level of the image pyramid.
        scales = utils.calculateScales(img)

        # ---------------- Pnet: coarse face boxes ---------------- #
        out = []
        for scale in scales:
            hs = int(origin_h * scale)
            ws = int(origin_w * scale)
            scale_img = cv2.resize(copy_img, (ws, hs))
            inputs = np.expand_dims(scale_img, 0).astype(np.float32)
            cls_map, roi_map = self._triton_infer(
                "pnet", self.Pnet_inputs[0],
                [self.Pnet_outputs[0], self.Pnet_outputs[1]], inputs)
            # Each request carries exactly one pyramid level, so the batch
            # axis of both outputs is dropped right here.
            out.append((cls_map[0], roi_map[0]))

        # Walk over the per-level predictions and decode each of them.
        rectangles = []
        for scale, (cls_map, roi) in zip(scales, out):
            # Channel 1 is selected as the class probability map.
            cls_prob = cls_map[:, :, 1]
            # Height and width of this level's prediction map.
            out_h, out_w = cls_prob.shape
            out_side = max(out_h, out_w)
            # Decoding step.
            rectangles.extend(
                utils.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                        origin_w, origin_h, threshold[0]))

        # Non-maximum suppression.
        rectangles = np.array(utils.NMS(rectangles, 0.7))
        if len(rectangles) == 0:
            return rectangles

        # ---------------- Rnet: refined face boxes ---------------- #
        # Crop every coarse box from the normalised image, resize the crop to
        # 24x24 and build the float32 batch once for both shape and data.
        predict_24_batch = np.array([
            cv2.resize(
                copy_img[int(rectangle[1]):int(rectangle[3]),
                         int(rectangle[0]):int(rectangle[2])], (24, 24))
            for rectangle in rectangles
        ]).astype(np.float32)
        cls_prob, roi_prob = self._triton_infer(
            "rnet", self.Rnet_inputs[0],
            [self.Rnet_outputs[0], self.Rnet_outputs[1]], predict_24_batch)

        # Decoding step.
        rectangles = utils.filter_face_24net(cls_prob, roi_prob, rectangles,
                                             origin_w, origin_h, threshold[1])
        if len(rectangles) == 0:
            return rectangles

        # ---------------- Onet: final face boxes ---------------- #
        # Same cropping as above, this time resized to 48x48.
        predict_batch = np.array([
            cv2.resize(
                copy_img[int(rectangle[1]):int(rectangle[3]),
                         int(rectangle[0]):int(rectangle[2])], (48, 48))
            for rectangle in rectangles
        ]).astype(np.float32)
        cls_prob, roi_prob, pts_prob = self._triton_infer(
            "onet", self.Onet_inputs[0], [
                self.Onet_outputs[0], self.Onet_outputs[1],
                self.Onet_outputs[2]
            ], predict_batch)

        # Decoding step.
        return utils.filter_face_48net(cls_prob, roi_prob, pts_prob,
                                       rectangles, origin_w, origin_h,
                                       threshold[2])

    def _triton_infer(self, model_name, input_name, output_names, batch):
        """Send one FP32 batch to a Triton model and collect its outputs.

        Args:
            model_name: Name of the model passed to ``triton_client.infer``.
            input_name: Name of the model's single input tensor.
            output_names: Names of the output tensors to request.
            batch: Array holding the input data; its ``shape`` is declared
                to the server and its contents are sent as binary data.
                The tensor is declared as ``'FP32'``, so callers pass
                float32 data.

        Returns:
            A list with ``result.as_numpy(name)`` for every name in
            ``output_names``, in the same order.
        """
        infer_input = tritonclient.http.InferInput(input_name, batch.shape,
                                                   'FP32')
        infer_input.set_data_from_numpy(batch, binary_data=True)
        requested = [
            tritonclient.http.InferRequestedOutput(name, binary_data=True)
            for name in output_names
        ]
        result = self.triton_client.infer(model_name,
                                          inputs=[infer_input],
                                          outputs=requested)
        return [result.as_numpy(name) for name in output_names]