def detect_face(self, img):
        """
            detect faces and five landmarks per face over img

            Candidates are produced by a first stage that is mapped through
            self.Pool over self.PNets at several scales, then filtered by
            self.RNet and self.ONet. When self.accurate_landmark is set the
            landmarks are additionally refined by self.LNet.
        Parameters:
        ----------
            img: numpy array with three dimensions, unpacked as
                (height, width, channels)
                input image (presumably BGR as loaded by OpenCV - TODO confirm)
        Returns:
        -------
            None when img is None, img is not 3-dimensional, or no box
            survives a stage; otherwise the tuple (bboxes, points):
            bboxes: numpy array, n x 5 (x1,y1,x2,y2,score)
                bboxes
            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
                landmarks; cast to int32 when self.accurate_landmark is set
        """

        # check input
        MIN_DET_SIZE = 12

        if img is None:
            return None

        # only works for color image
        if len(img.shape) != 3:
            return None

        # detected boxes (re-initialised again just before the first stage)
        total_boxes = []

        height, width, _ = img.shape
        minl = min(height, width)

        # get all the valid scales: start at MIN_DET_SIZE / minsize and keep
        # multiplying by self.factor while the scaled short side is still
        # larger than MIN_DET_SIZE
        scales = []
        # NOTE(review): under Python 2 this is an integer division unless
        # self.minsize is a float - confirm how minsize is initialised
        m = MIN_DET_SIZE / self.minsize
        minl *= m
        factor_count = 0
        while minl > MIN_DET_SIZE:
            scales.append(m * self.factor**factor_count)
            minl *= self.factor
            factor_count += 1

        #############################################
        # first stage
        #############################################
        # sequential variant, kept for reference:
        #for scale in scales:
        #    return_boxes = self.detect_first_stage(img, scale, 0)
        #    if return_boxes is not None:
        #        total_boxes.append(return_boxes)

        # the scale indices are split into batches by self.slice_index; each
        # batch is mapped through the pool, pairing one entry of self.PNets
        # with one scale
        sliced_index = self.slice_index(len(scales))
        total_boxes = []
        for batch in sliced_index:
            local_boxes = self.Pool.map( detect_first_stage_warpper, \
                    izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
            total_boxes.extend(local_boxes)

        # remove the Nones
        total_boxes = [i for i in total_boxes if i is not None]

        if len(total_boxes) == 0:
            return None

        total_boxes = np.vstack(total_boxes)

        if total_boxes.size == 0:
            return None

        # merge the detection from first stage
        pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
        total_boxes = total_boxes[pick]

        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1

        # refine the bboxes: columns 5..8 are offsets relative to the box
        # width/height, column 4 (the score) is carried over unchanged
        total_boxes = np.vstack([
            total_boxes[:, 0] + total_boxes[:, 5] * bbw,
            total_boxes[:, 1] + total_boxes[:, 6] * bbh,
            total_boxes[:, 2] + total_boxes[:, 7] * bbw,
            total_boxes[:, 3] + total_boxes[:, 8] * bbh, total_boxes[:, 4]
        ])

        # back to one row per box
        total_boxes = total_boxes.T
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # second stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, width, height)
        # (3, 24, 24) is the input shape for RNet
        input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

        for i in range(num_box):
            # copy the image region given by pad() into a zero-initialised
            # patch, then resize the patch to the network input size
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

        output = self.RNet.predict(input_buf)

        # filter the total_boxes with threshold
        passed = np.where(output[1][:, 1] > self.threshold[1])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        # the RNet score replaces the first-stage score
        total_boxes[:, 4] = output[1][passed, 1].reshape((-1, ))
        reg = output[0][passed]

        # nms
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick]
        total_boxes = self.calibrate_box(total_boxes, reg[pick])
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            # same crop as in the second stage, but into a float32 patch
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)

        # filter the total_boxes with threshold
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        # ONet outputs as used here: [0] landmarks, [1] box regression,
        # [2] scores
        total_boxes[:, 4] = output[2][passed, 1].reshape((-1, ))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points: scale the network output by the box size
        # and shift by the box origin (uses the boxes before calibration)
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(
            total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(
            total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        # patch side: a quarter of the longer box side
        patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
                            total_boxes[:, 3] - total_boxes[:, 1] + 1)
        patchw = np.round(patchw * 0.25)

        # make it even
        patchw[np.where(np.mod(patchw, 2) == 1)] += 1

        # 5 landmarks x 3 channels = 15 input planes per box
        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            # square patch of side patchw centred on landmark i
            x, y = points[:, i], points[:, i + 5]
            x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
                np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
                height)
            for j in range(num_box):
                # the box passed to pad() is square by construction; tmpw is
                # used for both sides of the buffer
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j] + 1,
                      dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
                                                 x[j]:ex[j] + 1, :]
                input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
                    cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement: any row with a value further
            # than 0.35 from 0.5 is reset to 0.5 (the patch centre)
            tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            # patch origin plus the relative position inside the patch
            pointx[:, k] = np.round(points[:, k] -
                                    0.5 * patchw) + output[k][:, 0] * patchw
            pointy[:, k] = np.round(points[:, k + 5] -
                                    0.5 * patchw) + output[k][:, 1] * patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points
Пример #2
0
    def detect_face_limited(self, img, det_type=2):
        """
            detect one face over img, using the whole image as the only
            candidate box (no first-stage proposals are computed)
        Parameters:
        ----------
            img: numpy array with three dimensions, unpacked as
                (height, width, channels)
                input image
            det_type: int, default 2
                when >= 2 the candidate is first filtered and calibrated by
                self.RNet; otherwise it goes straight to self.ONet
        Returns:
        -------
            None when img is None, img is not 3-dimensional, or the
            candidate is rejected by a stage; otherwise the tuple
            (bboxes, points):
            bboxes: numpy array, n x 5 (x1,y1,x2,y2,score)
                bboxes
            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
                landmarks; cast to int32 when self.accurate_landmark is set
        """

        # reject missing or non 3-dimensional input with None instead of
        # failing on the shape unpack below
        if img is None or len(img.shape) != 3:
            return None

        height, width, _ = img.shape

        # the only candidate is the full image, with a fixed initial score
        total_boxes = np.array([[0.0, 0.0, width, height, 0.9]],
                               dtype=np.float32)

        if det_type >= 2:
            #############################################
            # second stage
            #############################################
            num_box = total_boxes.shape[0]

            # pad the bbox
            [dy, edy, dx, edx, y, ey, x, ex, tmpw,
             tmph] = self.pad(total_boxes, width, height)
            # (3, 24, 24) is the input shape for RNet
            input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

            for i in range(num_box):
                # copy the region given by pad() into a zero-initialised
                # patch, then resize it to the network input size
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i] + 1,
                    dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                               x[i]:ex[i] + 1, :]
                input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

            output = self.RNet.predict(input_buf)

            # filter the total_boxes with threshold
            passed = np.where(output[1][:, 1] > self.threshold[1])
            total_boxes = total_boxes[passed]

            if total_boxes.size == 0:
                return None

            total_boxes[:, 4] = output[1][passed, 1].reshape((-1, ))
            reg = output[0][passed]

            # nms
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick]
            total_boxes = self.calibrate_box(total_boxes, reg[pick])
            total_boxes = self.convert_to_square(total_boxes)
            total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)

        # filter the total_boxes with threshold
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        # ONet outputs as used here: [0] landmarks, [1] box regression,
        # [2] scores
        total_boxes[:, 4] = output[2][passed, 1].reshape((-1, ))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points: scale the network output by the box size
        # and shift by the box origin (uses the boxes before calibration)
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(
            total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(
            total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        # patch side: a quarter of the longer box side
        patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
                            total_boxes[:, 3] - total_boxes[:, 1] + 1)
        patchw = np.round(patchw * 0.25)

        # make it even
        patchw[np.where(np.mod(patchw, 2) == 1)] += 1

        # 5 landmarks x 3 channels = 15 input planes per box
        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            # square patch of side patchw centred on landmark i
            x, y = points[:, i], points[:, i + 5]
            x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
                np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
                height)
            for j in range(num_box):
                # the box passed to pad() is square by construction; tmpw is
                # used for both sides of the buffer
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j] + 1,
                      dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
                                                 x[j]:ex[j] + 1, :]
                input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
                    cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement: any row with a value further
            # than 0.35 from 0.5 is reset to 0.5 (the patch centre)
            tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            # patch origin plus the relative position inside the patch
            pointx[:, k] = np.round(points[:, k] -
                                    0.5 * patchw) + output[k][:, 0] * patchw
            pointy[:, k] = np.round(points[:, k + 5] -
                                    0.5 * patchw) + output[k][:, 1] * patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points
Пример #3
0
    def get_landmark(self, img, total_boxes, det_type=0):
        """
            compute five landmarks for every given box over img
        Parameters:
        ----------
            img: numpy array with three dimensions, unpacked as
                (height, width, channels)
                input image
            total_boxes: numpy array with at least 5 columns
                (x1,y1,x2,y2,score), one row per box
                boxes to run self.ONet on; column 4 is overwritten in
                place with the ONet score
            det_type: int, default 0
                when 0 the inputs are validated and None is returned for a
                missing img, a missing total_boxes or a non 3-dimensional
                img; any other value skips the validation
        Returns:
        -------
            None on invalid input (det_type == 0 only).
            (total_boxes, points) when self.accurate_landmark is false,
            points being n x 10 (x1, x2 ... x5, y1, y2 ..y5).
            points alone, n x 10 cast to int32, when self.accurate_landmark
            is true (landmarks refined by self.LNet).
        """

        # check input before touching img.shape, so that the checks can
        # actually fire instead of the shape unpack raising first
        if det_type == 0:
            if img is None or total_boxes is None:
                return None

            # only works for color image
            if len(img.shape) != 3:
                return None

        height, width, _ = img.shape

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            if (tmph[i] > 0):
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
                # NOTE(review): the crop is only filled when edy[i] <= 0;
                # every other box reaches ONet as an all-zero input. This
                # looks inverted but is kept as-is - confirm the intended
                # condition.
                if (edy[i] <= 0):
                    tmp[dy[i]:edy[i] + 1,
                        dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                   x[i]:ex[i] + 1, :]
                    input_buf[i, :, :, :] = adjust_input(
                        cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)

        # no threshold filtering here: every input box is kept and only its
        # score column is replaced (this writes into the caller's array)
        total_boxes[:, 4] = output[2][:, 1].reshape((-1, ))
        points = output[0][:]

        # compute landmark points: scale the network output by the box size
        # and shift by the box origin
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(
            total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(
            total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        # patch side: a quarter of the longer box side
        patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
                            total_boxes[:, 3] - total_boxes[:, 1] + 1)
        patchw = np.round(patchw * 0.25)

        # make it even
        patchw[np.where(np.mod(patchw, 2) == 1)] += 1

        # 5 landmarks x 3 channels = 15 input planes per box
        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            # square patch of side patchw centred on landmark i
            x, y = points[:, i], points[:, i + 5]
            x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
                np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
                height)
            for j in range(num_box):
                # the box passed to pad() is square by construction; tmpw is
                # used for both sides of the buffer
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j] + 1,
                      dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
                                                 x[j]:ex[j] + 1, :]
                input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
                    cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement: any row with a value further
            # than 0.35 from 0.5 is reset to 0.5 (the patch centre)
            tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            # patch origin plus the relative position inside the patch
            pointx[:, k] = np.round(points[:, k] -
                                    0.5 * patchw) + output[k][:, 0] * patchw
            pointy[:, k] = np.round(points[:, k + 5] -
                                    0.5 * patchw) + output[k][:, 1] * patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        # unlike the early return above, only the landmarks are returned
        return points
Пример #4
0
    def detect_face(self, img):
        """
            run the detection cascade (PNets -> RNet -> ONet, optionally
            LNet) over img
        Parameters:
        ----------
            img: numpy array with three dimensions, unpacked as
                (height, width, channels)
                input image
        Returns:
        -------
            None when img is None, img is not 3-dimensional, or no box
            survives a stage; otherwise the tuple (bboxes, points):
            bboxes: numpy array, n x 5 (x1,y1,x2,y2,score)
                bboxes
            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
                landmarks; cast to int32 when self.accurate_landmark is set
        """

        min_det_size = 12

        # missing input or anything that is not a 3-d array is rejected
        if img is None or len(img.shape) != 3:
            return None

        height, width, _ = img.shape

        def crop_batch(boxes, side, patch_dtype):
            # copy the region pad() reports for every box into a
            # zero-initialised patch and resize it to side x side
            count = boxes.shape[0]
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(boxes, width, height)
            batch = np.zeros((count, 3, side, side), dtype=np.float32)
            for n in range(count):
                patch = np.zeros((tmph[n], tmpw[n], 3), dtype=patch_dtype)
                patch[dy[n]:edy[n] + 1, dx[n]:edx[n] + 1, :] = img[y[n]:ey[n] + 1, x[n]:ex[n] + 1, :]
                batch[n, :, :, :] = adjust_input(cv2.resize(patch, (side, side)))
            return batch

        # scale pyramid: start at min_det_size / minsize and multiply by
        # self.factor while the scaled short side stays above min_det_size
        base_scale = min_det_size / self.minsize
        shortest = min(height, width) * base_scale
        scales = []
        while shortest > min_det_size:
            scales.append(base_scale * self.factor ** len(scales))
            shortest *= self.factor

        #############################################
        # first stage
        #############################################
        proposals = []
        for batch in self.slice_index(len(scales)):
            jobs = zip(repeat(img),
                       self.PNets[:len(batch)],
                       [scales[idx] for idx in batch],
                       repeat(self.threshold[0]))
            proposals.extend(self.Pool.map(detect_first_stage_warpper, jobs))

        # scales that produced nothing come back as None
        proposals = [found for found in proposals if found is not None]
        if not proposals:
            return None

        total_boxes = np.vstack(proposals)
        if total_boxes.size == 0:
            return None

        # merge the detections of all scales
        keep = nms(total_boxes[:, 0:5], 0.7, 'Union')
        total_boxes = total_boxes[keep]

        box_w = total_boxes[:, 2] - total_boxes[:, 0] + 1
        box_h = total_boxes[:, 3] - total_boxes[:, 1] + 1

        # apply the regression offsets stored in columns 5..8; the score in
        # column 4 is carried over
        refined_rows = [
            total_boxes[:, 0] + total_boxes[:, 5] * box_w,
            total_boxes[:, 1] + total_boxes[:, 6] * box_h,
            total_boxes[:, 2] + total_boxes[:, 7] * box_w,
            total_boxes[:, 3] + total_boxes[:, 8] * box_h,
            total_boxes[:, 4],
        ]
        total_boxes = np.vstack(refined_rows).T
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # second stage
        #############################################
        # (3, 24, 24) is the input shape for RNet
        rnet_out = self.RNet.predict(crop_batch(total_boxes, 24, np.uint8))

        # keep the boxes whose score beats the second threshold
        passed = np.where(rnet_out[1][:, 1] > self.threshold[1])
        total_boxes = total_boxes[passed]
        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = rnet_out[1][passed, 1].reshape((-1,))
        reg = rnet_out[0][passed]

        keep = nms(total_boxes, 0.7, 'Union')
        total_boxes = self.calibrate_box(total_boxes[keep], reg[keep])
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # third stage
        #############################################
        # (3, 48, 48) is the input shape for ONet
        onet_out = self.ONet.predict(crop_batch(total_boxes, 48, np.float32))

        # keep the boxes whose score beats the third threshold
        passed = np.where(onet_out[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]
        if total_boxes.size == 0:
            return None

        total_boxes[:, 4] = onet_out[2][passed, 1].reshape((-1,))
        reg = onet_out[1][passed]
        points = onet_out[0][passed]

        # landmarks: scale by the box size and shift by the box origin
        # (computed from the boxes before calibration)
        box_w = total_boxes[:, 2] - total_boxes[:, 0] + 1
        box_h = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = total_boxes[:, 0:1] + box_w[:, np.newaxis] * points[:, 0:5]
        points[:, 5:10] = total_boxes[:, 1:2] + box_h[:, np.newaxis] * points[:, 5:10]

        total_boxes = self.calibrate_box(total_boxes, reg)
        keep = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[keep]
        points = points[keep]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]

        # patch side: a quarter of the longer box side, bumped to even
        patchw = np.round(0.25 * np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
                                            total_boxes[:, 3] - total_boxes[:, 1] + 1))
        patchw[np.mod(patchw, 2) == 1] += 1

        # 5 landmarks x 3 channels = 15 input planes per box
        lnet_in = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for lm in range(5):
            # square patch of side patchw centred on landmark lm
            left = np.round(points[:, lm] - 0.5 * patchw)
            top = np.round(points[:, lm + 5] - 0.5 * patchw)
            patch_boxes = np.vstack([left, top, left + patchw - 1, top + patchw - 1]).T
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(patch_boxes, width, height)
            first_plane = lm * 3
            for b in range(num_box):
                # the patch box is square, tmpw serves for both sides
                patch = np.zeros((tmpw[b], tmpw[b], 3), dtype=np.float32)
                patch[dy[b]:edy[b] + 1, dx[b]:edx[b] + 1, :] = img[y[b]:ey[b] + 1, x[b]:ex[b] + 1, :]
                lnet_in[b, first_plane:first_plane + 3, :, :] = adjust_input(cv2.resize(patch, (24, 24)))

        lnet_out = self.LNet.predict(lnet_in)

        # columns 0..4 hold x, columns 5..9 hold y
        refined = np.zeros((num_box, 10))
        for lm in range(5):
            # do not make a large movement: rows with a value further than
            # 0.35 from 0.5 are reset to 0.5
            far_rows = np.where(np.abs(lnet_out[lm] - 0.5) > 0.35)[0]
            lnet_out[lm][far_rows] = 0.5

            refined[:, lm] = np.round(points[:, lm] - 0.5 * patchw) + lnet_out[lm][:, 0] * patchw
            refined[:, lm + 5] = np.round(points[:, lm + 5] - 0.5 * patchw) + lnet_out[lm][:, 1] * patchw

        return total_boxes, refined.astype(np.int32)
Пример #5
0
    def detect_face_limited(self, img, det_type=2):
        """
            detect one face over img, using the whole image as the only
            candidate box (no first-stage proposals are computed)
        Parameters:
        ----------
            img: numpy array with three dimensions, unpacked as
                (height, width, channels)
                input image
            det_type: int, default 2
                when >= 2 the candidate is first filtered and calibrated by
                self.RNet; otherwise it goes straight to self.ONet
        Returns:
        -------
            None when img is None, img is not 3-dimensional, or the
            candidate is rejected by a stage; otherwise the tuple
            (bboxes, points):
            bboxes: numpy array, n x 5 (x1,y1,x2,y2,score)
                bboxes
            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
                landmarks; cast to int32 when self.accurate_landmark is set
        """

        # return None for missing or non 3-dimensional input rather than
        # failing on the shape unpack below
        if img is None or len(img.shape) != 3:
            return None

        height, width, _ = img.shape

        # single candidate: the full image, with a fixed initial score
        total_boxes = np.array([[0.0, 0.0, width, height, 0.9]], dtype=np.float32)

        if det_type >= 2:
            #############################################
            # second stage
            #############################################
            num_box = total_boxes.shape[0]

            # pad the bbox
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
            # (3, 24, 24) is the input shape for RNet
            input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)

            for i in range(num_box):
                # copy the region given by pad() into a zero-initialised
                # patch, then resize it to the network input size
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
                input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))

            output = self.RNet.predict(input_buf)

            # filter the total_boxes with threshold
            passed = np.where(output[1][:, 1] > self.threshold[1])
            total_boxes = total_boxes[passed]

            if total_boxes.size == 0:
                return None

            total_boxes[:, 4] = output[1][passed, 1].reshape((-1,))
            reg = output[0][passed]

            # nms
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick]
            total_boxes = self.calibrate_box(total_boxes, reg[pick])
            total_boxes = self.convert_to_square(total_boxes)
            total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)

        for i in range(num_box):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i]+1, dx[i]:edx[i]+1, :] = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        output = self.ONet.predict(input_buf)

        # filter the total_boxes with threshold
        passed = np.where(output[2][:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            return None

        # ONet outputs as used here: [0] landmarks, [1] box regression,
        # [2] scores
        total_boxes[:, 4] = output[2][passed, 1].reshape((-1,))
        reg = output[1][passed]
        points = output[0][passed]

        # compute landmark points: scale the network output by the box size
        # and shift by the box origin (uses the boxes before calibration)
        bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
        bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
        points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms
        total_boxes = self.calibrate_box(total_boxes, reg)
        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        points = points[pick]

        if not self.accurate_landmark:
            return total_boxes, points

        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        # patch side: a quarter of the longer box side
        patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
        patchw = np.round(patchw*0.25)

        # make it even
        patchw[np.where(np.mod(patchw, 2) == 1)] += 1

        # 5 landmarks x 3 channels = 15 input planes per box
        input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        for i in range(5):
            # square patch of side patchw centred on landmark i
            x, y = points[:, i], points[:, i+5]
            x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw)
            [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
                                                                    width,
                                                                    height)
            for j in range(num_box):
                # the box passed to pad() is square by construction; tmpw is
                # used for both sides of the buffer
                tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
                tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
                input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

        output = self.LNet.predict(input_buf)

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement: any row with a value further
            # than 0.35 from 0.5 is reset to 0.5 (the patch centre)
            tmp_index = np.where(np.abs(output[k]-0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            # patch origin plus the relative position inside the patch
            pointx[:, k] = np.round(points[:, k] - 0.5*patchw) + output[k][:, 0]*patchw
            pointy[:, k] = np.round(points[:, k+5] - 0.5*patchw) + output[k][:, 1]*patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points
Пример #6
0
    def detect_face(self, img):
        """
            detect face over img
        Parameters:
        ----------
            img: numpy array, bgr order of shape (1, 3, n, m)
                input image
        Retures:
        -------
            bboxes: numpy array, n x 5 (x1,y2,x2,y2,score)
                bboxes
            points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
                landmarks
        """

        # check input

        global_start_time = time()
        global_first_start_time = time()

        if img is None:
            return None

        # only works for color image
        if len(img.shape) != 3:
            return None

        # detected boxes
#        total_boxes = []

#        height, width, _ = img.shape
#        minl = min( height, width)

# get all the valid scales
#        scales = []
#        m = MIN_DET_SIZE/self.minsize
#        minl *= m
#        factor_count = 0
#        while minl > MIN_DET_SIZE:
#            scales.append(m*self.factor**factor_count)
#            minl *= self.factor
#            factor_count += 1

#############################################
# first stage
#############################################
        total_boxes = []
        i = 0
        self.index = []
        self.t = []
        for scale in self.scales:
            return_boxes = detect_first_stage(img, i, self.threshold[0],
                                              self.ctx)
            if return_boxes is not None:
                total_boxes.append(return_boxes)
            i += 1
#          return_boxes = self.Pool.apply_async(detect_first_stage_warpper, (img, i, self.threshold[0], self.ctx))
# self.index.append(i)
# return_boxes = self.Pool.map(detect_first_stage_warpper, \
#         izip(repeat(img), [i]))
# start_time1 = time()
#self.t.append(MyThread((img, self.executor1[i], scale, self.threshold[0], self.ctx)))
#self.t[i].start()
# i += 1

# for j in range(i):
#     self.t[j].join()
#     return_boxes = self.t[j].return_boxes
# if return_boxes is not None:
#     total_boxes.append(return_boxes)

# end_time1 = time()
#print 'append time: %.4f'%(end_time1 - start_time1)

# self.Pool.close()
# self.Pool.join()
#        print 'first stage time:%.4f'%(end_time - start_time)
#print 'first stage end'
#        sliced_index = self.slice_index(len(scales))
#        total_boxes = []
#        for batch in sliced_index:
#            local_boxes = self.Pool.map( detect_first_stage_warpper, \
#                    izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
#            total_boxes.extend(local_boxes)

# remove the Nones
        total_boxes = [i for i in total_boxes if i is not None]

        if len(total_boxes) == 0:
            if has_landmark == True:
                return None, None
            else:
                return None
            return None

        #print 'before'
        #print len(total_boxes)
        total_boxes = np.vstack(total_boxes)

        #print 'after'
        #print total_boxes.shape
        if total_boxes.size == 0:
            if has_landmark == True:
                return None, None
            else:
                return None
            return None

        # merge the detection from first stage
        #print 'global nms:'  + str(total_boxes.shape[0])
        total_boxes.dtype = 'float32'
        pick = gpu_nms(total_boxes[:, 0:5], float(0.7), GPU_ID)
        #pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
        total_boxes = total_boxes[pick]
        #print 'global nms time:%.4f'%(end_time - start_time)

        # refine the bboxes
        if first_has_reg == True:
            bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
            bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
        #    total_boxes = np.vstack([total_boxes[:, 0]+total_boxes[:, 5] * bbw,
        #                             total_boxes[:, 1]+total_boxes[:, 6] * bbh,
        #                             total_boxes[:, 2]+total_boxes[:, 7] * bbw,
        #                             total_boxes[:, 3]+total_boxes[:, 8] * bbh,
        #                             total_boxes[:, 4]
        #                             ])

        #   total_boxes = total_boxes.T
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])

        #return total_boxes
        #############################################
        # second stage
        #############################################
        num_box = total_boxes.shape[0]
        print 'first stage num: %d' % (num_box)

        #return total_boxes
        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, self.width, self.height)
        # (3, 24, 24) is the input shape for RNet
        input_buf = np.zeros((self.second_stage_num, 3, 24, 24),
                             dtype=np.float32)

        #print 'global_first time;%.4f'%(global_first_end_time - global_first_start_time)

        for i in range(num_box):
            if i >= self.second_stage_num:
                break
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            # tmp = img[y[i]:ey[i]+1, x[i]:ex[i]+1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))
        #    input_buf[i, :, :, :] = adjust_input(mx.image.imresize(tmp, 24, 24).asnumpy())
        #print 'prepare data: %.4f'%(end_time - start_time)

        if len(input_buf) < self.second_stage_num:
            input_buf = np.lib.pad(
                input_buf, ((self.second_stage_num - len(input_buf), 0),
                            (0, 0), (0, 0), (0, 0)), 'constant')
        #print 'first stage :' + str(num_box)

        if True:
            # start_time = time()
            # data_shape = [("data", input_buf.shape)]
            # input_shapes = dict(data_shape)
            # self.executor2 = self.executor2.reshape(allow_up_sizing = True, **input_shapes)
            # end_time = time()
            # print 'reshape time: %.4f'%(end_time - start_time)
            #executor = self.RNet.simple_bind(ctx = self.ctx, **input_shapes)
            #for key in executor.arg_dict.keys():
            #    if key in self.arg_params2:
            #        self.arg_params2[key].copyto(executor.arg_dict[key])

            #root_path = '/media/disk1/yangfan/wider_faces/mtcnn_data/'

            start_time = time()
            self.executor2.forward(is_train=False, data=input_buf)
            output1 = self.executor2.outputs[0].asnumpy()
            output2 = self.executor2.outputs[1].asnumpy()
            #  print 'test1'
            end_time = time()
        #    print 'second stage time: %.4f'%(end_time - start_time)
#  print output.shape
# print end_time - start_time
#output = self.RNet.predict(input_buf)
# print output[:,:]

# filter the total_boxes with threshold
        if has_reg == True:
            passed = np.where(output1[:, 1] > self.threshold[1])
        else:
            #    print output.shape
            passed = np.where(output[:, 1] > self.threshold[1])

        #print output1[:, :]
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            if has_landmark == True:
                return None, None
            else:
                return None
    # print output2
        if has_reg == True:
            total_boxes[:, 4] = output1[passed, 1].reshape((-1, ))
            reg = output2[passed]
        else:
            total_boxes[:, 4] = output[passed, 1].reshape((-1, ))

        # nms
        pick = gpu_nms(total_boxes, 0.7, GPU_ID)
        total_boxes = total_boxes[pick]
        if has_reg == True:
            total_boxes = self.calibrate_box(total_boxes, reg[pick])
        total_boxes = self.convert_to_square(total_boxes)
        total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
        #print 'second nms:%.4f'%(end_time -start_time)

        #############################################
        # third stage
        #############################################
        num_box = total_boxes.shape[0]

        # pad the bbox
        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes, self.width, self.height)
        # (3, 48, 48) is the input shape for ONet
        input_buf = np.zeros((self.third_stage_num, 3, 48, 48),
                             dtype=np.float32)

        #global_second_end_time = time()
        #print 'global second time:%.4f'%(global_second_end_time - global_second_start_time)

        #global_third_start_time = time()

        #start_time = time()
        for i in range(num_box):
            if i >= self.third_stage_num:
                break
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))

        if len(input_buf) < self.third_stage_num:
            input_buf = np.lib.pad(input_buf,
                                   (self.third_stage_num - len(input_buf, 0),
                                    (0, 0), (0, 0), (0, 0)), 'constant')
        print 'second stage :' + str(num_box)
        #end_time = time()
        #print 'prepare data third stage:%.4f'%(end_time - start_time)
        #return total_boxes
        if True:
            # data_shape = [("data", input_buf.shape)]
            # input_shapes = dict(data_shape)
            # executor = self.ONet.simple_bind(ctx = self.ctx, **input_shapes)
            # for key in executor.arg_dict.keys():
            #     if key in self.arg_params3:
            #         self.arg_params3[key].copyto(executor.arg_dict[key])

            #root_path = '/media/disk1/yangfan/wider_faces/mtcnn_data/'

            # start_time = time()
            # data_shape = [("data", input_buf.shape)]
            # input_shapes = dict(data_shape)
            # self.executor3 = self.executor3.reshape(allow_up_sizing = True, **input_shapes)
            # end_time = time()

            # print 'reshape time: %.4f'%(end_time - start_time)
            #   start_time = time()
            self.executor3.forward(is_train=False, data=input_buf)
            output1 = self.executor3.outputs[0].asnumpy()
            output2 = self.executor3.outputs[1].asnumpy()
            output3 = self.executor3.outputs[2].asnumpy()
            output3_1 = self.executor3.outputs[3].asnumpy()
            print output3_1.shape
#  print 'test1'
#  end_time = time()
#    print 'third stage time: %.4f'%(end_time - start_time)
#  print output.shape
# print end_time - start_time
#output = self.RNet.predict(input_buf)
#output = self.ONet.predict(input_buf)

#  print output
# filter the total_boxes with threshold
        passed = np.where(output1[:, 1] > self.threshold[2])
        total_boxes = total_boxes[passed]

        if total_boxes.size == 0:
            if has_landmark == True:
                return None, None
            else:
                return None

        total_boxes[:, 4] = output1[passed, 1].reshape((-1, ))
        if has_reg == True:
            reg = output2[passed]
        if has_landmark == True:
            points = output3[passed]

        # compute landmark points
        if has_landmark == True:
            bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
            bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
            #for i in range(len(points)):
            for t in range(10):
                if t % 2 == 0:
                    points[:, t] = points[:, t] * bbw + total_boxes[:, 0]
                else:
                    points[:, t] = points[:, t] * bbh + total_boxes[:, 1]
            #points[:, 0:5] = np.expand_dims(total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
            #points[:, 5:10] = np.expand_dims(total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]

        # nms
#     start_time = time()
        if has_reg == True:
            total_boxes = self.calibrate_box(total_boxes, reg)

        pick = nms(total_boxes, 0.7, 'Min')
        total_boxes = total_boxes[pick]
        if has_landmark == True:
            points = points[pick]
    #  global_end_time = time()
    #  print 'third time %.4f'%(global_end_time - start_time)
    #  print 'global time %.4f'%(global_end_time - global_start_time)
    #  print 'global third time: %.4f'%(global_end_time - global_third_start_time)
        if not self.accurate_landmark:
            if has_landmark == True:
                return total_boxes, points
            else:
                return total_boxes

        #return total_boxes, points
        #############################################
        # extended stage
        #############################################
        num_box = total_boxes.shape[0]
        # patchw = np.maximum(total_boxes[:, 2]-total_boxes[:, 0]+1, total_boxes[:, 3]-total_boxes[:, 1]+1)
        # patchw = np.round(patchw*0.25)

        # make it even
        # patchw[np.where(np.mod(patchw,2) == 1)] += 1

        #  input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
        #  for i in range(5):
        #      x, y = points[:, i], points[:, i+5]
        #      x, y = np.round(x-0.5*patchw), np.round(y-0.5*patchw)
        #      [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(np.vstack([x, y, x+patchw-1, y+patchw-1]).T,
        #          width,
        #          height)
        #      for j in range(num_box):
        #          tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
        #          tmpim[dy[j]:edy[j]+1, dx[j]:edx[j]+1, :] = img[y[j]:ey[j]+1, x[j]:ex[j]+1, :]
        #          input_buf[j, i*3:i*3+3, :, :] = adjust_input(cv2.resize(tmpim, (24, 24)))

        total_boxes_tmp = self.convert_to_square(total_boxes)
        #total_boxes_tmp = total_boxes.copy()

        total_boxes_tmp[:, 0:4] = np.round(total_boxes_tmp[:, 0:4])
        if False:
            width = total_boxes_tmp[:, 2] - total_boxes_tmp[:, 0]
            height = total_boxes_tmp[:, 3] - total_boxes_tmp[:, 1]
            total_boxes_tmp[:, 0] += np.round(0.1 * (width))
            # index = np.where(total_boxes_tmp[:, 0] < 0)
            # total_boxes_tmp[index, 0] = 0

            total_boxes_tmp[:, 1] += np.round(0.1 * (height))
            # index = np.where(total_boxes_tmp[:, 1] < 0)
            # total_boxes_tmp[index, 1] = 0

            total_boxes_tmp[:, 2] -= np.round(0.1 * (width))
            #      index = np.where(total_boxes_tmp[:, 2] >= self.width)
            #      total_boxes_tmp[index, 2] = self.width - 1

            total_boxes_tmp[:, 3] -= np.round(0.1 * (height))
    #     index = np.where(total_boxes_tmp[:, 3] >= self.height)
    #     total_boxes_tmp[index, 3] = self.height - 1

        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = self.pad(total_boxes_tmp, self.width, self.height)
        input_buf = np.zeros((self.fourth_stage_num, 3, 48, 48),
                             dtype=np.float32)
        input_buf2 = np.zeros((self.fourth_stage_num, 3, 48, 48),
                              dtype=np.float32)
        input_buf3 = np.zeros((self.fourth_stage_num, 3, 64, 64),
                              dtype=np.float32)
        input_buf4 = np.zeros((self.fourth_stage_num, 3, 96, 96),
                              dtype=np.float32)
        #input_buf_rotate = np.zeros((self.fourth_stage_num, 3, 48, 48), dtype=np.float32)

        num_box = len(total_boxes_tmp)
        index = np.zeros((self.fourth_stage_num), dtype=np.uint8)
        for i in range(num_box):
            if i >= self.fourth_stage_num:
                break
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
            if tmph[i] > 100 or tmpw[i] > 100:
                index[i] = 1
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
                                                             x[i]:ex[i] + 1, :]
            #    tmp = img[y[i]: ey[i] + 1, x[i]: ex[i] + 1, :]
            input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))
            # height = tmp.shape[0]
            # width = tmp.shape[1]
            # if height > 80 or width > 80:
            #     tmp = cv2.resize(tmp, (height / 8, width / 8))
            input_buf2[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))
            input_buf3[i, :, :, :] = adjust_input(cv2.resize(tmp, (64, 64)))
            input_buf4[i, :, :, :] = adjust_input(cv2.resize(tmp, (96, 96)))

            #input_buf_rotate[i] = input_buf[i].copy()

        if len(input_buf) < self.fourth_stage_num:
            input_buf = np.lib.pad(input_buf,
                                   (0, self.fourth_stage_num - len(input_buf)),
                                   (0, 0), (0, 0), (0, 0), 'constant')
            input_buf2 = np.lib.pad(
                input_buf2, (0, self.fourth_stage_num - len(input_buf2)),
                (0, 0), (0, 0), (0, 0), 'constant')
            input_buf3 = np.lib.pad(
                input_buf3, (0, self.fourth_stage_num - len(input_buf3)),
                (0, 0), (0, 0), (0, 0), 'constant')
            input_buf4 = np.lib.pad(
                input_buf4, (0, self.fourth_stage_num - len(input_buf4)),
                (0, 0), (0, 0), (0, 0), 'constant')
        #print 'third stage :' + str(num_box)

        #print 'prepare data fourth stage: %.4f'%(end_time - start_time)

        self.executor4_0.forward(is_train=False, data=input_buf)
        output0_0 = self.executor4_0.outputs[0].asnumpy()
        output0_1 = self.executor4_0.outputs[1].asnumpy()
        output0_2 = self.executor4_0.outputs[2].asnumpy()
        output0_0 *= 90.
        output0_1 *= 90.
        output0_2 *= 90.
        #for t in range(input_buf_rotate.shape[0]):
        #     if output0_2[t] > 15 or output0_2[t] < -15:
        #         tmp_img = input_buf_rotate[t].transpose((1, 2, 0))
        #         tmp_img = tmp_img / 0.0078125 + 127.5
        #         angle = output0_2[t]
        #         scale = 0.9
        #         rotateMat = cv2.getRotationMatrix2D((48 / 2, 48 / 2), angle, scale)
        #         rotateImg = cv2.warpAffine(tmp_img, rotateMat, (48, 48))

        #         rotateImg = rotateImg.transpose((2, 0, 1))
        #         rotateImg = (rotateImg - 127.5) * 0.007812
        #         input_buf_rotate[t, :, :, :] = rotateImg

        self.executor4_1.forward(is_train=False, data=input_buf)
        self.executor4_3.forward(is_train=False, data=input_buf)
        # self.executor4_4.forward(is_train = False, data = input_buf2)

        output1 = self.executor4_1.outputs[0].asnumpy()
        output2 = self.executor4_3.outputs[0].asnumpy()
        # output2_1 = self.executor4_4.outputs[0].asnumpy()

        # pick = np.argmax(output2, axis = 1)
        # pick = (pick * 10 + 5) / 100.0
        # pick = np.reshape(pick, (pick.shape[0], 1))
        # output4 = self.executor4.outputs[3].asnumpy()

        #print 'cnn fourth stage: %.4f'%(end_time - start_time)
        # output = self.LNet.predict(input_buf)

        if num_box > self.fourth_stage_num:
            num_box = self.fourth_stage_num

    # for tt in range(num_box):
    #     if index[tt] == 0:
    #         output2[tt, :] = output2_1[tt, :]

        total_boxes = np.hstack([
            total_boxes_tmp[0:num_box], output1[0:num_box, 0:1],
            output2[0:num_box, 1:2]
        ])

        #return total_boxes[0:num_box], points[0: num_box]
        self.executor4_2.forward(is_train=False, data=input_buf)
        output3 = self.executor4_2.outputs[0].asnumpy()

        self.executor4_5.forward(is_train=False, data=input_buf)
        output4 = self.executor4_5.outputs[0].asnumpy()
        # print 'cnn fifth stage: %.4f'%(end_time - start_time)

        #       for i in range(101):
        #           output1[0:num_box, 0] += i * output1[0:num_box, i]
        #pick = np.argmax(output1, axis = 1)

        #pick = pick * 10
        #pick = np.reshape(pick, (pick.shape[0], 1))
        total_boxes = np.hstack(
            [total_boxes[0:num_box], output3[0:num_box, 1:2]])

        # total_boxes[0:num_box, 5] = output1[:, 0]

        self.executor5.forward(is_train=False, data=input_buf2)
        output1 = self.executor5.outputs[0].asnumpy()

        age = np.zeros((num_box, 1), dtype=np.float32)
        for i in range(num_box):
            age[i] = output1[i][0] * 1.0 + output1[i][1] * 5.0 + output1[i][
                2] * 11 + output1[i][3] * 16 + output1[i][4] * 23 + output1[i][
                    5] * 28 + output1[i][6] * 33 + output1[i][7] * 40

        pick = np.argmax(output1, axis=1)
        #pick = (pick - 1) * 5 + 10
        pick = np.reshape(pick, (pick.shape[0], 1))

        output1 = np.max(output1, axis=1)
        output1 = np.reshape(output1, (output1.shape[0], 1))

        total_boxes = np.hstack([
            total_boxes[0:num_box], output1[0:num_box], pick[0:num_box], age,
            output4[0:num_box, 1:2], output0_0[0:num_box],
            output0_1[0:num_box], output0_2[0:num_box]
        ])

        self.executor_true.forward(is_train=False, data=input_buf3)
        output1 = self.executor_true.outputs[0].asnumpy()

        self.executor_clear.forward(is_train=False, data=input_buf4)
        output2 = self.executor_clear.outputs[0].asnumpy()

        total_boxes = np.hstack([
            total_boxes[0:num_box], output1[0:num_box, 1:2], output2[0:num_box,
                                                                     1:2]
        ])

        return total_boxes[0:num_box], points[0:num_box]

        pointx = np.zeros((num_box, 5))
        pointy = np.zeros((num_box, 5))

        for k in range(5):
            # do not make a large movement
            tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
            output[k][tmp_index[0]] = 0.5

            pointx[:, k] = np.round(points[:, k] -
                                    0.5 * patchw) + output[k][:, 0] * patchw
            pointy[:, k] = np.round(points[:, k + 5] -
                                    0.5 * patchw) + output[k][:, 1] * patchw

        points = np.hstack([pointx, pointy])
        points = points.astype(np.int32)

        return total_boxes, points