Example #1
class Retina_Detector:
    def __init__(self):
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.opt = get_config()
        if self.opt.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif self.opt.network == "resnet50":
            self.cfg = cfg_re50
        # net and model
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = self.load_model(self.net, self.opt.trained_model, self.opt.cpu)
        self.net.eval()
       
        self.net = self.net.to(self.opt.device)


    def check_keys(self, model, pretrained_state_dict):
        ckpt_keys = set(pretrained_state_dict.keys())
        model_keys = set(model.state_dict().keys())
        used_pretrained_keys = model_keys & ckpt_keys
        unused_pretrained_keys = ckpt_keys - model_keys
        missing_keys = model_keys - ckpt_keys
        print('Missing keys:{}'.format(len(missing_keys)))
        print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
        print('Used keys:{}'.format(len(used_pretrained_keys)))
        assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
        return True


    def remove_prefix(self, state_dict, prefix):
        ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
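        # e.g. an old-style checkpoint key 'module.body.conv1.weight' becomes
        # 'body.conv1.weight' (illustrative key name)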
        print('remove prefix \'{}\''.format(prefix))
        f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
        return {f(key): value for key, value in state_dict.items()}


    def load_model(self, model, pretrained_path, load_to_cpu):
        print('Loading pretrained model from {}'.format(pretrained_path))
        if load_to_cpu:
            pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
        else:
            device = torch.cuda.current_device()
            pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
        if "state_dict" in pretrained_dict.keys():
            pretrained_dict = self.remove_prefix(pretrained_dict['state_dict'], 'module.')
        else:
            pretrained_dict = self.remove_prefix(pretrained_dict, 'module.')
        self.check_keys(model, pretrained_dict)
        model.load_state_dict(pretrained_dict, strict=False)
        return model
        
    
    def img_process(self, img):
        target_size = self.cfg["image_size"]
        max_size = 1080
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
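        # Worked example (illustrative numbers): a 480x640 input with
        # target_size 320 gives im_scale = 320 / 480 ~= 0.667; since
        # 0.667 * 640 ~= 427 <= 1080 the max_size clamp does not trigger,
        # and the image is resized to roughly 320 x 427.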
        im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        return im, im_scale
    
    def detect(self, img):
        img, imscale = self.img_process(img)

        resize = 1
        img_raw = img
        img = np.float32(img_raw)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.opt.device)
        scale = scale.to(self.opt.device)
       
        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        print('net forward time: {:.4f}'.format(time.time() - tic))
        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
       
        priors = priorbox.forward()
        
        priors = priors.to(self.opt.device)
        prior_data = priors.data
        
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.opt.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()
        
        # ignore low scores
        inds = np.where(scores > self.opt.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.opt.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, self.opt.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        print("len", len(dets))
        landms = landms[keep]
        dets /= imscale
        landms /= imscale

        # keep top-K faster NMS
        dets = dets[:self.opt.keep_top_k, :]
        boxes = [list(map(int, x)) for x in dets]

        landms = landms[:self.opt.keep_top_k, :]
        lands = [list(map(int, x)) for x in landms]
        # dets = np.concatenate((dets, landms), axis=1)
        

        return boxes, lands
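Example #1 relies on a py_cpu_nms helper that is not shown. A minimal sketch of the standard greedy CPU non-maximum suppression it refers to, assuming each row of dets is [x1, y1, x2, y2, score] (an illustration, not necessarily this project's exact implementation):

import numpy as np

def py_cpu_nms(dets, thresh):
    """Greedy NMS over [x1, y1, x2, y2, score] rows; returns indices to keep."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]                        # best remaining box
        keep.append(i)
        # intersection of box i with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes that overlap the kept box by more than thresh
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep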
Example #2
class FaceDetector:

    def __init__(self):
        # TODO: add initialization logic
        torch.set_grad_enabled(False)
        self.cfg = None
        if args.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif args.network == "resnet50":
            self.cfg = cfg_re50
        elif args.network == "resnet18":
            self.cfg = cfg_re18
        elif args.network == "resnet34":
            self.cfg = cfg_re34
        # net and model
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        # self.net = load_model(self.net, args.trained_model, args.cpu)
        self.net.eval()
        print('Finished loading model!')
        print(self.net)
        cudnn.benchmark = True
        self.device = torch.device("cpu" if args.cpu else "cuda")
        self.net = self.net.to(self.device)

        self.resize = 1

    def detect_image(self, img) -> List[FaceDetection]:
        # TODO: add detect logic for single image
        print(np.shape(img))
        tic = time.time()
        img = np.float32(img)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf, landms = self.net(img)  # forward pass
        
        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)

        keep = py_cpu_nms(dets, args.nms_threshold)

        dets = dets[keep, :]

        dets = dets[:args.keep_top_k, :]

        # show image
        box_list = []
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            score = b[4]
            b = list(map(int, b))
            box_list.append(FaceDetection(b[0], b[1], b[2], b[3], 0, score))

        print('net forward time: {:.4f}'.format(time.time() - tic))

        return box_list

    def detect_images(self, imgs) -> List[List[FaceDetection]]:
        boxes_list = []
        for img in imgs:
            boxes = self.detect_image(img)
            boxes_list.append(boxes)
        return boxes_list
    
    def visualize(self, image, detection_list: List[FaceDetection], color=(0,0,255), thickness=5):
        img = image.copy()
        for detection in detection_list:
            bbox = detection.bbox
            p1 = bbox.left, bbox.top
            p2 = bbox.right, bbox.bottom
            cv2.rectangle(img, p1, p2, color, thickness=thickness, lineType=cv2.LINE_AA)
        return img
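A minimal usage sketch for this class (the image path is a placeholder, the module-level args namespace is assumed to be configured, and note the weight-loading line is commented out in this snippet):

import cv2

detector = FaceDetector()
frame = cv2.imread('test.jpg')                 # hypothetical input image
detections = detector.detect_image(frame)      # List[FaceDetection]
annotated = detector.visualize(frame, detections)
cv2.imwrite('test_out.jpg', annotated)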
class FaceDetectorRetinaFace(object):
    """
        Class to support the face detection via RetinaFace
        Based on the code found at https://github.com/biubug6/Pytorch_Retinaface/blob/master/test_fddb.py
    """
    def __init__(
        self,
        enable_cuda=settings.CUDA_ENABLED,
        face_rect_expand_factor=FACE_RECT_EXPAND_FACTOR,
        trained_model=settings.FACE_DETECTION_MODEL,
        network=settings.FACE_DETECTION_NETWORK,
    ):
        """
            Initializes the RetinaFace in PyTorch
            Arguments:
                enable_cuda: boolean indicating whether CUDA must be used for the extraction of the features
                face_rect_expand_factor: Expansion factor for the detection face rectangle
                trained_model: Path to a pretrained model file with weights
                network: Name of the network used for the detection. The options are 'mobile0.25' or 'resnet50'.
        """
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.is_cuda_enable = enable_cuda
        self.face_rect_expand_factor = face_rect_expand_factor
        self.trained_model = trained_model
        self.cfg = None
        if network == 'mobile0.25':
            self.cfg = cfg_mnet
        elif network == 'resnet50':
            self.cfg = cfg_re50
        assert self.cfg is not None, "Network name can only be 'resnet50' or 'mobile0.25'!"
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = self.load_model(self.net, self.trained_model,
                                   not self.is_cuda_enable)
        self.net.eval()
        self.device = torch.device(
            'cpu' if not self.is_cuda_enable else 'cuda')
        self.net = self.net.to(self.device)

    def check_keys(self, model, pretrained_state_dict):
        """
            Checks missing dictionary keys in the pretrained model.

            Extracted 'as is' from https://github.com/biubug6/Pytorch_Retinaface/blob/master/test_fddb.py
        """
        ckpt_keys = set(pretrained_state_dict.keys())
        model_keys = set(model.state_dict().keys())
        used_pretrained_keys = model_keys & ckpt_keys
        unused_pretrained_keys = ckpt_keys - model_keys
        missing_keys = model_keys - ckpt_keys
        print('Missing keys:{}'.format(len(missing_keys)))
        print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
        print('Used keys:{}'.format(len(used_pretrained_keys)))
        assert len(
            used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
        return True

    def remove_prefix(self, state_dict, prefix):
        """
            Old style model is stored with all names of parameters sharing common prefix 'module.'

            Extracted 'as is' from https://github.com/biubug6/Pytorch_Retinaface/blob/master/test_fddb.py
        """
        print('remove prefix \'{}\''.format(prefix))
        f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
        return {f(key): value for key, value in state_dict.items()}

    def load_model(self, model, pretrained_path, load_to_cpu):
        """
            Loads the specified trained model
            Arguments:
                load_to_cpu: boolean indicating whether to load the model on the CPU or the GPU
                model: RetinaFace model object
                pretrained_path: Path to a pretrained model file with weights

            Extracted 'as is' from https://github.com/biubug6/Pytorch_Retinaface/blob/master/test_fddb.py
        """
        print('Loading pretrained model from {}'.format(pretrained_path))
        if load_to_cpu:
            pretrained_dict = torch.load(
                pretrained_path, map_location=lambda storage, loc: storage)
        else:
            device = torch.cuda.current_device()
            pretrained_dict = torch.load(
                pretrained_path,
                map_location=lambda storage, loc: storage.cuda(device))
        if "state_dict" in pretrained_dict.keys():
            pretrained_dict = self.remove_prefix(pretrained_dict['state_dict'],
                                                 'module.')
        else:
            pretrained_dict = self.remove_prefix(pretrained_dict, 'module.')
        self.check_keys(model, pretrained_dict)
        model.load_state_dict(pretrained_dict, strict=False)
        return model

    def detect_faces(self, img, return_best=False):
        """
            Computes a list of faces detected in the input image in the form of a list of bounding-boxes, one per each detected face.
            Arguments:
                img: The image to be input to the RetinaFace model
                return_best: boolean indicating whether to return just to best detection or the complete list of detections
            Returns:
                A list of arrays. Each array contains the image coordinates of the corners of a bounding-box and the score of the detection
                in the form [x1,y1,x2,y2,score], where (x1,y1) are the integer coordinates of the top-left corner of the box and (x2,y2) are
                the coordinates of the bottom-right corner of the box. The score is a floating-point number.
                When return_best is True, the returned list will contain only one bounding-box
        """
        if img is not None:
            try:
                im_height, im_width, _ = img.shape
                scale = torch.Tensor(
                    [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
                img = numpy.float32(img)
                img -= (104, 117, 123)
                img = img.transpose(2, 0, 1)
                img = torch.from_numpy(img).unsqueeze(0)
                img = img.to(self.device)
                scale = scale.to(self.device)

                # note below that the landmarks (3rd returned value) are ignored
                loc, conf, _ = self.net(img)

                priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
                priors = priorbox.forward()
                priors = priors.to(self.device)
                prior_data = priors.data
                boxes = decode(loc.data.squeeze(0), prior_data,
                               self.cfg['variance'])
                boxes = boxes * scale
                boxes = boxes.cpu().numpy()
                scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

                # ignore low scores
                inds = numpy.where(scores > CONF_THRESH)[0]
                boxes = boxes[inds]
                scores = scores[inds]

                # keep top-K before NMS
                # order = scores.argsort()[::-1][:args.top_k]
                order = scores.argsort()[::-1]
                boxes = boxes[order]
                scores = scores[order]

                # do NMS
                dets = numpy.hstack(
                    (boxes, scores[:, numpy.newaxis])).astype(numpy.float32,
                                                              copy=False)
                keep = py_cpu_nms(dets, NMS_THRESH)

                # keep top-K faster NMS
                detections = dets[keep, :]

                if len(detections) > 0:
                    if return_best:
                        # detections is ordered by confidence so the first one is the best
                        det = numpy.squeeze(detections[0, 0:5])
                        bounding_box = numpy.zeros(5, dtype=numpy.float32)
                        # extend detection
                        extend_factor = self.face_rect_expand_factor
                        width = round(det[2] - det[0] + 1)
                        height = round(det[3] - det[1] + 1)
                        length = (width + height) / 2.0
                        centrepoint = [
                            round(det[0]) + width / 2.0,
                            round(det[1]) + height / 2.0
                        ]
                        bounding_box[0] = centrepoint[0] - round(
                            (1 + extend_factor) * length / 2.0)
                        bounding_box[1] = centrepoint[1] - round(
                            (1 + extend_factor) * length / 2.0)
                        bounding_box[2] = centrepoint[0] + round(
                            (1 + extend_factor) * length / 2.0)
                        bounding_box[3] = centrepoint[1] + round(
                            (1 + extend_factor) * length / 2.0)
                        # prevent going off image
                        bounding_box[0] = int(max(bounding_box[0], 0))
                        bounding_box[1] = int(max(bounding_box[1], 0))
                        bounding_box[2] = int(
                            min(bounding_box[2], img.shape[3]))
                        bounding_box[3] = int(
                            min(bounding_box[3], img.shape[2]))
                        bounding_box[4] = det[4]
                        return [bounding_box]
                    else:
                        det_list = []
                        for j in range(len(detections)):
                            det = numpy.squeeze(detections[j, 0:5])
                            bounding_box = numpy.zeros(5, dtype=numpy.float32)
                            # extend detection
                            extend_factor = self.face_rect_expand_factor
                            width = round(det[2] - det[0] + 1)
                            height = round(det[3] - det[1] + 1)
                            length = (width + height) / 2.0
                            centrepoint = [
                                round(det[0]) + width / 2.0,
                                round(det[1]) + height / 2.0
                            ]
                            bounding_box[0] = centrepoint[0] - round(
                                (1 + extend_factor) * length / 2.0)
                            bounding_box[1] = centrepoint[1] - round(
                                (1 + extend_factor) * length / 2.0)
                            bounding_box[2] = centrepoint[0] + round(
                                (1 + extend_factor) * length / 2.0)
                            bounding_box[3] = centrepoint[1] + round(
                                (1 + extend_factor) * length / 2.0)
                            # prevent going off image
                            bounding_box[0] = int(max(bounding_box[0], 0))
                            bounding_box[1] = int(max(bounding_box[1], 0))
                            bounding_box[2] = int(
                                min(bounding_box[2], img.shape[3]))
                            bounding_box[3] = int(
                                min(bounding_box[3], img.shape[2]))
                            bounding_box[4] = det[4]
                            det_list.append(bounding_box)
                        return det_list
                else:
                    return None

            except Exception as e:
                print('Exception in FaceDetectorRetinaFace: ' + str(e))

        return None
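Given the documented [x1, y1, x2, y2, score] return format, a minimal usage sketch (the image path is a placeholder; the settings module supplies the model and network defaults):

import cv2

detector = FaceDetectorRetinaFace()
img = cv2.imread('group.jpg')                  # hypothetical input image
faces = detector.detect_faces(img, return_best=False)
if faces is not None:
    for x1, y1, x2, y2, score in faces:
        print('face (%d, %d)-(%d, %d) score %.3f' % (x1, y1, x2, y2, score))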
Example #4
def wxf():
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
    torch.set_grad_enabled(False)

    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    # print('Finished loading model!')
    # print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    images = os.listdir('./images')
    known_face_names = []
    known_face_encodings = []
    for image_name in images:
        someone_img = face_recognition.load_image_file("images/" + image_name)
        someone_face_encoding = face_recognition.face_encodings(someone_img)[0]
        known_face_names.append(image_name)
        known_face_encodings.append(someone_face_encoding)
    name = "Unknown"
    while (1):
        ret, imgre = cap.read()
        small_frame = cv2.resize(imgre, (0, 0), fx=0.25, fy=0.25)
        if not ret:
            print('Video open error.')
            break
        rgb_small_frame = small_frame[:, :, ::-1]
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        for i in face_encodings:
            match = face_recognition.compare_faces(known_face_encodings, i, tolerance=0.39)
            if True in match:
                match_index = match.index(True)
                name = known_face_names[match_index].split('.')[0]
                # To print name and time



        img = np.float32(imgre)

        target_size = 1600
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1

        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        loc, conf, landms = net(img)  # forward pass

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)

        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        # dets = dets[:args.keep_top_k, :]
        # landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        for b in dets:
            if b[4] < args.vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(imgre, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(imgre, name, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
            #
            # landms
            # cv2.circle(imgre, (b[5], b[6]), 1, (0, 0, 255), 4)
            # cv2.circle(imgre, (b[7], b[8]), 1, (0, 255, 255), 4)
            # cv2.circle(imgre, (b[9], b[10]), 1, (255, 0, 255), 4)
            # cv2.circle(imgre, (b[11], b[12]), 1, (0, 255, 0), 4)
            # cv2.circle(imgre, (b[13], b[14]), 1, (255, 0, 0), 4)
        # img = numpy.array(img)
        cv2.imshow('wyfRetinaface', imgre)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
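Presumably the script invokes the demo directly; a minimal entry point sketch (an assumption, not shown in the original):

if __name__ == '__main__':
    wxf()  # opens the webcam window; press 'q' to quit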
Example #5
    cfg = None
    if ops.detect_network == "mobile0.25":
        cfg = cfg_mnet
    elif ops.detect_network == "resnet50":
        cfg = cfg_re50
    # net and model
    detect_model = RetinaFace(cfg=cfg, phase='test')

    detect_model = detect_model.to(device)

    if os.access(ops.detect_model, os.F_OK):  # checkpoint
        chkpt = torch.load(ops.detect_model, map_location=device)
        detect_model.load_state_dict(chkpt)
        print('load detect model : {}'.format(ops.detect_model))

    detect_model.eval()
    if use_cuda:
        cudnn.benchmark = True

    print('loading model done ~')
    #-------------------------------------------------------------------------- run video
    video_capture = cv2.VideoCapture(ops.test_path)
    with torch.no_grad():
        idx = 0
        while True:
            ret, img_raw = video_capture.read()

            if ret:
                if idx == 0:
                    print('video shape : {}'.format(img_raw.shape))
                idx += 1
def main():
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # data_dir = '../face_dataset/masked_whn'
    # target_dir = '../face_dataset/masked_whn_crop'

    # data_dir = '../face_dataset/CASIA-maxpy-clean'
    # target_dir = '../face_dataset/CASIA-maxpy-clean_crop'

    # data_dir = '../frvtTestbed/pnas/images'
    # target_dir = '../frvtTestbed/pnas_crop'
    #
    # crop_face(net, device, cfg, data_dir, target_dir)
    #
    # data_dir = '../frvtTestbed/common/images'
    # target_dir = '../frvtTestbed/mugshot_crop'
    #
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/calfw/aligned_images'
    # target_dir = '../face_dataset/calfw/aligned_images_crop'
    #
    # crop_face(net, device, cfg, data_dir, target_dir)
    #
    # data_dir = '../face_dataset/cplfw/aligned_images'
    # target_dir = '../face_dataset/cplfw/aligned_images_crop'
    #
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/Celeba/img_align_celeba'
    # target_dir = '../face_dataset/Celeba/img_align_celeba_crop'
    #
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/GEO_enroll'
    # target_dir = '../face_dataset/GEO_enroll_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)
    #
    # data_dir = '../face_dataset/GEO_enroll'
    # target_dir = '../face_dataset/GEO_enroll_large_crop'
    # crop_face(net, device, cfg, data_dir, target_dir, left_scale=0.1, right_scale=0.1, up_scale=0.1, low_scale=0.1)
    #
    # data_dir = '../face_dataset/GEO_Mask_Testing_Dataset'
    # target_dir = '../face_dataset/GEO_Mask_Testing_Dataset_large_crop'
    # crop_face(net, device, cfg, data_dir, target_dir, left_scale=0.05, right_scale=0.05, up_scale=0.05, low_scale=0.05)

    # data_dir = '../face_dataset/GEO_Mask_Testing_Dataset'
    # target_dir = '../face_dataset/GEO_Mask_Testing_Dataset_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)
    #
    # data_dir = '../face_dataset/GEO_env_dataset'
    # target_dir = '../face_dataset/GEO_env_dataset_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)
    #
    # data_dir = '../face_dataset/GEO_identity'
    # target_dir = '../face_dataset/GEO_identity_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/MEDS_II'
    # target_dir = '../face_dataset/MEDS_II_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)
    #
    # data_dir = '../face_dataset/MEDS_II_mask'
    # target_dir = '../face_dataset/MEDS_II_mask_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '/media/bossun/Bossun_TX2/face_dataset/CACD_VS'
    # target_dir = '/media/bossun/Bossun_TX2/face_dataset/CACD_VS_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    data_dir = '../face_dataset/CASIA-maxpy-clean'
    target_dir = '../face_dataset/CASIA-maxpy-clean_large_crop'
    crop_face(net, device, cfg, data_dir, target_dir, left_scale=0.1, right_scale=0.1, up_scale=0.1, low_scale=0.1)

    data_dir = '../face_dataset/1N_test_dataset_origin/GEO_Mask_Testing_Dataset_1N/identity'
    target_dir = '../face_dataset/1N_test_dataset/GEO_Mask_Testing_Dataset_large_crop_1N/identity'
    crop_face(net, device, cfg, data_dir, target_dir, left_scale=0.1, right_scale=0.1, up_scale=0.1, low_scale=0.1)
Example #7
class FaceDetector:
    def __init__(self,
                 trained_model='./weights/mobilenet0.25_Final.pth',
                 network='mobile0.25',
                 cpu=True,
                 confidence_threshold=0.02,
                 top_k=5000,
                 nms_threshold=0.4,
                 keep_top_k=750,
                 vis_thres=0.6):
        self.trained_model = trained_model
        self.network = network
        self.cpu = cpu
        self.confidence_threshold = confidence_threshold
        self.top_k = top_k
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.vis_thres = vis_thres

        torch.set_grad_enabled(False)
        self.cfg = None
        if self.network == "mobile0.25":
            setattr(self, 'cfg', cfg_mnet)
        elif self.network == "resnet50":
            setattr(self, 'cfg', cfg_re50)
        else:
            raise (Exception("Invalid NetWork"))

        # build net and load model
        self.net = RetinaFace(self.cfg, phase='test')
        self.net = load_model(self.net, self.trained_model, self.cpu)
        self.net = self.net.eval()

        self.device = torch.device("cpu" if self.cpu else "cuda")
        self.net = self.net.to(self.device)
        self.resize = 1

    def detect(self, image_raw):
        """
        Detect face from single image
        :param image_raw: ndarray of image
        :return:
        """

        img = np.float32(image_raw)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              self.cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        print("shape of landms: ", landms.shape)
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)

        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        dets = dets[:self.keep_top_k, :]
        landms = landms[:self.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        return dets
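Each row returned by detect() carries 15 values: 4 box coordinates, 1 confidence score, then 5 landmark (x, y) pairs. A small parsing sketch (detector and image are hypothetical names):

dets = detector.detect(image)
for row in dets:
    x1, y1, x2, y2, score = row[:5]
    landmarks = row[5:15].reshape(5, 2)  # 5 (x, y) points: eyes, nose, mouth corners
    print((x1, y1, x2, y2), score, landmarks.tolist())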
from data import cfg_mnet
from models.retinaface import RetinaFace
from utils.net_utils import load_model, image_process, process_face_data

# import torch2trt.converters.cat

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.set_grad_enabled(False)
cfg = cfg_mnet
retina_trained_model = "./weights/mobilenet0.25_Final.pth"
use_cpu = False
# cfg = cfg_re50
retina_net = RetinaFace(cfg=cfg, phase='test')
retina_net = load_model(retina_net, retina_trained_model, use_cpu)
retina_net.eval()
cudnn.benchmark = True
retina_net = retina_net.to(device)


def main(img_path):
    test_img = cv2.imread(img_path)
    resize = 1
    im, im_width, im_height, scale = image_process(test_img, device)
    print(im.shape)
    model = torch2trt(retina_net, [im],
                      fp16_mode=True,
                      max_workspace_size=100000)
    tic = time.time()
    loc, conf, landms = model(im)
    print('net forward time: {:.4f}'.format(time.time() - tic))
Example #9
    model.load_state_dict(pretrained_dict, strict=False)
    return model


if __name__ == '__main__':
    torch.set_grad_enabled(False)  # flag marking test/inference mode

    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()  # test mode; during training this would be net.train()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # testing dataset
    testset_folder = args.dataset_folder
    testset_list = args.dataset_folder[:-8] + "test_val.txt"

    with open(testset_list, 'r') as fr:
        test_dataset = fr.read().split('\n')
    num_images = len(test_dataset)

    _t = {'forward_pass': Timer(), 'misc': Timer()}
Example #10
def main():
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet18":
        cfg = cfg_re18
    elif args.network == "resnet34":
        cfg = cfg_re34
    elif args.network == "resnet50":
        cfg = cfg_re50
    elif args.network == "Efficientnet-b0":
        cfg = cfg_eff_b0
    elif args.network == "Efficientnet-b4":
        cfg = cfg_eff_b4
    elif args.network == "resnet34_hsfd":
        cfg = cfg_re34_hsfd_finetune
    elif args.network == "resnet34_hsfd_not_finetune":
        cfg = cfg_re34_hsfd_not_finetune

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # # testing dataset
    # testset_folder = args.dataset_folder
    # # testset_list = args.dataset_folder[:-7] + "wider_val.txt"
    # # with open(testset_list, 'r') as fr:
    # #     test_dataset = fr.read().split()
    # test_dataset = []
    # for event in os.listdir(testset_folder):
    #     subdir = os.path.join(testset_folder, event)
    #     img_names = os.listdir(subdir)
    #     for img_name in img_names:
    #         test_dataset.append([event, os.path.join(subdir, img_name)])
    # num_images = len(test_dataset)

    used_channels = cfg['used_channels']
    img_dim = cfg['image_size']
    test_dataset = EcustHsfdDetection(args.dataset_file,
                                      used_channels,
                                      preproc=valid_preproc(img_dim, None),
                                      mode='valid')
    num_images = len(test_dataset)
    datadir = '/'.join(args.dataset_file.split('/')[:-1])

    pred_file = f'{args.save_folder}_pred.txt'
    gt_file = f'{args.save_folder}_gt.txt'
    fp1 = open(pred_file, 'w')
    fp2 = open(gt_file, 'w')

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    # testing begin
    for i, img_name in enumerate(test_dataset.imgs_path):
        if i % 100 == 0:
            torch.cuda.empty_cache()

        # image_path = testset_folder + img_name
        img_raw = load_datacube(img_name)[..., used_channels]
        # img_raw = cv2.imread(img_name, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale
        target_size = img_dim
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1

        if resize != 1:
            img = np.stack([
                cv2.resize(img[..., c], None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
                for c in range(img.shape[-1])
            ], axis=-1)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img = (img - 127.5) / 128.0
        # img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        _t['forward_pass'].tic()
        loc, conf, landms = net(img)  # forward pass
        _t['forward_pass'].toc()
        _t['misc'].tic()
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        prediction = np.concatenate((dets, landms), axis=1)
        _t['misc'].toc()

        # --------------------------------------------------------------------
        # save_name = os.path.join(args.save_folder, img_name.split('/')[-1].split('.')[0] + ".txt")
        # dirname = os.path.dirname(save_name)
        # if not os.path.isdir(dirname):
        #     os.makedirs(dirname)
        # with open(save_name, "w") as fd:
        #     bboxs = dets
        #     file_name = os.path.basename(save_name)[:-4] + "\n"
        #     bboxs_num = str(len(bboxs)) + "\n"
        #     fd.write(file_name)
        #     fd.write(bboxs_num)
        #     for box in bboxs:
        #         x = int(box[0])
        #         y = int(box[1])
        #         w = int(box[2]) - int(box[0])
        #         h = int(box[3]) - int(box[1])
        #         confidence = str(box[4])
        #         line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n"
        #         fd.write(line)

        fp1.write(f"# {img_name.lstrip(datadir).lstrip('/')}\n")
        if dets.shape[0] > 0:
            box = prediction[0][:4].astype(int).tolist()
            box[2] -= box[0]  # convert x2, y2 to width, height
            box[3] -= box[1]
            landms = prediction[0][4:14]
            score = prediction[0][14]

            label = [0. for _ in range(20)]
            label[-1] = score
            label[:4] = box
            label[4:-1] = landms
            label = ' '.join(list(map(str, label)))
            fp1.write(f'{label}\n')

        gt_label = ' '.join(list(map(str, test_dataset.words[i][0])))
        fp2.write(f"# {img_name.lstrip(datadir).lstrip('/')}\n")
        fp2.write(f'{gt_label}\n')
        print('im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s'.
              format(i + 1, num_images, _t['forward_pass'].average_time,
                     _t['misc'].average_time))

        # # save image
        # if args.save_image:
        #     for b in dets:
        #         if b[4] < args.vis_thres:
        #             continue
        #         text = "{:.4f}".format(b[4])
        #         b = list(map(int, b))
        #         cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        #         cx = b[0]
        #         cy = b[1] + 12
        #         cv2.putText(img_raw, text, (cx, cy),
        #                     cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

        #         # landms
        #         cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
        #         cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
        #         cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
        #         cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
        #         cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
        #     # save image
        #     if not os.path.exists("./results/"):
        #         os.makedirs("./results/")
        #     name = "./results/" + str(i) + ".jpg"
        #     cv2.imwrite(name, img_raw)

    fp1.close()
    fp2.close()
Example #11
def train():

    net = RetinaFace(cfg=cfg)
    logger.info("Printing net...")
    logger.info(net)

    if args.resume_net is not None:
        logger.info('Loading resume network...')
        state_dict = torch.load(args.resume_net)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:] # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()

    cudnn.benchmark = True

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    net.train()
    epoch = args.resume_epoch
    logger.info('Loading Dataset...')

    trainset = WiderFaceDetection(training_dataset, preproc=train_preproc(img_dim, rgb_mean), mode='train')
    validset = WiderFaceDetection(training_dataset, preproc=valid_preproc(img_dim, rgb_mean), mode='valid')
    # trainset = WiderFaceDetection(training_dataset, transformers=train_transformers(img_dim), mode='train')
    # validset = WiderFaceDetection(training_dataset, transformers=valid_transformers(img_dim), mode='valid')
    trainloader = data.DataLoader(trainset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate)
    validloader = data.DataLoader(validset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate)
    logger.info(f'Totally {len(trainset)} training samples and {len(validset)} validating samples.')

    epoch_size = math.ceil(len(trainset) / batch_size)
    max_iter = max_epoch * epoch_size
    logger.info(f'max_epoch: {max_epoch:d} epoch_size: {epoch_size:d}, max_iter: {max_iter:d}')

    # optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=initial_lr, weight_decay=weight_decay)
    scheduler = _utils.get_linear_schedule_with_warmup(optimizer, int(0.1 * max_iter), max_iter)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    best_loss_val = float('inf')
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            # batch_iterator = iter(tqdm(trainloader, total=len(trainloader)))
            batch_iterator = iter(trainloader)
            # if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']):
            #     torch.save(net.state_dict(), save_folder + cfg['name']+ '_epoch_' + str(epoch) + '.pth')
            epoch += 1
            torch.cuda.empty_cache()

        if (valid_steps > 0) and (iteration > 0) and (iteration % valid_steps == 0):
            net.eval()
            # validation
            loss_l_val = 0.
            loss_c_val = 0.
            loss_landm_val = 0.
            loss_val = 0.
            # for val_no, (images, targets) in tqdm(enumerate(validloader), total=len(validloader)):
            for val_no, (images, targets) in enumerate(validloader):
                # load data
                images = images.cuda()
                targets = [anno.cuda() for anno in targets]
                # forward
                with torch.no_grad():
                    out = net(images)
                    loss_l, loss_c, loss_landm = criterion(out, priors, targets)
                    loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
                loss_l_val += loss_l.item()
                loss_c_val += loss_c.item()
                loss_landm_val += loss_landm.item()
                loss_val += loss.item()
            loss_l_val /= len(validloader)
            loss_c_val /= len(validloader)
            loss_landm_val /= len(validloader)
            loss_val /= len(validloader)
            logger.info('[Validating] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f}'
                .format(epoch, max_epoch, (iteration % epoch_size) + 1,
                epoch_size, iteration + 1, max_iter, 
                loss_val, loss_l_val, loss_c_val, loss_landm_val))
            if loss_val < best_loss_val:
                best_loss_val = loss_val
                pth = os.path.join(save_folder, cfg['name'] + '_iter_' + str(iteration) + f'_{loss_val:.4f}_' + '.pth')
                torch.save(net.state_dict(), pth)
                logger.info(f'Best validating loss: {best_loss_val:.4f}, model saved as {pth:s})')
            net.train()

        load_t0 = time.time()
        # if iteration in stepvalues:
        #     step_index += 1
        # lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda() for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
        loss.backward()
        optimizer.step()
        scheduler.step()
        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        if iteration % verbose_steps == 0:
            logger.info('[Training] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
                .format(epoch, max_epoch, (iteration % epoch_size) + 1,
                epoch_size, iteration + 1, max_iter, 
                loss.item(), loss_l.item(), loss_c.item(), loss_landm.item(), 
                scheduler.get_last_lr()[-1], batch_time, str(datetime.timedelta(seconds=eta))))
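train() calls a _utils.get_linear_schedule_with_warmup helper that is not shown. A minimal sketch of such a schedule on top of torch's LambdaLR, assuming linear warmup to the base LR followed by linear decay to zero:

from torch.optim.lr_scheduler import LambdaLR

def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps):
    # LR multiplier ramps 0 -> 1 over the warmup steps, then decays 1 -> 0 linearly
    def lr_lambda(step):
        if step < num_warmup_steps:
            return float(step) / float(max(1, num_warmup_steps))
        return max(0.0, float(num_training_steps - step) /
                   float(max(1, num_training_steps - num_warmup_steps)))
    return LambdaLR(optimizer, lr_lambda)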
Example #12
class RetinaFaceDet(object):
    def __init__(self, model_type="mobile0.25", model_path="./weights/mobilenet0.25_Final.pth",
                 backbone_location="./weights/mobilenetV1X0.25_pretrain.tar", use_cpu=True, loading=True):
        self.cfg = None
        self.use_cpu = use_cpu
        self.model_path = model_path
        if model_type == "mobile0.25":
            self.cfg = cfg_mnet
        elif model_type == "resnet50":
            self.cfg = cfg_re50
        self.device = torch.device("cpu" if use_cpu else "cuda")
        self.net = RetinaFace(cfg=self.cfg, phase="test", backbone_location=backbone_location)
        if loading:
            if not os.path.exists(model_path):
                print('No model path exists!')
            self.loading()

        self._priors = None
        self.im_width = 0
        self.im_height = 0
        self.im_nch = 0

    def _get_model(self):
        self.net.eval()
        return self.net

    def loading(self):
        self.net = load_model(self.net, self.model_path, self.use_cpu)
        self.net.eval()
        self.net = self.net.to(self.device)

    def set_default_size(self, imgshape=(640, 480, 3)):  # [H, W, nCh]
        im_height, im_width, im_nch = imgshape
        if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
            pass
        else:
            self.im_height, self.im_width, self.im_nch = imgshape
            """
            priorbox shape [-1, 4]; dim0: number of predicted bboxes from the network; dim1: [x_center, y_center, w, h]
            priorbox stores the bbox centre positions together with the preset minimum face sizes;
            the aspect ratio is handled through the variance values.
            All of these are values relative to the image size, i.e. in the (0, 1) range.
            """
            priorbox = PriorBox(self.cfg, image_size=(self.im_height, self.im_width))
            self._priors = priorbox.forward()

    @torch.no_grad()
    def execute_batch_mlu(self, net_output, batch_shape, threshold=0.8, topk=5000, keep_topk=750, nms_threshold=0.2):
        locs, confs, landmss = net_output
        nB, nCh, im_height, im_width = batch_shape
        scale = torch.Tensor([im_width, im_height]*2)
        scale1 = torch.Tensor([im_width, im_height] * 5)

        detss = []

        if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
            pass
        else:
            self.set_default_size([im_height, im_width, nCh])

        priors = self._priors.unsqueeze(dim=0)

        boxes = batch_decode(locs, priors, self.cfg['variance'])
        boxes = boxes * scale

        scores = confs[:, :, 1]

        landms = batch_decode_landm(landmss, priors, self.cfg['variance'])
        landms = landms * scale1

        landms = landms.data.cpu().numpy()
        scores = scores.data.cpu().numpy()
        boxes = boxes.data.cpu().numpy()

        for n in range(nB):
            _landms = landms[n]
            _scores = scores[n]
            _boxes = boxes[n]

            # ignore low scores
            inds = np.where(_scores > threshold)[0]
            _boxes = _boxes[inds]
            _landms = _landms[inds]
            _scores = _scores[inds]

            # keep top-K before NMS
            order = _scores.argsort()[::-1][:topk]
            _boxes = _boxes[order]
            _landms = _landms[order]
            _scores = _scores[order]

            # do NMS
            dets = np.hstack((_boxes, _scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, nms_threshold)
            dets = dets[keep, :]
            _landms = _landms[keep]

            # keep top-K faster NMS
            dets = dets[:keep_topk, :]
            _landms = _landms[:keep_topk, :]
            # x0,y0,x1,y1,score,landmarks...
            dets = np.concatenate((dets, _landms), axis=1)
            detss.append(dets)
        return detss



    def execute_batch(self, img_batch, threshold=0.8, topk=5000, keep_topk=750, nms_threshold=0.2):
        resize = 1
        with torch.no_grad():
            img_batch = img_batch.to(self.device)
            locs, confs, landmss = self.net(img_batch)

            nB, nCh, im_height, im_width = img_batch.shape

            scale = torch.Tensor([im_width, im_height, im_width, im_height])
            scale = scale.to(self.device)

            if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
                pass
            else:
                self.set_default_size([im_height, im_width, nCh])

            priors = self._priors
            priors = priors.to(self.device)
            prior_data = priors.data

            detss = []
            """
            以bbox的location为例子,最终要得到的是:
                bbox_center_x
                bbox_center_y
                bbox_w
                bbox_h
            但是,直接预测这些数值是困难的,所以需要脱离图像的尺寸,压缩到0-1的范围,所以我们改为预测:_bbox_center_x,_bbox_center_y,_bbox_w,_bbox_w,他们的关系如下:
                bbox_center_x = (_bbox_center_x)*imgW
                bbox_center_y = (_bbox_center_y)*imgH
                bbox_w = (_bbox_w)*imgW
                bbox_h = (_bbox_w)*imgH
            进一步,引入anchor的概念,即预先设定多个bbox的中心和最小的人脸长宽。我们只预测真实值与预设值之间的比例、偏移关系,
            模型预测结果为[x_offset,y_offset,w_scale,h_scale]
            预设bbox为[x_center,y_center,face_w,face_h] 即prior_data
            vx,vy控制人脸的长宽比
            他们之间相互关系为:
                _bbox_center_x = x_center + x_offset*face_w*vx
                _bbox_center_y = y_center + y_offset*face_h*vy
                _bbox_w = face_w*exp(w_scale*vx)
                _bbox_h = face_h*exp(h_scale*vy)            
            
            最终得到:
                bbox_center_x = (x_center + x_offset*face_w*vx)*imgW
                bbox_center_y = (y_center + y_offset*face_h*vy)*imgH
                bbox_w = (face_w*exp(w_scale*vx))*imgW
                bbox_h = (face_h*exp(h_scale*vy))*imgH
            """

            for idx in range(nB):
                loc = locs[idx]
                conf = confs[idx]
                landms = landmss[idx]

                """
                对loc而言,网络输出的是shape为[-1,4]的矩阵,dim1是[x_offset,y_offset,w_scale,h_scale],需要通过decode进行恢复到正常的bbox
                loc: [-1,4]; dim1: [x_offset,y_offset,w_scale,h_scale]
                prior_data: [-1,4]; dim1:[x_center,y_center,face_w,face_h] 
                虽然这里的face_w!= face_h,但本质上是相等的,因为是face_w/face_h相对图像尺寸的值。所以,本质上这里是正方形的anchor,需要variance来控制长宽比。
                variance: [vx,vy] 控制长宽比例
                _bbox_center_x = x_center + x_offset*face_w*vx
                _bbox_center_y = y_center + y_offset*face_h*vy
                _bbox_w = face_w*exp(w_scale*vx)
                _bbox_h = face_h*exp(h_scale*vy)
                进一步,转为left top corner x, left top corner y, right bottom corner x, right bottom corner y的形式 
                """
                boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
                """
                之前的结果都是normalize的结果,即(0,1),因此,需要重新rescale回去。
                这个scale即图像的大小。
                """
                boxes = boxes * scale / resize
                boxes = boxes.cpu().numpy()
                scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
                """
                基本原理同上
                """
                landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
                scale1 = torch.Tensor([im_width, im_height, im_width, im_height,
                                       im_width, im_height, im_width, im_height,
                                       im_width, im_height])
                scale1 = scale1.to(self.device)
                landms = landms * scale1 / resize
                landms = landms.cpu().numpy()

                # ignore low scores
                inds = np.where(scores > threshold)[0]
                boxes = boxes[inds]
                landms = landms[inds]
                scores = scores[inds]

                # keep top-K before NMS
                order = scores.argsort()[::-1][:topk]
                boxes = boxes[order]
                landms = landms[order]
                scores = scores[order]

                # do NMS
                dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
                """
                py_cpu_nms 非极大抑制
                基本原理,假设dets是一个队列,每次取队列第一个元素(pop),并加入到keep的list中,将该元素与dets队列中其它元素比较,剔除bbox交集大于nms_threshold的元素。
                然后不断循环,直到dets为空。
                """
                keep = py_cpu_nms(dets, nms_threshold)
                # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
                dets = dets[keep, :]
                landms = landms[keep]

                # keep top-K after NMS
                dets = dets[:keep_topk, :]
                landms = landms[:keep_topk, :]

                # x0,y0,x1,y1,score,landmarks...
                dets = np.concatenate((dets, landms), axis=1)

                detss.append(dets)
        return detss




    def execute(self,img_cv,threshold=0.6,topk=5000,keep_topk=750,nms_threshold=0.7):
        resize = 1
        with torch.no_grad():
            img = np.float32(img_cv)

            im_height, im_width, _ = img.shape
            scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(self.device)
            scale = scale.to(self.device)

            loc, conf, landms = self.net(img)  # forward pass

            if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
                pass
            else:
                self.set_default_size([im_height,im_width,3])

            priors = self._priors

            priors = priors.to(self.device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
            scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2]])
            scale1 = scale1.to(self.device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:topk]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:keep_topk, :]
            landms = landms[:keep_topk, :]

            # x0,y0,x1,y1,score,landmarks...
            dets = np.concatenate((dets, landms), axis=1)

            return dets

    def execute_debug(self,img_cv,threshold=0.6,topk=5000,keep_topk=750,nms_threshold=0.7):
        resize = 1
        dtime = {'detection': [],
                    'nms': [],
                    'decode': [],
                    }
        tob = timer(display=False)
        with torch.no_grad():
            img = np.float32(img_cv)

            im_height, im_width, _ = img.shape
            scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            # img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(self.device)
            scale = scale.to(self.device)

            data = img.reshape(-1).tolist()
            jsd = {
                "data":[
                    {
                        "INPUT":{
                            "content":data,
                            "shape":[460,640]
                        }
                    }
                ]
            }

            # jsdata = json.dumps( jsd )
            store(jsd)

            tob.start()
            loc, conf, landms = self.net(img)  # forward pass
            utm = tob.eclapse()
            dtime['detection'].append(utm)

            if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
                pass
            else:
                self.set_default_size([im_height,im_width,3])

            priors = self._priors

            tob.start()

            priors = priors.to(self.device)
            prior_data = priors.data
            print('nin=', prior_data.shape[0])
            print('loc.data',loc.data.squeeze(0)[:2,:])
            boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
            print('boxes',boxes[:2,:])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            print('landms.data',landms.data.squeeze(0)[:2,:])
            landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
            print('landms', landms[:2, :])
            scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2]])
            scale1 = scale1.to(self.device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]
            print('nout=', scores.shape[0])


            # keep top-K before NMS
            order = scores.argsort()[::-1][:topk]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            utm = tob.eclapse()
            dtime['decode'].append(utm)

            tob.start()
            # do NMS

            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            print('nms in=', dets.shape[0])
            keep = py_cpu_nms(dets, nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            print('nms out=', dets.shape[0])
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:keep_topk, :]
            landms = landms[:keep_topk, :]

            # x0,y0,x1,y1,score,landmarks...
            dets = np.concatenate((dets, landms), axis=1)
            utm = tob.eclapse()
            dtime['nms'].append(utm)

            return dets,dtime
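
# --- Illustrative sketch ---
# The docstring in execute_batch above describes the SSD-style anchor decode.
# This is a minimal NumPy version of that math under the usual assumptions
# (priors as [x_center, y_center, w, h] in normalized coordinates,
# variances = [0.1, 0.2]); decode_boxes is a hypothetical name, and the
# repository's own decode() works on torch tensors instead.
import numpy as np

def decode_boxes(loc, priors, variances=(0.1, 0.2)):
    # loc: [N, 4] predicted [x_offset, y_offset, w_scale, h_scale]
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
    # convert [cx, cy, w, h] -> [x0, y0, x1, y1]
    return np.concatenate([centers - sizes / 2, centers + sizes / 2], axis=1)

# Worked example: one anchor at the image center, zero predicted offsets.
priors = np.array([[0.5, 0.5, 0.1, 0.1]])
print(decode_boxes(np.zeros((1, 4)), priors))  # -> [[0.45 0.45 0.55 0.55]]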
Example #13
class RetinaFaceDetector:

    def __init__(self,
            trained_model='weights/Resnet50_Final.pth',
            network='resnet50',
            cpu=False,
            confidence_threshold=0.02,
            top_k=5000,
            nms_threshold=0.4,
            keep_top_k=750,
            show_image=False,
            vis_thres=0.6
            ):
        self.cpu = cpu
        self.confidence_threshold = confidence_threshold
        self.top_k = top_k
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.show_image = show_image
        self.vis_thres = vis_thres

        torch.set_grad_enabled(False)
        self.cfg = None
        if network == "mobile0.25":
            self.cfg = cfg_mnet
        elif network == "resnet50":
            self.cfg = cfg_re50
        self.cfg['pretrain'] = False
        # net and model
        self.net = RetinaFace(cfg=self.cfg, phase = 'test')
        
        self.net = load_model(self.net, os.path.join(os.path.dirname(inspect.getabsfile(RetinaFace)), '../' + trained_model), cpu)
        self.net.eval()
        print('Finished loading model!')
        print(self.net)
        cudnn.benchmark = True
        self.device = torch.device("cpu" if cpu else "cuda")
        self.net = self.net.to(self.device)

        self.resize = 1

    def detect_faces(self, img_raw, mean=(104, 117, 123)):
        img = np.float32(img_raw)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= mean
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)
        # keep = nms(dets, self.nms_threshold,force_cpu=self.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:self.keep_top_k, :]
        landms = landms[:self.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        # show image
        if self.show_image:
            for b in dets:
                if b[4] < self.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

                # Show image
                cv2.imshow('result', img_raw)
                cv2.waitKey(100)

        results = []
        for det in dets:
            box = det[:4]
            score = det[4]
            keypoints = det[5:]

            if score < self.vis_thres:
                continue
            results.append({'box':box.tolist(), 'score':score.tolist(), 'keypoints':keypoints.tolist()})

        return results

    def detect_faces_batch(self, img_raws, mean=(104, 117, 123)):
        imgs = []
        for img_raw in img_raws:
            imgs.append(np.float32(img_raw))
        imgs = np.stack(imgs, 0)

        batch_size, im_height, im_width, _ = imgs.shape
        scale = torch.Tensor([imgs.shape[2], imgs.shape[1], imgs.shape[2], imgs.shape[1]])

        imgs -= mean
        imgs = imgs.transpose(0, 3, 1, 2)
        imgs = torch.from_numpy(imgs)
        imgs = imgs.to(self.device)
        scale = scale.to(self.device)

        tic = time.time()
        loc, conf, landms = self.net(imgs)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data

        list_results = []
        for idx in range(batch_size):
            boxes = decode(loc.data[idx], prior_data, self.cfg['variance'])
            boxes = boxes * scale / self.resize
            boxes = boxes.cpu().numpy()
            scores = conf.data[idx].cpu().numpy()[:, 1]
            keypoints = decode_landm(landms[idx].data, prior_data, self.cfg['variance'])
            scale1 = torch.Tensor([
                imgs.shape[3], imgs.shape[2], imgs.shape[3], imgs.shape[2],
                imgs.shape[3], imgs.shape[2], imgs.shape[3], imgs.shape[2],
                imgs.shape[3], imgs.shape[2]])

            scale1 = scale1.to(self.device)
            keypoints = keypoints * scale1 / self.resize
            keypoints = keypoints.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > self.confidence_threshold)[0]
            boxes = boxes[inds]
            keypoints = keypoints[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:self.top_k]
            boxes = boxes[order]
            keypoints = keypoints[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, self.nms_threshold)
            # keep = nms(dets, self.nms_threshold,force_cpu=self.cpu)
            dets = dets[keep, :]
            keypoints = keypoints[keep]

            # keep top-K after NMS
            dets = dets[:self.keep_top_k, :]
            keypoints = keypoints[:self.keep_top_k, :]

            dets = np.concatenate((dets, keypoints), axis=1)

            # show image
            if self.show_image:
                img_raw = img_raws[idx]  # draw on the raw image for this batch index
                for b in dets:
                    if b[4] < self.vis_thres:
                        continue
                    text = "{:.4f}".format(b[4])
                    b = list(map(int, b))
                    cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                    cx = b[0]
                    cy = b[1] + 12
                    cv2.putText(img_raw, text, (cx, cy),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                    # keypoints
                    cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                    cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                    cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                    cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                    cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

                    # Show image
                    cv2.imshow('result', img_raw)
                    cv2.waitKey(100)

            results = []
            for det in dets:
                box = det[:4]
                score = det[4]
                keypoints = det[5:]

                if score < self.vis_thres:
                    continue
                results.append({'box':box.tolist(), 'score':score.tolist(), 'keypoints':keypoints.tolist()})
            
            list_results.append(results)

        return list_results
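
# --- Illustrative sketch ---
# Every example here calls py_cpu_nms. This is a minimal sketch of the greedy
# NMS it performs, written from the docstring description above (pop the
# highest-scoring box, keep it, drop heavily overlapping boxes, repeat);
# nms_sketch is a hypothetical name.
import numpy as np

def nms_sketch(dets, thresh):
    # dets rows: [x0, y0, x1, y1, score]
    x0, y0, x1, y1, scores = dets.T
    areas = (x1 - x0 + 1) * (y1 - y0 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]                      # highest remaining score
        keep.append(i)
        # IoU of box i against all remaining boxes
        xx0 = np.maximum(x0[i], x0[order[1:]])
        yy0 = np.maximum(y0[i], y0[order[1:]])
        xx1 = np.minimum(x1[i], x1[order[1:]])
        yy1 = np.minimum(y1[i], y1[order[1:]])
        inter = np.maximum(0.0, xx1 - xx0 + 1) * np.maximum(0.0, yy1 - yy0 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]  # drop boxes overlapping box i too much
    return keep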
Example #14
def main():
    args = get_args()
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1

    # testing begin
    for _ in range(100):
        image_path = "./curve/test.jpg"
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)

        img = np.float32(img_raw)

        im_height, im_width = img.shape[:2]
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        print("net forward time: {:.4f}".format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg["variance"])
        scale1 = torch.Tensor([
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        # show image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                              2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                            0.5, (255, 255, 255))

                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
            # save image

            name = "test.jpg"
            cv2.imwrite(name, img_raw)
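Example #15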
class RetinaFaceDetector:
    def __init__(self, device, pretrained_model):
        self.device = device
        self.cfg = {
            'name': 'Resnet50',
            'min_sizes': [[16, 32], [64, 128], [256, 512]],
            'steps': [8, 16, 32],
            'variance': [0.1, 0.2],
            'clip': False,
            'loc_weight': 2.0,
            'gpu_train': True,
            'batch_size': 24,
            'ngpu': 4,
            'epoch': 100,
            'decay1': 70,
            'decay2': 90,
            'image_size': 840,
            'pretrain': True,
            'return_layers': {
                'layer2': 1,
                'layer3': 2,
                'layer4': 3
            },
            'in_channel': 256,
            'out_channel': 256
        }

        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = load_model(self.net, pretrained_model, device)
        self.net.eval()
        self.net = self.net.to(device)

    def predict(self,
                image,
                confidence_threshold=0.02,
                top_k=5000,
                nms_threshold=0.4,
                keep_top_k=750):
        torch.set_grad_enabled(False)
        img = np.float32(image)
        resize = 1

        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf, landms = self.net(img)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              self.cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        bboxes, landmarks, confident_scores = dets[:, :4], dets[:, 5:], dets[:, 4]

        boxes = []
        if bboxes is None:
            boxes = None
        else:
            for box in bboxes:
                x0, y0, x1, y1 = tuple(box.astype(int))
                height, width = y1 - y0, x1 - x0
                distance = max(height, width)
                if height < distance:
                    gap = distance - height
                    y0 -= gap / 2
                    y1 += gap / 2
                elif width < distance:
                    gap = distance - width
                    x0 -= gap / 2
                    x1 += gap / 2
                if y0 < 0:
                    y1 -= y0
                    y0 = 0
                if x0 < 0:
                    x1 -= x0
                    x0 = 0
                boxes.append([x0, y0, x1, y1])
            boxes = np.array(boxes).astype(int)

        return boxes, landmarks.reshape(-1, 5, 2), confident_scores
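
# --- Illustrative sketch ---
# predict() above pads each detection to a square crop. A standalone sketch
# of that rule (make_square is a hypothetical name): pad the shorter side
# until height == width, then shift the box back inside the image if the
# padding pushed it past the top/left border.
def make_square(x0, y0, x1, y1):
    height, width = y1 - y0, x1 - x0
    side = max(height, width)
    if height < side:
        gap = side - height
        y0 -= gap / 2
        y1 += gap / 2
    elif width < side:
        gap = side - width
        x0 -= gap / 2
        x1 += gap / 2
    if y0 < 0:  # clip at the top edge, preserving the side length
        y1 -= y0
        y0 = 0
    if x0 < 0:  # clip at the left edge, preserving the side length
        x1 -= x0
        x0 = 0
    return int(x0), int(y0), int(x1), int(y1)

print(make_square(10, 0, 50, 100))  # -> (0, 0, 100, 100)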
Example #16
def main():
    args = get_args()
    torch.set_grad_enabled(False)

    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # testing dataset
    testset_folder = args.dataset_folder
    testset_list = args.dataset_folder[:-7] + "wider_val.txt"

    with open(testset_list, "r") as fr:
        test_dataset = fr.read().split()
    num_images = len(test_dataset)

    _t = {"forward_pass": Timer(), "misc": Timer()}

    # testing begin
    for i, img_name in enumerate(test_dataset):
        image_path = testset_folder + img_name
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale
        target_size = 1600
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1

        if resize != 1:
            img = cv2.resize(img,
                             None,
                             None,
                             fx=resize,
                             fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        _t["forward_pass"].tic()
        loc, conf, landms = net(img)  # forward pass
        _t["forward_pass"].toc()
        _t["misc"].tic()
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg["variance"])
        scale1 = torch.Tensor([
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
            img.shape[3],
            img.shape[2],
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        # dets = dets[:args.keep_top_k, :]
        # landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        _t["misc"].toc()

        # --------------------------------------------------------------------
        save_name = args.save_folder + img_name[:-4] + ".txt"
        dirname = os.path.dirname(save_name)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(save_name, "w") as fd:
            bboxs = dets
            file_name = os.path.basename(save_name)[:-4] + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            fd.write(file_name)
            fd.write(bboxs_num)
            for box in bboxs:
                x = int(box[0])
                y = int(box[1])
                w = int(box[2]) - int(box[0])
                h = int(box[3]) - int(box[1])
                confidence = str(box[4])
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(
                    h) + " " + confidence + " \n"
                fd.write(line)

        print("im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s".
              format(i + 1, num_images, _t["forward_pass"].average_time,
                     _t["misc"].average_time))

        # save image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                              2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                            0.5, (255, 255, 255))

                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
            # save image
            if not os.path.exists("./results/"):
                os.makedirs("./results/")
            name = "./results/" + str(i) + ".jpg"
            cv2.imwrite(name, img_raw)
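
# --- Illustrative sketch ---
# The test-time resize rule used above, as a standalone helper
# (compute_resize is a hypothetical name): scale the image so its short side
# hits target_size, but cap the scale so the long side never exceeds max_size.
import numpy as np

def compute_resize(im_shape, target_size=1600, max_size=2150):
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    resize = float(target_size) / float(im_size_min)
    if np.round(resize * im_size_max) > max_size:
        resize = float(max_size) / float(im_size_max)
    return resize

print(compute_resize((720, 1280)))  # 1600/720 would overshoot max_size, so 2150/1280 = 1.6796875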
Example #17
    def __init__(self,
                 input_mode=0,
                 output_mode=0,
                 record_video=False,
                 email_to_share=None,
                 channel=0,
                 on_gpu=False,
                 display=False,
                 only_headcount=False,
                 send_to_nvr=False,
                 parallel=False):
        self.save_into_sheet = True
        self.on_gpu = on_gpu
        self.send_to_nvr = send_to_nvr
        if email_to_share is None:
            self.save_into_sheet = False
        if self.save_into_sheet or self.send_to_nvr:
            self.api = API(email_to_share)
        uri = 'rtsp://' + secrets.ip_camera_login + ':' + secrets.ip_camera_password + \
              '@{}:554/cam/realmonitor?channel=1&subtype=0&unicast=true&proto=Onvif'
        self.input_mode = input_mode
        self.output_mode = output_mode  # 0 - pretty display, 1 - separate graph, 2 - graph with black background
        self.record_video = record_video
        self.display = display
        self.only_headcount = only_headcount
        if input_mode == 0:
            self.channel = 0  # webcam
        elif input_mode == 1:  # ip camera
            self.channel = uri.format(channel)
            self.ip = channel
        elif input_mode == 2:  # video
            self.channel = channel
        if parallel and not on_gpu:
            self.parallel = True
        else:
            self.parallel = False

        # from classifier by Sizykh Ivan

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        self.class_labels = [
            'ANGRY', 'DISGUST', 'FEAR', 'HAPPY', 'SAD', 'SURPRISE', 'NEUTRAL'
        ]
        # PATH = "./check_points_4/net_714.pth"
        PATH = "./net_714.pth"
        if self.on_gpu:
            self.classifier = Classifier().to(self.device)
            self.classifier.load_state_dict(torch.load(PATH))
        else:
            self.classifier = Classifier()
            self.classifier.load_state_dict(
                torch.load(PATH, map_location={'cuda:0': 'cpu'}))

        # from detector by Belyakova Katerina
        self.parser = argparse.ArgumentParser(description='Retinaface')

        self.parser.add_argument('-m',
                                 '--trained_model',
                                 default='./weights/Resnet50_Final.pth',
                                 type=str,
                                 help='Trained state_dict file path to open')
        self.parser.add_argument(
            '--network',
            default='resnet50',
            help='Backbone network mobile0.25 or resnet50')
        self.parser.add_argument('--cpu',
                                 action="store_true",
                                 default=False,
                                 help='Use cpu inference')
        self.parser.add_argument('--confidence_threshold',
                                 default=0.02,
                                 type=float,
                                 help='confidence_threshold')
        self.parser.add_argument('--top_k',
                                 default=5000,
                                 type=int,
                                 help='top_k')
        self.parser.add_argument('--nms_threshold',
                                 default=0.4,
                                 type=float,
                                 help='nms_threshold')
        self.parser.add_argument('--keep_top_k',
                                 default=750,
                                 type=int,
                                 help='keep_top_k')
        self.parser.add_argument('-s',
                                 '--save_image',
                                 action="store_true",
                                 default=True,
                                 help='show detection results')
        self.parser.add_argument('--vis_thres',
                                 default=0.6,
                                 type=float,
                                 help='visualization_threshold')

        self.parser.add_argument('-v', '--video', default='vid.mp4', type=str)

        self.parser_args = self.parser.parse_args()

        self.resize = 1
        """sets parameters for RetinaFace, prerun() is used once while first usege of run()"""
        torch.set_grad_enabled(False)
        cfg = None
        if self.parser_args.network == "mobile0.25":
            cfg = cfg_mnet
        elif self.parser_args.network == "resnet50":
            cfg = cfg_re50
        # net and model
        detector = RetinaFace(cfg=cfg, phase='test')
        detector = self.load_model(
            model=detector,
            pretrained_path=self.parser_args.trained_model,
            load_to_cpu=self.parser_args.cpu)
        detector.eval()
        print('Finished loading model!')
        print(detector)

        if self.on_gpu:
            cudnn.benchmark = True
            self.detector = detector.to(self.device)
        else:
            self.detector = detector
        self.cfg = cfg
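Example #18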
def main():
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise NotImplementedError(f"Only mobile0.25 and resnet50 are suppoted.")

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    if args.fp16:
        net = net.half()

    print("Finished loading model!")
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    file_paths = sorted(args.input_path.rglob("*.mp4"))[: args.num_videos]

    if args.num_gpu is not None:
        start, end = split_array(len(file_paths), args.num_gpu, args.gpu_id)
        file_paths = file_paths[start:end]

    output_path = args.output_path

    if args.save_boxes:
        output_label_path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)

    if args.save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    if args.video_decoder == "cpu":
        decode_device = cpu(0)
    elif args.video_decoder == "gpu":
        decode_device = gpu(0)
    else:
        raise NotImplementedError(f"Only CPU and GPU devices are supported by decard, but got {args.video_decoder}")

    transform = albu.Compose([albu.Normalize(p=1, mean=(104, 117, 123), std=(1.0, 1.0, 1.0), max_pixel_value=1)], p=1)

    with torch.no_grad():
        for video_path in tqdm(file_paths):
            labels = []
            video_id = video_path.stem

            with video_reader(str(video_path), ctx=decode_device) as video:
                len_video = len(video)

                if args.num_frames is None:
                    # assumed intent: no sampling, use every frame
                    # (the original range(args.num_frames) would fail on None)
                    frame_ids = list(range(len_video))
                elif args.num_frames == 1:
                    frame_ids = [0]
                elif args.num_frames > 1:
                    if len_video < args.num_frames:
                        step = 1
                    else:
                        step = int(len_video / args.num_frames)

                    frame_ids = list(range(0, len_video, step))[: args.num_frames]
                else:
                    raise ValueError(f"Expect None or a positive integer for args.num_frames, but got {args.num_frames}")

                frames = video.get_batch(frame_ids)

                if args.video_decoder == "cpu":
                    frames = frames.asnumpy()
                elif args.video_decoder == "gpu":
                    frames = dlpack.from_dlpack(frames.to_dlpack())

                if args.video_decoder == "gpu":
                    del video
                    torch.cuda.empty_cache()

                    gc.collect()

            num_frames = len(frames)

            image_height = frames.shape[1]
            image_width = frames.shape[2]

            scale1 = torch.Tensor(
                [
                    image_width,
                    image_height,
                    image_width,
                    image_height,
                    image_width,
                    image_height,
                    image_width,
                    image_height,
                    image_width,
                    image_height,
                ]
            )

            scale1 = scale1.to(device)

            scale = torch.Tensor([image_width, image_height, image_width, image_height])
            scale = scale.to(device)

            priorbox = PriorBox(cfg, image_size=(image_height, image_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data

            if args.resize_coeff is not None:
                target_size = min(args.resize_coeff)
                max_size = max(args.resize_coeff)

                image_height = frames.shape[1]
                image_width = frames.shape[2]

                image_size_min = min([image_width, image_height])
                image_size_max = max([image_width, image_height])

                resize = float(target_size) / float(image_size_min)
                if np.round(resize * image_size_max) > max_size:
                    resize = float(max_size) / float(image_size_max)
            else:
                resize = 1

            for pred_id in range(num_frames):
                frame = frames[pred_id]

                torched_image = prepare_image(frame, transform, args.video_decoder).to(device)

                if args.fp16:
                    torched_image = torched_image.half()

                loc, conf, land = net(torched_image)  # forward pass

                frame_id = frame_ids[pred_id]

                boxes = decode(loc.data[0], prior_data, cfg["variance"])

                boxes *= scale / resize

                boxes = boxes.cpu().numpy()
                scores = conf[0].data.cpu().numpy()[:, 1]

                landmarks = decode_landm(land.data[0], prior_data, cfg["variance"])

                landmarks *= scale1 / resize
                landmarks = landmarks.cpu().numpy()

                # ignore low scores
                valid_index = np.where(scores > args.confidence_threshold)[0]
                boxes = boxes[valid_index]
                landmarks = landmarks[valid_index]
                scores = scores[valid_index]

                # keep top-K before NMS
                order = scores.argsort()[::-1]
                # order = scores.argsort()[::-1][:args.top_k]
                boxes = boxes[order]
                landmarks = landmarks[order]
                scores = scores[order]

                # do NMS
                detection = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
                keep = py_cpu_nms(detection, args.nms_threshold)
                # keep = nms(detection, args.nms_threshold,force_cpu=args.cpu)

                # x_min, y_min, x_max, y_max, score
                detection = detection[keep, :]

                landmarks = landmarks[keep].astype(int)

                if detection.shape[0] == 0:
                    continue

                bboxes = detection[:, :4].astype(int)
                confidence = detection[:, 4].astype(np.float64)

                for crop_id in range(len(detection)):

                    bbox = bboxes[crop_id]

                    labels += [
                        {
                            "frame_id": int(frame_id),
                            "crop_id": crop_id,
                            "bbox": bbox.tolist(),
                            "score": confidence[crop_id],
                            "landmarks": landmarks[crop_id].tolist(),
                        }
                    ]

                    if args.save_crops:
                        x_min, y_min, x_max, y_max = bbox

                        x_min = max(0, x_min)
                        y_min = max(0, y_min)

                        crop = frame[y_min:y_max, x_min:x_max]

                        target_folder = output_image_path / f"{video_id}"
                        target_folder.mkdir(exist_ok=True, parents=True)

                        crop_file_path = target_folder / f"{frame_id}_{crop_id}.jpg"

                        if crop_file_path.exists():
                            continue

                        cv2.imwrite(
                            str(crop_file_path),
                            cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                            [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                        )

                if args.save_boxes:
                    result = {
                        "file_path": str(video_path),
                        "file_id": video_id,
                        "bboxes": labels,
                    }

                    with open(output_label_path / f"{video_id}.json", "w") as f:
                        json.dump(result, f, indent=2)
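
# --- Illustrative sketch ---
# The frame-sampling rule used above, as a standalone helper
# (sample_frame_ids is a hypothetical name): spread num_frames ids evenly
# across the video by stepping len_video // num_frames.
def sample_frame_ids(len_video, num_frames):
    if num_frames is None:
        return list(range(len_video))  # no sampling: every frame
    step = 1 if len_video < num_frames else int(len_video / num_frames)
    return list(range(0, len_video, step))[:num_frames]

print(sample_frame_ids(100, 4))  # -> [0, 25, 50, 75]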
Example #19
class Inference(object):
    def __init__(self, weight_path, network, use_cpu=False):
        self.weight_path = weight_path
        self.network = network
        self.use_cpu = use_cpu
        self.resize = 1
        self.confidence_threshold = 0.02
        self.nms_threshold = 0.4
        self.vis_thres = 0.5
        self.input_height = 720
        self.input_width = 1280

        self._initialize_weight()

        self.scale = torch.Tensor([1280, 720, 1280, 720]).to(self.device)
        self.prior_data = self._initialize_priorbox(self.cfg,
                                                    self.input_height,
                                                    self.input_width)

    def _initialize_weight(self):
        self.cfg = None
        if self.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif self.network == "resnet50":
            self.cfg = cfg_re50

        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = self._load_model(self.net, self.weight_path, self.use_cpu)
        self.net.eval()
        print('Finished loading model!')
        print(self.net)
        cudnn.benchmark = True
        self.device = torch.device("cpu" if self.use_cpu else "cuda")
        print("self. device : ", self.device)
        self.net = self.net.to(self.device)

    def _initialize_priorbox(self, cfg, im_height, im_width):
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data

        return prior_data

    def _remove_prefix(self, state_dict, prefix):
        ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
        print('remove prefix \'{}\''.format(prefix))
        f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
        return {f(key): value for key, value in state_dict.items()}

    def _check_keys(self, model, pretrained_state_dict):
        ckpt_keys = set(pretrained_state_dict.keys())
        model_keys = set(model.state_dict().keys())
        used_pretrained_keys = model_keys & ckpt_keys
        unused_pretrained_keys = ckpt_keys - model_keys
        missing_keys = model_keys - ckpt_keys
        print('Missing keys:{}'.format(len(missing_keys)))
        print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
        print('Used keys:{}'.format(len(used_pretrained_keys)))
        assert len(
            used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
        return True

    def _load_model(self, model, pretrained_path, load_to_cpu):
        print('Loading pretrained model from {}'.format(pretrained_path))
        if load_to_cpu:
            pretrained_dict = torch.load(
                pretrained_path, map_location=lambda storage, loc: storage)
        else:
            device = torch.cuda.current_device()
            pretrained_dict = torch.load(
                pretrained_path,
                map_location=lambda storage, loc: storage.cuda(device))
        if "state_dict" in pretrained_dict.keys():
            pretrained_dict = self._remove_prefix(
                pretrained_dict['state_dict'], 'module.')
        else:
            pretrained_dict = self._remove_prefix(pretrained_dict, 'module.')
        self._check_keys(model, pretrained_dict)
        model.load_state_dict(pretrained_dict, strict=False)
        return model

    def _forward(self, img_raw):
        # img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            print("img is None")
            return None, None, None

        img = np.float32(img_raw)
        if self.resize != 1:
            img = cv2.resize(img,
                             None,
                             None,
                             fx=self.resize,
                             fy=self.resize,
                             interpolation=cv2.INTER_LINEAR)

        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)

        loc, conf, landms = self.net(img)  # forward pass

        # decode boxes
        boxes = decode(loc.data.squeeze(0), self.prior_data,
                       self.cfg['variance'])
        boxes = boxes * self.scale / self.resize
        boxes = boxes.cpu().numpy()

        # scores
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # landmarks
        landms = decode_landm(landms.data.squeeze(0), self.prior_data,
                              self.cfg['variance'])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)

        dets = dets[keep, :]
        landms = landms[keep]

        dets = np.concatenate((dets, landms), axis=1)

        boxes_list = []
        scores_list = []
        landmarks_list = []
        for b in dets:
            if b[4] < self.vis_thres:
                continue

            s = b[4]
            b = list(map(int, b))
            boxes_list.append([b[0], b[1], b[2], b[3]])
            scores_list.append(s)
            landmarks_list.append([
                b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12], b[13], b[14]
            ])

        return boxes_list, scores_list, landmarks_list

    def __call__(self, img_raw):
        return self._forward(img_raw)
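
# --- Illustrative usage sketch ---
# How the Inference wrapper above might be called; the weight path and image
# file are placeholders. Note that self.scale and the priors are fixed at
# construction time for 1280x720 input, so frames must match that size.
import cv2

detector = Inference("weights/Resnet50_Final.pth", network="resnet50", use_cpu=True)
frame = cv2.imread("sample_720p.jpg")  # expected to be 1280x720
boxes, scores, landmarks = detector(frame)
for (x0, y0, x1, y1), score in zip(boxes, scores):
    print("face at ({}, {}, {}, {}) score {:.2f}".format(x0, y0, x1, y1, score))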
Example #20
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model


torch.set_grad_enabled(False)
cfg = None
if args.network == "mobile0.25":
    cfg = cfg_mnetv1
elif args.network == "mobilenetv2":
    cfg = cfg_mnetv2
elif args.network == "mobilenetv3":
    cfg = cfg_mnetv3
elif args.network == "efficientnetb0":
    cfg = cfg_efnetb0
# net and model
model = RetinaFace(cfg=cfg, phase='test')
model = load_model(model, args.trained_model, args.cpu)
model.eval()
print('Finished loading model!')
print(model)
#cudnn.benchmark = True
device = torch.device("cpu")
model = model.to(device)

example = torch.rand(1, 3, 640, 640)
traced_script_module = torch.jit.trace(model, example)
traced_script_module.save("face.pt")
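
# --- Illustrative sketch ---
# The traced module saved above can be reloaded later without the RetinaFace
# class definition; a minimal sketch, assuming the traced net returns the
# usual (loc, conf, landms) triple:
import torch

model = torch.jit.load("face.pt")
model.eval()
dummy = torch.rand(1, 3, 640, 640)  # same shape used for tracing
loc, conf, landms = model(dummy)
print(loc.shape, conf.shape, landms.shape)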
Example #21
def wxf(imgpath):

    print(imgpath)
    torch.set_grad_enabled(False)

    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    #print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    image_path = imgpath
    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    img = np.float32(img_raw)

    target_size = 1600
    max_size = 2150
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    resize = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(resize * im_size_max) > max_size:
        resize = float(max_size) / float(im_size_max)
    if args.origin_size:
        resize = 1

    if resize != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1]
    # order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    # dets = dets[:args.keep_top_k, :]
    # landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)

    # name = rlsb.sb(imgpath)
    name = None  # ensure a defined return value when save_image is disabled

    # save image
    if args.save_image:
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            # text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            # cx = b[0]
            # cy = b[1] + 12
            #
            #
            # cv2.putText(img_raw, text, (cx, cy),
            #              cv2.FONT_HERSHEY_SCRIPT_COMPLEX, 0.5, (255, 255, 255))
            #
            # landms
            # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
            # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
            # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
            # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
            # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
        # save image
        if not os.path.exists("./results/"):
            os.makedirs("./results/")
        name = "./results/" + "wxf" + ".jpg"
        cv2.imwrite(name, img_raw)

    return name
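
# --- Illustrative sketch ---
# The input preprocessing shared by all of these examples, as a standalone
# helper (preprocess is a hypothetical name): subtract the BGR channel means,
# reorder HWC -> CHW, and add a batch dimension.
import numpy as np
import torch

def preprocess(img_bgr, mean=(104, 117, 123)):
    img = np.float32(img_bgr) - mean            # per-channel mean subtraction (BGR order)
    img = img.transpose(2, 0, 1)                # HWC -> CHW
    return torch.from_numpy(img).unsqueeze(0)   # add batch dim: [1, 3, H, W]

print(preprocess(np.zeros((4, 4, 3), dtype=np.uint8)).shape)  # torch.Size([1, 3, 4, 4])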
Example #22
                            name="",
                            op_dict=None,
                            producer_op_list=None)
    return graph


if __name__ == '__main__':
    torch.set_grad_enabled(False)

    cfg = cfg_mnet
    net = RetinaFace(cfg=cfg, phase='test')

    net = load_model(net,
                     "./converted_models/mobilenet/mobilenet0.25_Final.pth",
                     True)
    net.eval()
    print('Finished loading model!')
    #print(net)
    #cudnn.benchmark = True
    device = torch.device("cpu")
    net = net.to(device)

    img_raw = cv2.imread("./Face_Detector_ncnn/sample.jpg")
    #img = np.ones((3,240,320), dtype=np.float32)
    img = np.float32(img_raw)
    long_side = 320
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    resize = float(long_side) / float(im_size_min)
    if np.round(resize * im_size_max) > long_side:
Example No. 23
def detect(img_path):

    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    #net = FaceBoxes(phase='test', size=None, num_classes=2)
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()

    cudnn.benchmark = True
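    # (cudnn.benchmark only affects CUDA; it is a no-op when args.cpu is set)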
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    _t = {'forward_pass': Timer(), 'misc': Timer()}
    resize = 1
    # single-image version of the original batch test loop

    # accept either an image path or an already-decoded (BGR) ndarray
    if type(img_path) is not np.ndarray:
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # PIL decodes to RGB; the (104, 117, 123) mean below is BGR, so convert
        img_raw = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    else:
        img_raw = img_path

    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    _t['forward_pass'].tic()
    loc, conf, landms = net(img)  # forward pass
    _t['forward_pass'].toc()
    _t['misc'].tic()
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    #priorbox = PriorBox1(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    # order = scores.argsort()[::-1][:args.top_k]
    order = scores.argsort()[::-1]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)

    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K faster NMS
    # dets = dets[:args.keep_top_k, :]
    # landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    _t['misc'].toc()

    # (optional) write the detections out in FDDB format here
    print('forward_pass_time: {:.4f}s misc: {:.4f}s'.format(
        _t['forward_pass'].average_time, _t['misc'].average_time))

    # (optional) visualisation: draw boxes, scores, and the five landmark
    # points as in the example above, then write the image to ./results/
    return dets, img_path
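
A hypothetical driver for the `detect` helper above (the sample path is illustrative, and the `args` namespace with `trained_model`, `cpu`, `confidence_threshold`, and `nms_threshold` is assumed to be configured elsewhere in the script):

if __name__ == '__main__':
    # each row of dets is [x1, y1, x2, y2, score, lm1x, lm1y, ..., lm5x, lm5y]
    dets, path = detect("./sample.jpg")
    for row in dets:
        x1, y1, x2, y2, score = row[:5]
        print("face {:.3f} at ({:.0f}, {:.0f})-({:.0f}, {:.0f}) in {}".format(
            score, x1, y1, x2, y2, path))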