class Retina_Detector:
    def __init__(self):
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.opt = get_config()
        if self.opt.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif self.opt.network == "resnet50":
            self.cfg = cfg_re50
        # net and model
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = self.load_model(self.net, self.opt.trained_model, self.opt.cpu)
        self.net.eval()
        self.net = self.net.to(self.opt.device)

    def check_keys(self, model, pretrained_state_dict):
        ckpt_keys = set(pretrained_state_dict.keys())
        model_keys = set(model.state_dict().keys())
        used_pretrained_keys = model_keys & ckpt_keys
        unused_pretrained_keys = ckpt_keys - model_keys
        missing_keys = model_keys - ckpt_keys
        print('Missing keys:{}'.format(len(missing_keys)))
        print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
        print('Used keys:{}'.format(len(used_pretrained_keys)))
        assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
        return True

    def remove_prefix(self, state_dict, prefix):
        '''Old-style models are stored with every parameter name sharing the common prefix 'module.' '''
        print('remove prefix \'{}\''.format(prefix))
        f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
        return {f(key): value for key, value in state_dict.items()}

    def load_model(self, model, pretrained_path, load_to_cpu):
        print('Loading pretrained model from {}'.format(pretrained_path))
        if load_to_cpu:
            pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
        else:
            device = torch.cuda.current_device()
            pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
        if "state_dict" in pretrained_dict.keys():
            pretrained_dict = self.remove_prefix(pretrained_dict['state_dict'], 'module.')
        else:
            pretrained_dict = self.remove_prefix(pretrained_dict, 'module.')
        self.check_keys(model, pretrained_dict)
        model.load_state_dict(pretrained_dict, strict=False)
        return model

    def img_process(self, img):
        # Scale the shorter side to cfg["image_size"], but cap the longer side at max_size.
        target_size = self.cfg["image_size"]
        max_size = 1080
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
        return im, im_scale

    def detect(self, img):
        img, im_scale = self.img_process(img)
        resize = 1
        img_raw = img
        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.opt.device)
        scale = scale.to(self.opt.device)

        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.opt.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.opt.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.opt.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.opt.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, self.opt.nms_threshold)
        # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
        dets = dets[keep, :]
        print("len ", len(dets))
        landms = landms[keep]

        # map coordinates back to the original image; only the box columns are
        # rescaled so the score column stays untouched
        dets[:, :4] /= im_scale
        landms /= im_scale

        # keep top-K after NMS
        dets = dets[:self.opt.keep_top_k, :]
        boxes = [list(map(int, x)) for x in dets]
        landms = landms[:self.opt.keep_top_k, :]
        lands = [list(map(int, x)) for x in landms]
        # dets = np.concatenate((dets, landms), axis=1)
        return boxes, lands
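# --- Usage sketch (not part of the original class): a minimal example of how
# Retina_Detector might be driven. It assumes the imports above and a config
# from get_config(); "face.jpg" is a hypothetical path. detect() returns int
# lists: per face [x1, y1, x2, y2, score] plus five flattened (x, y) landmarks.
detector = Retina_Detector()
frame = cv2.imread("face.jpg")             # BGR image, as the pipeline expects
boxes, landmarks = detector.detect(frame)
for x1, y1, x2, y2, _score in boxes:
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)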
class FaceDetector():
    def __init__(self):
        # TODO: add initialization logic
        torch.set_grad_enabled(False)
        self.cfg = None
        if args.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif args.network == "resnet50":
            self.cfg = cfg_re50
        elif args.network == "resnet18":
            self.cfg = cfg_re18
        elif args.network == "resnet34":
            self.cfg = cfg_re34
        # net and model
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        # NOTE: weight loading is currently commented out; restore it before use
        # self.net = load_model(self.net, args.trained_model, args.cpu)
        self.net.eval()
        print('Finished loading model!')
        print(self.net)
        cudnn.benchmark = True
        self.device = torch.device("cpu" if args.cpu else "cuda")
        self.net = self.net.to(self.device)
        self.resize = 1

    def detect_image(self, img) -> List[FaceDetection]:
        # TODO: add detect logic for single image
        print(np.shape(img))
        tic = time.time()
        img = np.float32(img)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf, landms = self.net(img)  # forward pass

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[keep, :]
        dets = dets[:args.keep_top_k, :]

        # collect detections above the visualization threshold
        box_list = []
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            score = b[4]
            b = list(map(int, b))
            box_list.append(FaceDetection(b[0], b[1], b[2], b[3], 0, score))
        print('net forward time: {:.4f}'.format(time.time() - tic))
        return box_list

    def detect_images(self, imgs) -> List[List[FaceDetection]]:
        boxes_list = []
        for img in imgs:
            boxes = self.detect_image(img)
            boxes_list.append(boxes)
        return boxes_list

    def visualize(self, image, detection_list: List[FaceDetection], color=(0, 0, 255), thickness=5):
        img = image.copy()
        for detection in detection_list:
            bbox = detection.bbox
            p1 = bbox.left, bbox.top
            p2 = bbox.right, bbox.bottom
            cv2.rectangle(img, p1, p2, color, thickness=thickness, lineType=cv2.LINE_AA)
        return img
class FaceDetectorRetinaFace(object):
    """
    Class to support face detection via RetinaFace.
    Based on the code found at
    https://github.com/biubug6/Pytorch_Retinaface/blob/master/test_fddb.py
    """

    def __init__(
            self,
            enable_cuda=settings.CUDA_ENABLED,
            face_rect_expand_factor=FACE_RECT_EXPAND_FACTOR,
            trained_model=settings.FACE_DETECTION_MODEL,
            network=settings.FACE_DETECTION_NETWORK,
    ):
        """
        Initializes RetinaFace in PyTorch
        Arguments:
            enable_cuda: boolean indicating whether CUDA must be used for the extraction of the features
            face_rect_expand_factor: expansion factor for the detected face rectangle
            trained_model: path to a pretrained model file with weights
            network: name of the network used for the detection. The options are 'mobile0.25' or 'resnet50'.
        """
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.is_cuda_enable = enable_cuda
        self.face_rect_expand_factor = face_rect_expand_factor
        self.trained_model = trained_model
        self.cfg = None
        if network == 'mobile0.25':
            self.cfg = cfg_mnet
        elif network == 'resnet50':
            self.cfg = cfg_re50
        assert self.cfg is not None, "Network name can only be 'resnet50' or 'mobile0.25'!"
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = self.load_model(self.net, self.trained_model, not self.is_cuda_enable)
        self.net.eval()
        self.device = torch.device('cpu' if not self.is_cuda_enable else 'cuda')
        self.net = self.net.to(self.device)

    def check_keys(self, model, pretrained_state_dict):
        """
        Checks missing dictionary keys in the pretrained model.
        Extracted 'as is' from
        https://github.com/biubug6/Pytorch_Retinaface/blob/master/test_fddb.py
        """
        ckpt_keys = set(pretrained_state_dict.keys())
        model_keys = set(model.state_dict().keys())
        used_pretrained_keys = model_keys & ckpt_keys
        unused_pretrained_keys = ckpt_keys - model_keys
        missing_keys = model_keys - ckpt_keys
        print('Missing keys:{}'.format(len(missing_keys)))
        print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
        print('Used keys:{}'.format(len(used_pretrained_keys)))
        assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
        return True

    def remove_prefix(self, state_dict, prefix):
        """
        Old-style models are stored with every parameter name sharing the common prefix 'module.'.
        Extracted 'as is' from
        https://github.com/biubug6/Pytorch_Retinaface/blob/master/test_fddb.py
        """
        print('remove prefix \'{}\''.format(prefix))
        f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
        return {f(key): value for key, value in state_dict.items()}

    def load_model(self, model, pretrained_path, load_to_cpu):
        """
        Loads the specified trained model
        Arguments:
            load_to_cpu: boolean indicating whether to load the model on the CPU or the GPU
            model: RetinaFace model object
            pretrained_path: path to a pretrained model file with weights
        Extracted 'as is' from
        https://github.com/biubug6/Pytorch_Retinaface/blob/master/test_fddb.py
        """
        print('Loading pretrained model from {}'.format(pretrained_path))
        if load_to_cpu:
            pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
        else:
            device = torch.cuda.current_device()
            pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
        if "state_dict" in pretrained_dict.keys():
            pretrained_dict = self.remove_prefix(pretrained_dict['state_dict'], 'module.')
        else:
            pretrained_dict = self.remove_prefix(pretrained_dict, 'module.')
        self.check_keys(model, pretrained_dict)
        model.load_state_dict(pretrained_dict, strict=False)
        return model

    def detect_faces(self, img, return_best=False):
        """
        Computes a list of faces detected in the input image in the form of a
        list of bounding boxes, one per detected face.
        Arguments:
            img: the image to be input to the RetinaFace model
            return_best: boolean indicating whether to return just the best
                         detection or the complete list of detections
        Returns:
            A list of arrays. Each array contains the image coordinates of the
            corners of a bounding box and the score of the detection in the
            form [x1, y1, x2, y2, score], where (x1, y1) are the integer
            coordinates of the top-left corner of the box and (x2, y2) are the
            coordinates of the bottom-right corner of the box. The score is a
            floating-point number. When return_best is True, the returned list
            will contain only one bounding box.
        """
        if img is not None:
            try:
                im_height, im_width, _ = img.shape
                scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
                img = numpy.float32(img)
                img -= (104, 117, 123)
                img = img.transpose(2, 0, 1)
                img = torch.from_numpy(img).unsqueeze(0)
                img = img.to(self.device)
                scale = scale.to(self.device)
                # note below that the landmarks (3rd returned value) are ignored
                loc, conf, _ = self.net(img)
                priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
                priors = priorbox.forward()
                priors = priors.to(self.device)
                prior_data = priors.data
                boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
                boxes = boxes * scale
                boxes = boxes.cpu().numpy()
                scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

                # ignore low scores
                inds = numpy.where(scores > CONF_THRESH)[0]
                boxes = boxes[inds]
                scores = scores[inds]

                # keep top-K before NMS
                # order = scores.argsort()[::-1][:args.top_k]
                order = scores.argsort()[::-1]
                boxes = boxes[order]
                scores = scores[order]

                # do NMS
                dets = numpy.hstack((boxes, scores[:, numpy.newaxis])).astype(numpy.float32, copy=False)
                keep = py_cpu_nms(dets, NMS_THRESH)
                detections = dets[keep, :]
                if len(detections) > 0:
                    if return_best:
                        # detections is ordered by confidence, so the first one is the best
                        det = numpy.squeeze(detections[0, 0:5])
                        bounding_box = numpy.zeros(5, dtype=numpy.float32)
                        # extend detection
                        extend_factor = self.face_rect_expand_factor
                        width = round(det[2] - det[0] + 1)
                        height = round(det[3] - det[1] + 1)
                        length = (width + height) / 2.0
                        centrepoint = [round(det[0]) + width / 2.0, round(det[1]) + height / 2.0]
                        bounding_box[0] = centrepoint[0] - round((1 + extend_factor) * length / 2.0)
                        bounding_box[1] = centrepoint[1] - round((1 + extend_factor) * length / 2.0)
                        bounding_box[2] = centrepoint[0] + round((1 + extend_factor) * length / 2.0)
                        bounding_box[3] = centrepoint[1] + round((1 + extend_factor) * length / 2.0)
                        # prevent going off image (img is an NCHW tensor at this point)
                        bounding_box[0] = int(max(bounding_box[0], 0))
                        bounding_box[1] = int(max(bounding_box[1], 0))
                        bounding_box[2] = int(min(bounding_box[2], img.shape[3]))
                        bounding_box[3] = int(min(bounding_box[3], img.shape[2]))
                        bounding_box[4] = det[4]
                        return [bounding_box]
                    else:
                        det_list = []
                        for j in range(len(detections)):
                            det = numpy.squeeze(detections[j, 0:5])
                            bounding_box = numpy.zeros(5, dtype=numpy.float32)
                            # extend detection
                            extend_factor = self.face_rect_expand_factor
                            width = round(det[2] - det[0] + 1)
                            height = round(det[3] - det[1] + 1)
                            length = (width + height) / 2.0
                            centrepoint = [round(det[0]) + width / 2.0, round(det[1]) + height / 2.0]
                            bounding_box[0] = centrepoint[0] - round((1 + extend_factor) * length / 2.0)
                            bounding_box[1] = centrepoint[1] - round((1 + extend_factor) * length / 2.0)
                            bounding_box[2] = centrepoint[0] + round((1 + extend_factor) * length / 2.0)
                            bounding_box[3] = centrepoint[1] + round((1 + extend_factor) * length / 2.0)
                            # prevent going off image (img is an NCHW tensor at this point)
                            bounding_box[0] = int(max(bounding_box[0], 0))
                            bounding_box[1] = int(max(bounding_box[1], 0))
                            bounding_box[2] = int(min(bounding_box[2], img.shape[3]))
                            bounding_box[3] = int(min(bounding_box[3], img.shape[2]))
                            bounding_box[4] = det[4]
                            det_list.append(bounding_box)
                        return det_list
                else:
                    return None
            except Exception as e:
                print('Exception in FaceDetectorRetinaFace: ' + str(e))
        return None
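# --- Usage sketch (not part of the original class): hypothetical example of
# calling detect_faces(); it assumes the settings defaults used above and an
# example image path. Each returned entry is [x1, y1, x2, y2, score].
detector = FaceDetectorRetinaFace()
image = cv2.imread('group_photo.jpg')  # hypothetical path
detections = detector.detect_faces(image, return_best=False)
if detections is not None:
    for x1, y1, x2, y2, score in detections:
        print('face at ({:.0f}, {:.0f})-({:.0f}, {:.0f}), score {:.3f}'.format(x1, y1, x2, y2, score))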
def wxf():
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    # print('Finished loading model!')
    # print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # build the gallery of known faces from ./images
    images = os.listdir('./images')
    known_face_names = []
    known_face_encodings = []
    for i in images:
        picture_newname = i
        someone_img = face_recognition.load_image_file("images/" + i)
        someone_face_encoding = face_recognition.face_encodings(someone_img)[0]
        known_face_names.append(picture_newname)
        known_face_encodings.append(someone_face_encoding)

    name = "Unknown"
    while True:
        ret, imgre = cap.read()
        if not ret:
            print('Video open error.')
            break
        # recognition runs on a quarter-resolution RGB copy of the frame
        small_frame = cv2.resize(imgre, (0, 0), fx=0.25, fy=0.25)
        rgb_small_frame = small_frame[:, :, ::-1]
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        for i in face_encodings:
            match = face_recognition.compare_faces(known_face_encodings, i, tolerance=0.39)
            if True in match:
                match_index = match.index(True)
                name = known_face_names[match_index].split('.')[0]  # to print name and time

        img = np.float32(imgre)

        target_size = 1600
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        loc, conf, landms = net(img)  # forward pass

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        # dets = dets[:args.keep_top_k, :]
        # landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        for b in dets:
            if b[4] < args.vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(imgre, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(imgre, name, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
            # landms
            # cv2.circle(imgre, (b[5], b[6]), 1, (0, 0, 255), 4)
            # cv2.circle(imgre, (b[7], b[8]), 1, (0, 255, 255), 4)
            # cv2.circle(imgre, (b[9], b[10]), 1, (255, 0, 255), 4)
            # cv2.circle(imgre, (b[11], b[12]), 1, (0, 255, 0), 4)
            # cv2.circle(imgre, (b[13], b[14]), 1, (255, 0, 0), 4)

        # img = numpy.array(img)
        cv2.imshow('wyfRetinaface', imgre)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
cfg = None
if ops.detect_network == "mobile0.25":
    cfg = cfg_mnet
elif ops.detect_network == "resnet50":
    cfg = cfg_re50

# net and model
detect_model = RetinaFace(cfg=cfg, phase='test')
detect_model = detect_model.to(device)

if os.access(ops.detect_model, os.F_OK):  # checkpoint exists
    chkpt = torch.load(ops.detect_model, map_location=device)
    detect_model.load_state_dict(chkpt)
    print('load detect model : {}'.format(ops.detect_model))

detect_model.eval()
if use_cuda:
    cudnn.benchmark = True
print('loading model done ~')

# -------------------------------------------------------------------------- run video
video_capture = cv2.VideoCapture(ops.test_path)

with torch.no_grad():
    idx = 0
    while True:
        ret, img_raw = video_capture.read()
        if ret:
            if idx == 0:
                print('video shape : {}'.format(img_raw.shape))
            idx += 1
def main():
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # Earlier dataset/target pairs, kept commented out for reference:
    # data_dir = '../face_dataset/masked_whn'
    # target_dir = '../face_dataset/masked_whn_crop'

    # data_dir = '../face_dataset/CASIA-maxpy-clean'
    # target_dir = '../face_dataset/CASIA-maxpy-clean_crop'

    # data_dir = '../frvtTestbed/pnas/images'
    # target_dir = '../frvtTestbed/pnas_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../frvtTestbed/common/images'
    # target_dir = '../frvtTestbed/mugshot_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/calfw/aligned_images'
    # target_dir = '../face_dataset/calfw/aligned_images_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/cplfw/aligned_images'
    # target_dir = '../face_dataset/cplfw/aligned_images_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/Celeba/img_align_celeba'
    # target_dir = '../face_dataset/Celeba/img_align_celeba_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/GEO_enroll'
    # target_dir = '../face_dataset/GEO_enroll_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/GEO_enroll'
    # target_dir = '../face_dataset/GEO_enroll_large_crop'
    # crop_face(net, device, cfg, data_dir, target_dir, left_scale=0.1, right_scale=0.1, up_scale=0.1, low_scale=0.1)

    # data_dir = '../face_dataset/GEO_Mask_Testing_Dataset'
    # target_dir = '../face_dataset/GEO_Mask_Testing_Dataset_large_crop'
    # crop_face(net, device, cfg, data_dir, target_dir, left_scale=0.05, right_scale=0.05, up_scale=0.05, low_scale=0.05)

    # data_dir = '../face_dataset/GEO_Mask_Testing_Dataset'
    # target_dir = '../face_dataset/GEO_Mask_Testing_Dataset_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/GEO_env_dataset'
    # target_dir = '../face_dataset/GEO_env_dataset_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/GEO_identity'
    # target_dir = '../face_dataset/GEO_identity_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/MEDS_II'
    # target_dir = '../face_dataset/MEDS_II_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '../face_dataset/MEDS_II_mask'
    # target_dir = '../face_dataset/MEDS_II_mask_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    # data_dir = '/media/bossun/Bossun_TX2/face_dataset/CACD_VS'
    # target_dir = '/media/bossun/Bossun_TX2/face_dataset/CACD_VS_crop'
    # crop_face(net, device, cfg, data_dir, target_dir)

    data_dir = '../face_dataset/CASIA-maxpy-clean'
    target_dir = '../face_dataset/CASIA-maxpy-clean_large_crop'
    crop_face(net, device, cfg, data_dir, target_dir,
              left_scale=0.1, right_scale=0.1, up_scale=0.1, low_scale=0.1)

    data_dir = '../face_dataset/1N_test_dataset_origin/GEO_Mask_Testing_Dataset_1N/identity'
    target_dir = '../face_dataset/1N_test_dataset/GEO_Mask_Testing_Dataset_large_crop_1N/identity'
    crop_face(net, device, cfg, data_dir, target_dir,
              left_scale=0.1, right_scale=0.1, up_scale=0.1, low_scale=0.1)
class FaceDetector:
    def __init__(self,
                 trained_model='./weights/mobilenet0.25_Final.pth',
                 network='mobile0.25',
                 cpu=True,
                 confidence_threshold=0.02,
                 top_k=5000,
                 nms_threshold=0.4,
                 keep_top_k=750,
                 vis_thres=0.6):
        self.trained_model = trained_model
        self.network = network
        self.cpu = cpu
        self.confidence_threshold = confidence_threshold
        self.top_k = top_k
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.vis_thres = vis_thres

        torch.set_grad_enabled(False)
        self.cfg = None
        if self.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif self.network == "resnet50":
            self.cfg = cfg_re50
        else:
            raise Exception("Invalid network")

        # build net and load model
        self.net = RetinaFace(self.cfg, phase='test')
        self.net = load_model(self.net, self.trained_model, self.cpu)
        self.net = self.net.eval()
        self.device = torch.device("cpu" if self.cpu else "cuda")
        self.net = self.net.to(self.device)
        self.resize = 1

    def detect(self, image_raw):
        """
        Detect faces in a single image.
        :param image_raw: ndarray of image
        :return: ndarray with one row per face:
                 [x1, y1, x2, y2, score, lm1_x, lm1_y, ..., lm5_x, lm5_y]
        """
        img = np.float32(image_raw)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        print("shape of landms: ", landms.shape)
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:self.keep_top_k, :]
        landms = landms[:self.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        return dets
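# --- Usage sketch (not part of the original class): how a caller might unpack
# the 15-column dets array returned by detect(). The image path is hypothetical.
detector = FaceDetector(cpu=True)
dets = detector.detect(cv2.imread('selfie.jpg'))
for row in dets:
    box, score, landmarks = row[:4], row[4], row[5:].reshape(5, 2)
    print(box.astype(int), float(score), landmarks.astype(int))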
from data import cfg_mnet
from models.retinaface import RetinaFace
from utils.net_utils import load_model, image_process, process_face_data
# import torch2trt.converters.cat

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.set_grad_enabled(False)
cfg = cfg_mnet
retina_trained_model = "./weights/mobilenet0.25_Final.pth"
use_cpu = False
# cfg = cfg_re50
retina_net = RetinaFace(cfg=cfg, phase='test')
retina_net = load_model(retina_net, retina_trained_model, use_cpu)
retina_net.eval()
cudnn.benchmark = True
retina_net = retina_net.to(device)


def main(img_path):
    test_img = cv2.imread(img_path)
    resize = 1
    im, im_width, im_height, scale = image_process(test_img, device)
    print(im.shape)
    model = torch2trt(retina_net, [im], fp16_mode=True, max_workspace_size=100000)
    tic = time.time()
    loc, conf, landms = model(im)
    print('net forward time: {:.4f}'.format(time.time() - tic))
    model.load_state_dict(pretrained_dict, strict=False)
    return model


if __name__ == '__main__':
    torch.set_grad_enabled(False)  # flag for test mode
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()  # test mode; during training this would be net.train()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # testing dataset
    testset_folder = args.dataset_folder
    testset_list = args.dataset_folder[:-8] + "test_val.txt"
    with open(testset_list, 'r') as fr:
        test_dataset = fr.read().split('\n')
    num_images = len(test_dataset)

    _t = {'forward_pass': Timer(), 'misc': Timer()}
def main():
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet18":
        cfg = cfg_re18
    elif args.network == "resnet34":
        cfg = cfg_re34
    elif args.network == "resnet50":
        cfg = cfg_re50
    elif args.network == "Efficientnet-b0":
        cfg = cfg_eff_b0
    elif args.network == "Efficientnet-b4":
        cfg = cfg_eff_b4
    elif args.network == "resnet34_hsfd":
        cfg = cfg_re34_hsfd_finetune
    elif args.network == "resnet34_hsfd_not_finetune":
        cfg = cfg_re34_hsfd_not_finetune

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # # testing dataset (WIDER FACE variant, kept for reference)
    # testset_folder = args.dataset_folder
    # testset_list = args.dataset_folder[:-7] + "wider_val.txt"
    # with open(testset_list, 'r') as fr:
    #     test_dataset = fr.read().split()
    # test_dataset = []
    # for event in os.listdir(testset_folder):
    #     subdir = os.path.join(testset_folder, event)
    #     img_names = os.listdir(subdir)
    #     for img_name in img_names:
    #         test_dataset.append([event, os.path.join(subdir, img_name)])
    # num_images = len(test_dataset)

    used_channels = cfg['used_channels']
    img_dim = cfg['image_size']
    test_dataset = EcustHsfdDetection(args.dataset_file, used_channels,
                                      preproc=valid_preproc(img_dim, None), mode='valid')
    num_images = len(test_dataset)
    datadir = '/'.join(args.dataset_file.split('/')[:-1])

    pred_file = os.path.join(f'{args.save_folder:s}_pred.txt')
    gt_file = os.path.join(f'{args.save_folder:s}_gt.txt')
    fp1 = open(pred_file, 'w')
    fp2 = open(gt_file, 'w')

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    # testing begin
    for i, img_name in enumerate(test_dataset.imgs_path):
        if i % 100 == 0:
            torch.cuda.empty_cache()
        # image_path = testset_folder + img_name
        img_raw = load_datacube(img_name)[..., used_channels]
        # img_raw = cv2.imread(img_name, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale
        target_size = img_dim
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1
        if resize != 1:
            # resize each spectral channel separately
            img = np.stack([
                cv2.resize(img[..., ch], None, None, fx=resize, fy=resize,
                           interpolation=cv2.INTER_LINEAR)
                for ch in range(img.shape[-1])
            ], axis=-1)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img = (img - 127.5) / 128.0
        # img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        _t['forward_pass'].tic()
        loc, conf, landms = net(img)  # forward pass
        _t['forward_pass'].toc()

        _t['misc'].tic()
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        prediction = np.concatenate((dets, landms), axis=1)
        _t['misc'].toc()

        # --------------------------------------------------------------------
        # save_name = os.path.join(args.save_folder, img_name.split('/')[-1].split('.')[0] + ".txt")
        # dirname = os.path.dirname(save_name)
        # if not os.path.isdir(dirname):
        #     os.makedirs(dirname)
        # with open(save_name, "w") as fd:
        #     bboxs = dets
        #     file_name = os.path.basename(save_name)[:-4] + "\n"
        #     bboxs_num = str(len(bboxs)) + "\n"
        #     fd.write(file_name)
        #     fd.write(bboxs_num)
        #     for box in bboxs:
        #         x = int(box[0])
        #         y = int(box[1])
        #         w = int(box[2]) - int(box[0])
        #         h = int(box[3]) - int(box[1])
        #         confidence = str(box[4])
        #         line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n"
        #         fd.write(line)

        # strip the data directory prefix (str.lstrip would strip characters, not a prefix)
        rel_name = img_name[len(datadir):].lstrip('/')
        fp1.write(f"# {rel_name}\n")
        if dets.shape[0] > 0:
            dets = prediction[0][:4].astype(int).tolist()
            dets[2] -= dets[0]  # width
            dets[3] -= dets[1]  # height
            landms = prediction[0][4:14]
            scores = prediction[0][14]
            # assembled label: x y w h, ten landmark coordinates, then the score
            label = [0. for _ in range(20)]
            label[-1] = scores
            label[:4] = dets
            label[4:-1] = landms
            label = ' '.join(list(map(str, label)))
            fp1.write(f'{label}\n')

        gt_label = ' '.join(list(map(str, test_dataset.words[i][0])))
        fp2.write(f"# {rel_name}\n")
        fp2.write(f'{gt_label}\n')

        print('im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s'
              .format(i + 1, num_images, _t['forward_pass'].average_time, _t['misc'].average_time))

        # # save image
        # if args.save_image:
        #     for b in dets:
        #         if b[4] < args.vis_thres:
        #             continue
        #         text = "{:.4f}".format(b[4])
        #         b = list(map(int, b))
        #         cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        #         cx = b[0]
        #         cy = b[1] + 12
        #         cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        #         # landms
        #         cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
        #         cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
        #         cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
        #         cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
        #         cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
        #     # save image
        #     if not os.path.exists("./results/"):
        #         os.makedirs("./results/")
        #     name = "./results/" + str(i) + ".jpg"
        #     cv2.imwrite(name, img_raw)

    fp1.close()
    fp2.close()
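# --- Format note (illustrative, hypothetical values): each record in the
# prediction file written above is a "# <relative path>" header followed by one
# line of 15 numbers: x y w h, ten landmark coordinates, then the score, e.g.
#
#   # subject_001/cube_0003
#   102 88 54 61 118.2 112.3 140.1 110.9 130.2 125.7 121.4 139.8 138.9 138.2 0.9987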
def train():
    net = RetinaFace(cfg=cfg)
    logger.info("Printing net...")
    logger.info(net)

    if args.resume_net is not None:
        logger.info('Loading resume network...')
        state_dict = torch.load(args.resume_net)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()
    cudnn.benchmark = True

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    net.train()
    epoch = 0 + args.resume_epoch

    logger.info('Loading Dataset...')
    trainset = WiderFaceDetection(training_dataset, preproc=train_preproc(img_dim, rgb_mean), mode='train')
    validset = WiderFaceDetection(training_dataset, preproc=valid_preproc(img_dim, rgb_mean), mode='valid')
    # trainset = WiderFaceDetection(training_dataset, transformers=train_transformers(img_dim), mode='train')
    # validset = WiderFaceDetection(training_dataset, transformers=valid_transformers(img_dim), mode='valid')
    trainloader = data.DataLoader(trainset, batch_size, shuffle=True,
                                  num_workers=num_workers, collate_fn=detection_collate)
    validloader = data.DataLoader(validset, batch_size, shuffle=True,
                                  num_workers=num_workers, collate_fn=detection_collate)
    logger.info(f'Totally {len(trainset)} training samples and {len(validset)} validating samples.')

    epoch_size = math.ceil(len(trainset) / batch_size)
    max_iter = max_epoch * epoch_size
    logger.info(f'max_epoch: {max_epoch:d} epoch_size: {epoch_size:d}, max_iter: {max_iter:d}')

    # optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=initial_lr, weight_decay=weight_decay)
    scheduler = _utils.get_linear_schedule_with_warmup(optimizer, int(0.1 * max_iter), max_iter)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    best_loss_val = float('inf')
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            # batch_iterator = iter(tqdm(trainloader, total=len(trainloader)))
            batch_iterator = iter(trainloader)
            # if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']):
            #     torch.save(net.state_dict(), save_folder + cfg['name'] + '_epoch_' + str(epoch) + '.pth')
            epoch += 1
            torch.cuda.empty_cache()

        if (valid_steps > 0) and (iteration > 0) and (iteration % valid_steps == 0):
            net.eval()
            # validation
            loss_l_val = 0.
            loss_c_val = 0.
            loss_landm_val = 0.
            loss_val = 0.
            # for val_no, (images, targets) in tqdm(enumerate(validloader), total=len(validloader)):
            for val_no, (images, targets) in enumerate(validloader):
                # load data
                images = images.cuda()
                targets = [anno.cuda() for anno in targets]
                # forward
                with torch.no_grad():
                    out = net(images)
                    loss_l, loss_c, loss_landm = criterion(out, priors, targets)
                loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
                loss_l_val += loss_l.item()
                loss_c_val += loss_c.item()
                loss_landm_val += loss_landm.item()
                loss_val += loss.item()
            loss_l_val /= len(validloader)
            loss_c_val /= len(validloader)
            loss_landm_val /= len(validloader)
            loss_val /= len(validloader)
            logger.info('[Validating] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f}'
                        .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                                iteration + 1, max_iter, loss_val, loss_l_val, loss_c_val, loss_landm_val))
            if loss_val < best_loss_val:
                best_loss_val = loss_val
                pth = os.path.join(save_folder, cfg['name'] + '_iter_' + str(iteration) + f'_{loss_val:.4f}_' + '.pth')
                torch.save(net.state_dict(), pth)
                logger.info(f'Best validating loss: {best_loss_val:.4f}, model saved as {pth:s})')
            net.train()

        load_t0 = time.time()
        # if iteration in stepvalues:
        #     step_index += 1
        # lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda() for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
        loss.backward()
        optimizer.step()
        scheduler.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        if iteration % verbose_steps == 0:
            logger.info('[Training] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
                        .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                                iteration + 1, max_iter, loss.item(), loss_l.item(), loss_c.item(),
                                loss_landm.item(), scheduler.get_last_lr()[-1], batch_time,
                                str(datetime.timedelta(seconds=eta))))
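# --- Aside (assumption: the helper above mirrors the common transformers-style
# linear-warmup schedule): get_linear_schedule_with_warmup ramps the LR from 0
# to initial_lr over the first 10% of iterations, then decays it linearly back
# to 0. A minimal sketch of that multiplier:
def linear_warmup_lr_factor(step, warmup_steps, total_steps):
    """LR multiplier: 0 -> 1 during warmup, then linear decay 1 -> 0."""
    if step < warmup_steps:
        return step / max(1, warmup_steps)
    return max(0.0, (total_steps - step) / max(1, total_steps - warmup_steps))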
class RetinaFaceDet(object):
    def __init__(self, model_type="mobile0.25", model_path="./weights/mobilenet0.25_Final.pth",
                 backbone_location="./weights/mobilenetV1X0.25_pretrain.tar", use_cpu=True, loading=True):
        self.cfg = None
        self.use_cpu = use_cpu
        self.model_path = model_path
        if model_type == "mobile0.25":
            self.cfg = cfg_mnet
        elif model_type == "resnet50":
            self.cfg = cfg_re50
        self.device = torch.device("cpu" if use_cpu else "cuda")
        self.net = RetinaFace(cfg=self.cfg, phase="test", backbone_location=backbone_location)
        if loading:
            if not os.path.exists(model_path):
                print('No model path exists!')
            self.loading()
        self._priors = None
        self.im_width = 0
        self.im_height = 0
        self.im_nch = 0

    def _get_model(self):
        self.net.eval()
        return self.net

    def loading(self):
        self.net = load_model(self.net, self.model_path, self.use_cpu)
        self.net.eval()
        self.net = self.net.to(self.device)

    def set_default_size(self, imgshape=[640, 480, 3]):  # [H, W, nCh]
        im_height, im_width, im_nch = imgshape
        if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
            pass
        else:
            self.im_height, self.im_width, self.im_nch = imgshape
            """
            priorbox shape [-1, 4]; dim0: number of predicted bboxes from the network;
            dim1: [x_center, y_center, w, h].
            The priorbox stores the anchor centres and the preset minimum face sizes;
            the aspect ratio is handled through the variances. All values are relative
            to the image size, i.e. in the range (0, 1).
            """
            priorbox = PriorBox(self.cfg, image_size=(self.im_height, self.im_width))
            self._priors = priorbox.forward()

    @torch.no_grad()
    def execute_batch_mlu(self, net_output, batch_shape, threshold=0.8, topk=5000, keep_topk=750, nms_threshold=0.2):
        locs, confs, landmss = net_output
        nB, nCh, im_height, im_width = batch_shape
        scale = torch.Tensor([im_width, im_height] * 2)
        scale1 = torch.Tensor([im_width, im_height] * 5)
        detss = []
        if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
            pass
        else:
            self.set_default_size([im_height, im_width, nCh])
        priors = self._priors.unsqueeze(dim=0)
        boxes = batch_decode(locs, priors, self.cfg['variance'])
        boxes = boxes * scale
        scores = confs[:, :, 1]
        landms = batch_decode_landm(landmss, priors, self.cfg['variance'])
        landms = landms * scale1
        landms = landms.data.cpu().numpy()
        scores = scores.data.cpu().numpy()
        boxes = boxes.data.cpu().numpy()

        for n in range(nB):
            _landms = landms[n]
            _scores = scores[n]
            _boxes = boxes[n]

            # ignore low scores
            inds = np.where(_scores > threshold)[0]
            _boxes = _boxes[inds]
            _landms = _landms[inds]
            _scores = _scores[inds]

            # keep top-K before NMS
            order = _scores.argsort()[::-1][:topk]
            _boxes = _boxes[order]
            _landms = _landms[order]
            _scores = _scores[order]

            # do NMS
            dets = np.hstack((_boxes, _scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, nms_threshold)
            dets = dets[keep, :]
            _landms = _landms[keep]

            # keep top-K after NMS
            dets = dets[:keep_topk, :]
            _landms = _landms[:keep_topk, :]

            # x0, y0, x1, y1, score, landmarks...
            dets = np.concatenate((dets, _landms), axis=1)
            detss.append(dets)
        return detss

    def execute_batch(self, img_batch, threshold=0.8, topk=5000, keep_topk=750, nms_threshold=0.2):
        resize = 1
        with torch.no_grad():
            img_batch = img_batch.to(self.device)
            locs, confs, landmss = self.net(img_batch)
            nB, nCh, im_height, im_width = img_batch.shape
            scale = torch.Tensor([im_width, im_height, im_width, im_height])
            scale = scale.to(self.device)
            if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
                pass
            else:
                self.set_default_size([im_height, im_width, nCh])
            priors = self._priors
            priors = priors.to(self.device)
            prior_data = priors.data
            detss = []
            """
            Take the bbox location as an example. What we ultimately want is:
                bbox_center_x, bbox_center_y, bbox_w, bbox_h
            Predicting these values directly is hard, so they are decoupled from the
            image size and squashed into the (0, 1) range; the network instead
            predicts _bbox_center_x, _bbox_center_y, _bbox_w, _bbox_h, related by:
                bbox_center_x = _bbox_center_x * imgW
                bbox_center_y = _bbox_center_y * imgH
                bbox_w = _bbox_w * imgW
                bbox_h = _bbox_h * imgH
            On top of this, anchors are introduced: several bbox centres and minimum
            face sizes are preset, and the network only predicts the scale/offset
            relative to those presets.
            The model output is [x_offset, y_offset, w_scale, h_scale];
            the preset anchor is [x_center, y_center, face_w, face_h];
            vx, vy control the face aspect ratio. They are related by:
                _bbox_center_x = x_center + x_offset * face_w * vx
                _bbox_center_y = y_center + y_offset * face_h * vy
                _bbox_w = face_w * exp(w_scale * vx)
                _bbox_h = face_h * exp(h_scale * vy)
            So finally:
                bbox_center_x = (x_center + x_offset * face_w * vx) * imgW
                bbox_center_y = (y_center + y_offset * face_h * vy) * imgH
                bbox_w = face_w * exp(w_scale * vx) * imgW
                bbox_h = face_h * exp(h_scale * vy) * imgH
            """
            for idx in range(nB):
                loc = locs[idx]
                conf = confs[idx]
                landms = landmss[idx]
                """
                For loc, the network outputs a [-1, 4] matrix whose dim1 is
                [x_offset, y_offset, w_scale, h_scale]; decode() recovers the real bbox.
                loc: [-1, 4]; dim1: [x_offset, y_offset, w_scale, h_scale]
                prior_data: [-1, 4]; dim1: [x_center, y_center, face_w, face_h]
                Although face_w != face_h numerically, they are essentially equal,
                because they are face_w/face_h relative to the image size; the anchors
                are effectively square, and the variances control the aspect ratio.
                variance: [vx, vy], controlling the aspect ratio:
                    _bbox_center_x = x_center + x_offset * face_w * vx
                    _bbox_center_y = y_center + y_offset * face_h * vy
                    _bbox_w = face_w * exp(w_scale * vx)
                    _bbox_h = face_h * exp(h_scale * vy)
                Finally the boxes are converted to
                (left-top x, left-top y, right-bottom x, right-bottom y) form.
                """
                boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
                """
                The decoded values are still normalized to (0, 1), so they must be
                rescaled back; this scale is simply the image size.
                """
                boxes = boxes * scale / resize
                boxes = boxes.cpu().numpy()
                scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
                """
                Same principle as above, applied to the landmarks.
                """
                landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
                scale1 = torch.Tensor([im_width, im_height, im_width, im_height,
                                       im_width, im_height, im_width, im_height,
                                       im_width, im_height])
                scale1 = scale1.to(self.device)
                landms = landms * scale1 / resize
                landms = landms.cpu().numpy()

                # ignore low scores
                inds = np.where(scores > threshold)[0]
                boxes = boxes[inds]
                landms = landms[inds]
                scores = scores[inds]

                # keep top-K before NMS
                order = scores.argsort()[::-1][:topk]
                boxes = boxes[order]
                landms = landms[order]
                scores = scores[order]

                # do NMS
                dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
                """
                py_cpu_nms: non-maximum suppression.
                Treat dets as a queue: pop the first (highest-scoring) element, add it
                to the keep list, compare it against the remaining elements, and drop
                every bbox whose overlap exceeds nms_threshold; repeat until dets is
                empty.
                """
                keep = py_cpu_nms(dets, nms_threshold)
                # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
                dets = dets[keep, :]
                landms = landms[keep]

                # keep top-K after NMS
                dets = dets[:keep_topk, :]
                landms = landms[:keep_topk, :]

                # x0, y0, x1, y1, score, landmarks...
                dets = np.concatenate((dets, landms), axis=1)
                detss.append(dets)
        return detss

    def execute(self, img_cv, threshold=0.6, topk=5000, keep_topk=750, nms_threshold=0.7):
        resize = 1
        with torch.no_grad():
            img = np.float32(img_cv)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(self.device)
            scale = scale.to(self.device)

            loc, conf, landms = self.net(img)  # forward pass

            if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
                pass
            else:
                self.set_default_size([im_height, im_width, 3])
            priors = self._priors
            priors = priors.to(self.device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
            scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2]])
            scale1 = scale1.to(self.device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:topk]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, nms_threshold)
            # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:keep_topk, :]
            landms = landms[:keep_topk, :]

            # x0, y0, x1, y1, score, landmarks...
            dets = np.concatenate((dets, landms), axis=1)
        return dets

    def execute_debug(self, img_cv, threshold=0.6, topk=5000, keep_topk=750, nms_threshold=0.7):
        resize = 1
        dtime = {'detection': [], 'nms': [], 'decode': []}
        tob = timer(display=False)
        with torch.no_grad():
            img = np.float32(img_cv)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            # img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(self.device)
            scale = scale.to(self.device)

            data = img.reshape(-1).tolist()
            jsd = {
                "data": [
                    {
                        "INPUT": {
                            "content": data,
                            "shape": [460, 640]
                        }
                    }
                ]
            }
            # jsdata = json.dumps(jsd)
            store(jsd)

            tob.start()
            loc, conf, landms = self.net(img)  # forward pass
            utm = tob.eclapse()
            dtime['detection'].append(utm)

            if im_height == self.im_height and im_width == self.im_width and self._priors is not None:
                pass
            else:
                self.set_default_size([im_height, im_width, 3])
            priors = self._priors

            tob.start()
            priors = priors.to(self.device)
            prior_data = priors.data
            print('nin=', prior_data.shape[0])
            print('loc.data', loc.data.squeeze(0)[:2, :])
            boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
            print('boxes', boxes[:2, :])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            print('landms.data', landms.data.squeeze(0)[:2, :])
            landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
            print('landms', landms[:2, :])
            scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                                   img.shape[3], img.shape[2]])
            scale1 = scale1.to(self.device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]
            print('nout=', scores.shape[0])

            # keep top-K before NMS
            order = scores.argsort()[::-1][:topk]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]
            utm = tob.eclapse()
            dtime['decode'].append(utm)

            tob.start()
            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            print('nms in=', dets.shape[0])
            keep = py_cpu_nms(dets, nms_threshold)
            # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
            dets = dets[keep, :]
            print('nms out=', dets.shape[0])
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:keep_topk, :]
            landms = landms[:keep_topk, :]

            # x0, y0, x1, y1, score, landmarks...
            dets = np.concatenate((dets, landms), axis=1)
            utm = tob.eclapse()
            dtime['nms'].append(utm)
        return dets, dtime
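# --- Aside (not from the original project): the translated comments above
# describe the box decode and the greedy NMS in full. As a cross-check, here
# are minimal NumPy re-derivations of both transforms; decode_sketch and
# nms_sketch are hypothetical names, not the project's decode()/py_cpu_nms().
import numpy as np

def decode_sketch(loc, priors, variances):
    """loc: [-1,4] [x_offset, y_offset, w_scale, h_scale];
    priors: [-1,4] [x_center, y_center, face_w, face_h]; variances: [vx, vy].
    Returns [-1,4] (x1, y1, x2, y2) boxes, still normalized to (0, 1)."""
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
    return np.concatenate([centers - sizes / 2.0, centers + sizes / 2.0], axis=1)

def nms_sketch(dets, thresh):
    """Greedy NMS as described above: pop the best box, suppress overlaps."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep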
class RetinaFaceDetector:
    def __init__(self,
                 trained_model='weights/Resnet50_Final.pth',
                 network='resnet50',
                 cpu=False,
                 confidence_threshold=0.02,
                 top_k=5000,
                 nms_threshold=0.4,
                 keep_top_k=750,
                 show_image=False,
                 vis_thres=0.6):
        self.cpu = cpu
        self.confidence_threshold = confidence_threshold
        self.top_k = top_k
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.show_image = show_image
        self.vis_thres = vis_thres

        torch.set_grad_enabled(False)
        self.cfg = None
        if network == "mobile0.25":
            self.cfg = cfg_mnet
        elif network == "resnet50":
            self.cfg = cfg_re50
        self.cfg['pretrain'] = False

        # net and model
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = load_model(self.net,
                              os.path.join(os.path.dirname(inspect.getabsfile(RetinaFace)), '../' + trained_model),
                              cpu)
        self.net.eval()
        print('Finished loading model!')
        print(self.net)
        cudnn.benchmark = True
        self.device = torch.device("cpu" if cpu else "cuda")
        self.net = self.net.to(self.device)
        self.resize = 1

    def detect_faces(self, img_raw, mean=(104, 117, 123)):
        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= mean
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / self.resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)
        # keep = nms(dets, self.nms_threshold, force_cpu=self.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:self.keep_top_k, :]
        landms = landms[:self.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        # show image
        if self.show_image:
            for b in dets:
                if b[4] < self.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
            # Show image
            cv2.imshow('result', img_raw)
            cv2.waitKey(100)

        results = []
        for det in dets:
            box = det[:4]
            score = det[4]
            keypoints = det[5:]
            if score < self.vis_thres:
                continue
            results.append({'box': box.tolist(), 'score': score.tolist(), 'keypoints': keypoints.tolist()})
        return results

    def detect_faces_batch(self, img_raws, mean=(104, 117, 123)):
        imgs = []
        for img_raw in img_raws:
            imgs.append(np.float32(img_raw))
        imgs = np.stack(imgs, 0)
        batch_size, im_height, im_width, _ = imgs.shape
        scale = torch.Tensor([imgs.shape[2], imgs.shape[1], imgs.shape[2], imgs.shape[1]])
        imgs -= mean
        imgs = imgs.transpose(0, 3, 1, 2)
        imgs = torch.from_numpy(imgs)
        imgs = imgs.to(self.device)
        scale = scale.to(self.device)

        tic = time.time()
        loc, conf, landms = self.net(imgs)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data

        list_results = []
        for idx in range(batch_size):
            boxes = decode(loc.data[idx], prior_data, self.cfg['variance'])
            boxes = boxes * scale / self.resize
            boxes = boxes.cpu().numpy()
            scores = conf.data[idx].cpu().numpy()[:, 1]
            keypoints = decode_landm(landms[idx].data, prior_data, self.cfg['variance'])
            scale1 = torch.Tensor([imgs.shape[3], imgs.shape[2], imgs.shape[3], imgs.shape[2],
                                   imgs.shape[3], imgs.shape[2], imgs.shape[3], imgs.shape[2],
                                   imgs.shape[3], imgs.shape[2]])
            scale1 = scale1.to(self.device)
            keypoints = keypoints * scale1 / self.resize
            keypoints = keypoints.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > self.confidence_threshold)[0]
            boxes = boxes[inds]
            keypoints = keypoints[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:self.top_k]
            boxes = boxes[order]
            keypoints = keypoints[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, self.nms_threshold)
            # keep = nms(dets, self.nms_threshold, force_cpu=self.cpu)
            dets = dets[keep, :]
            keypoints = keypoints[keep]

            # keep top-K after NMS
            dets = dets[:self.keep_top_k, :]
            keypoints = keypoints[:self.keep_top_k, :]

            dets = np.concatenate((dets, keypoints), axis=1)

            # show image (draw on the image belonging to this batch index,
            # not on the loop variable left over from preprocessing)
            if self.show_image:
                img_raw = img_raws[idx]
                for b in dets:
                    if b[4] < self.vis_thres:
                        continue
                    text = "{:.4f}".format(b[4])
                    b = list(map(int, b))
                    cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                    cx = b[0]
                    cy = b[1] + 12
                    cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                    # keypoints
                    cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                    cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                    cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                    cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                    cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
                # Show image
                cv2.imshow('result', img_raw)
                cv2.waitKey(100)

            results = []
            for det in dets:
                box = det[:4]
                score = det[4]
                keypoints = det[5:]
                if score < self.vis_thres:
                    continue
                results.append({'box': box.tolist(), 'score': score.tolist(), 'keypoints': keypoints.tolist()})
            list_results.append(results)
        return list_results
def main():
    args = get_args()
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    resize = 1

    # testing begin
    for _ in range(100):
        image_path = "./curve/test.jpg"
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)
        im_height, im_width = img.shape[:2]
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        print("net forward time: {:.4f}".format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"])
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2],
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)

        # show image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
            # save image
            name = "test.jpg"
            cv2.imwrite(name, img_raw)
class RetinaFaceDetector:
    def __init__(self, device, pretrained_model):
        self.device = device
        self.cfg = {
            'name': 'Resnet50',
            'min_sizes': [[16, 32], [64, 128], [256, 512]],
            'steps': [8, 16, 32],
            'variance': [0.1, 0.2],
            'clip': False,
            'loc_weight': 2.0,
            'gpu_train': True,
            'batch_size': 24,
            'ngpu': 4,
            'epoch': 100,
            'decay1': 70,
            'decay2': 90,
            'image_size': 840,
            'pretrain': True,
            'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
            'in_channel': 256,
            'out_channel': 256
        }
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = load_model(self.net, pretrained_model, device)
        self.net.eval()
        self.net = self.net.to(device)

    def predict(self, image, confidence_threshold=0.02, top_k=5000,
                nms_threshold=0.4, keep_top_k=750):
        torch.set_grad_enabled(False)
        img = np.float32(image)
        resize = 1
        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf, landms = self.net(img)  # forward pass
        # print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]
        dets = np.concatenate((dets, landms), axis=1)

        bboxes, landmarks, confident_scores = dets[:, :4], dets[:, 5:], dets[:, 4]

        # pad each box to a square, then clip at the image origin
        boxes = []
        if bboxes is None:
            boxes = None
        else:
            for box in bboxes:
                x0, y0, x1, y1 = tuple(box.astype(int))
                height, width = y1 - y0, x1 - x0
                distance = max(height, width)
                if height < distance:
                    gap = distance - height
                    y0 -= gap / 2
                    y1 += gap / 2
                elif width < distance:
                    gap = distance - width
                    x0 -= gap / 2
                    x1 += gap / 2
                if y0 < 0:
                    y1 -= y0
                    y0 = 0
                if x0 < 0:
                    x1 -= x0
                    x0 = 0
                boxes.append([x0, y0, x1, y1])
            boxes = np.array(boxes).astype(int)
        return boxes, landmarks.reshape(-1, 5, 2), confident_scores
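A hypothetical call to the class above (the weight and image paths are placeholders). predict returns integer boxes padded to squares and clipped at the top-left image border, landmarks reshaped to one (5, 2) array per face, and the confidence scores:

detector = RetinaFaceDetector(device=torch.device('cuda'),
                              pretrained_model='weights/Resnet50_Final.pth')
image = cv2.imread('photo.jpg')
boxes, landmarks, scores = detector.predict(image)
# boxes: (N, 4) int, landmarks: (N, 5, 2), scores: (N,)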
def main():
    args = get_args()
    torch.set_grad_enabled(False)

    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # testing dataset
    testset_folder = args.dataset_folder
    testset_list = args.dataset_folder[:-7] + "wider_val.txt"
    with open(testset_list, "r") as fr:
        test_dataset = fr.read().split()
    num_images = len(test_dataset)

    _t = {"forward_pass": Timer(), "misc": Timer()}

    # testing begin
    for i, img_name in enumerate(test_dataset):
        image_path = testset_folder + img_name
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale
        target_size = 1600
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        _t["forward_pass"].tic()
        loc, conf, landms = net(img)  # forward pass
        _t["forward_pass"].toc()

        _t["misc"].tic()
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        # dets = dets[:args.keep_top_k, :]
        # landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
        _t["misc"].toc()

        # --------------------------------------------------------------------
        save_name = args.save_folder + img_name[:-4] + ".txt"
        dirname = os.path.dirname(save_name)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(save_name, "w") as fd:
            bboxs = dets
            file_name = os.path.basename(save_name)[:-4] + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            fd.write(file_name)
            fd.write(bboxs_num)
            for box in bboxs:
                x = int(box[0])
                y = int(box[1])
                w = int(box[2]) - int(box[0])
                h = int(box[3]) - int(box[1])
                confidence = str(box[4])
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n"
                fd.write(line)

        print("im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s".format(
            i + 1, num_images, _t["forward_pass"].average_time, _t["misc"].average_time))

        # save image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

            # save image
            if not os.path.exists("./results/"):
                os.makedirs("./results/")
            name = "./results/" + str(i) + ".jpg"
            cv2.imwrite(name, img_raw)
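Each per-image .txt produced above follows the WIDER FACE evaluation layout: the file stem on the first line, the detection count on the second, then one "x y w h score" row per box. An illustrative example with made-up values:

0--Parade/0_Parade_marchingband_1_20
2
45 117 28 35 0.9987
301 96 30 41 0.9342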
    def __init__(self, input_mode=0, output_mode=0, record_video=False,
                 email_to_share=None, channel=0, on_gpu=False, display=False,
                 only_headcount=False, send_to_nvr=False, parallel=False):
        self.save_into_sheet = True
        self.on_gpu = on_gpu
        self.send_to_nvr = send_to_nvr
        if email_to_share is None:
            self.save_into_sheet = False
        if self.save_into_sheet or self.send_to_nvr:
            self.api = API(email_to_share)

        uri = 'rtsp://' + secrets.ip_camera_login + ':' + secrets.ip_camera_password + \
              '@{}:554/cam/realmonitor?channel=1&subtype=0&unicast=true&proto=Onvif'

        self.input_mode = input_mode
        self.output_mode = output_mode  # 0 - pretty display, 1 - separate graph, 2 - graph with black background
        self.record_video = record_video
        self.display = display
        self.only_headcount = only_headcount
        if input_mode == 0:  # webcam
            self.channel = 0
        elif input_mode == 1:  # ip camera
            self.channel = uri.format(channel)
            self.ip = channel
        elif input_mode == 2:  # video
            self.channel = channel
        if parallel and not on_gpu:
            self.parallel = True
        else:
            self.parallel = False

        # from classifier by Sizykh Ivan
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.class_labels = ['ANGRY', 'DISGUST', 'FEAR', 'HAPPY', 'SAD', 'SURPRISE', 'NEUTRAL']
        # PATH = "./check_points_4/net_714.pth"
        PATH = "./net_714.pth"
        if self.on_gpu:
            self.classifier = Classifier().to(self.device)
            self.classifier.load_state_dict(torch.load(PATH))
        else:
            self.classifier = Classifier()
            self.classifier.load_state_dict(torch.load(PATH, map_location={'cuda:0': 'cpu'}))

        # from detector by Belyakova Katerina
        self.parser = argparse.ArgumentParser(description='Retinaface')
        self.parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth',
                                 type=str, help='Trained state_dict file path to open')
        self.parser.add_argument('--network', default='resnet50',
                                 help='Backbone network mobile0.25 or resnet50')
        self.parser.add_argument('--cpu', action="store_true", default=False,
                                 help='Use cpu inference')
        self.parser.add_argument('--confidence_threshold', default=0.02, type=float,
                                 help='confidence_threshold')
        self.parser.add_argument('--top_k', default=5000, type=int, help='top_k')
        self.parser.add_argument('--nms_threshold', default=0.4, type=float,
                                 help='nms_threshold')
        self.parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k')
        self.parser.add_argument('-s', '--save_image', action="store_true", default=True,
                                 help='show detection results')
        self.parser.add_argument('--vis_thres', default=0.6, type=float,
                                 help='visualization_threshold')
        self.parser.add_argument('-v', '--video', default='vid.mp4', type=str)
        self.parser_args = self.parser.parse_args()
        self.resize = 1

        # sets parameters for RetinaFace; prerun() is used once on the first usage of run()
        torch.set_grad_enabled(False)
        cfg = None
        if self.parser_args.network == "mobile0.25":
            cfg = cfg_mnet
        elif self.parser_args.network == "resnet50":
            cfg = cfg_re50

        # net and model
        detector = RetinaFace(cfg=cfg, phase='test')
        detector = self.load_model(model=detector,
                                   pretrained_path=self.parser_args.trained_model,
                                   load_to_cpu=self.parser_args.cpu)
        detector.eval()
        print('Finished loading model!')
        print(detector)
        if self.on_gpu:
            cudnn.benchmark = True
            self.detector = detector.to(self.device)
        else:
            self.detector = detector
        self.cfg = cfg
def main():
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise NotImplementedError("Only mobile0.25 and resnet50 are supported.")

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    if args.fp16:
        net = net.half()
    print("Finished loading model!")
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    file_paths = sorted(args.input_path.rglob("*.mp4"))[:args.num_videos]
    if args.num_gpu is not None:
        start, end = split_array(len(file_paths), args.num_gpu, args.gpu_id)
        file_paths = file_paths[start:end]

    output_path = args.output_path
    if args.save_boxes:
        output_label_path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)
    if args.save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    if args.video_decoder == "cpu":
        decode_device = cpu(0)
    elif args.video_decoder == "gpu":
        decode_device = gpu(0)
    else:
        raise NotImplementedError(f"Only CPU and GPU devices are supported by decord, but got {args.video_decoder}")

    transform = albu.Compose([albu.Normalize(p=1, mean=(104, 117, 123),
                                             std=(1.0, 1.0, 1.0), max_pixel_value=1)], p=1)

    with torch.no_grad():
        for video_path in tqdm(file_paths):
            labels = []
            video_id = video_path.stem

            with video_reader(str(video_path), ctx=decode_device) as video:
                len_video = len(video)

                if args.num_frames is None:
                    # assumption: None means "every frame" (range(None) would raise)
                    frame_ids = list(range(len_video))
                elif args.num_frames == 1:
                    frame_ids = [0]  # a single frame from the start of the video
                elif args.num_frames > 1:
                    if len_video < args.num_frames:
                        step = 1
                    else:
                        step = int(len_video / args.num_frames)
                    frame_ids = list(range(0, len_video, step))[:args.num_frames]
                else:
                    raise ValueError(f"Expect None or integer >= 1 for args.num_frames, but got {args.num_frames}")

                frames = video.get_batch(frame_ids)

                if args.video_decoder == "cpu":
                    frames = frames.asnumpy()
                elif args.video_decoder == "gpu":
                    frames = dlpack.from_dlpack(frames.to_dlpack())

            if args.video_decoder == "gpu":
                del video
                torch.cuda.empty_cache()
                gc.collect()

            num_frames = len(frames)
            image_height = frames.shape[1]
            image_width = frames.shape[2]

            scale1 = torch.Tensor([image_width, image_height, image_width, image_height,
                                   image_width, image_height, image_width, image_height,
                                   image_width, image_height])
            scale1 = scale1.to(device)
            scale = torch.Tensor([image_width, image_height, image_width, image_height])
            scale = scale.to(device)

            priorbox = PriorBox(cfg, image_size=(image_height, image_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data

            if args.resize_coeff is not None:
                target_size = min(args.resize_coeff)
                max_size = max(args.resize_coeff)
                image_size_min = min([image_width, image_height])
                image_size_max = max([image_width, image_height])
                resize = float(target_size) / float(image_size_min)
                if np.round(resize * image_size_max) > max_size:
                    resize = float(max_size) / float(image_size_max)
            else:
                resize = 1

            for pred_id in range(num_frames):
                frame = frames[pred_id]
                torched_image = prepare_image(frame, transform, args.video_decoder).to(device)
                if args.fp16:
                    torched_image = torched_image.half()

                loc, conf, land = net(torched_image)  # forward pass

                frame_id = frame_ids[pred_id]

                boxes = decode(loc.data[0], prior_data, cfg["variance"])
                boxes *= scale / resize
                boxes = boxes.cpu().numpy()
                scores = conf[0].data.cpu().numpy()[:, 1]

                landmarks = decode_landm(land.data[0], prior_data, cfg["variance"])
                landmarks *= scale1 / resize
                landmarks = landmarks.cpu().numpy()

                # ignore low scores
                valid_index = np.where(scores > args.confidence_threshold)[0]
                boxes = boxes[valid_index]
                landmarks = landmarks[valid_index]
                scores = scores[valid_index]

                # keep top-K before NMS
                order = scores.argsort()[::-1]
                # order = scores.argsort()[::-1][:args.top_k]
                boxes = boxes[order]
                landmarks = landmarks[order]
                scores = scores[order]

                # do NMS
                detection = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
                keep = py_cpu_nms(detection, args.nms_threshold)
                # keep = nms(detection, args.nms_threshold, force_cpu=args.cpu)

                # x_min, y_min, x_max, y_max, score
                detection = detection[keep, :]
                landmarks = landmarks[keep].astype(int)

                if detection.shape[0] == 0:
                    continue

                bboxes = detection[:, :4].astype(int)
                confidence = detection[:, 4].astype(np.float64)

                for crop_id in range(len(detection)):
                    bbox = bboxes[crop_id]
                    labels += [{
                        "frame_id": int(frame_id),
                        "crop_id": crop_id,
                        "bbox": bbox.tolist(),
                        "score": confidence[crop_id],
                        "landmarks": landmarks[crop_id].tolist(),
                    }]

                    if args.save_crops:
                        x_min, y_min, x_max, y_max = bbox
                        x_min = max(0, x_min)
                        y_min = max(0, y_min)
                        crop = frame[y_min:y_max, x_min:x_max]
                        target_folder = output_image_path / f"{video_id}"
                        target_folder.mkdir(exist_ok=True, parents=True)
                        crop_file_path = target_folder / f"{frame_id}_{crop_id}.jpg"
                        if crop_file_path.exists():
                            continue
                        cv2.imwrite(
                            str(crop_file_path),
                            cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                            [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                        )

            if args.save_boxes:
                result = {
                    "file_path": str(video_path),
                    "file_id": video_id,
                    "bboxes": labels,
                }
                with open(output_label_path / f"{video_id}.json", "w") as f:
                    json.dump(result, f, indent=2)
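The per-video JSON written above can be read back directly; a small sketch (the file name under output_label_path is a placeholder):

import json

with open('labels/some_video_id.json') as f:  # hypothetical path under output_label_path
    result = json.load(f)
for det in result['bboxes']:
    print(det['frame_id'], det['crop_id'], det['bbox'], det['score'])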
class Inference(object):
    def __init__(self, weight_path, network, use_cpu=False):
        self.weight_path = weight_path
        self.network = network
        self.use_cpu = use_cpu
        self.resize = 1
        self.confidence_threshold = 0.02
        self.nms_threshold = 0.4
        self.vis_thres = 0.5
        self.input_height = 720
        self.input_width = 1280
        self._initialize_weight()
        self.scale = torch.Tensor([1280, 720, 1280, 720]).to(self.device)
        self.prior_data = self._initialize_priorbox(self.cfg, self.input_height, self.input_width)

    def _initialize_weight(self):
        self.cfg = None
        if self.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif self.network == "resnet50":
            self.cfg = cfg_re50
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = self._load_model(self.net, self.weight_path, self.use_cpu)
        self.net.eval()
        print('Finished loading model!')
        print(self.net)
        cudnn.benchmark = True
        self.device = torch.device("cpu" if self.use_cpu else "cuda")
        print("self.device:", self.device)
        self.net = self.net.to(self.device)

    def _initialize_priorbox(self, cfg, im_height, im_width):
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        return prior_data

    def _remove_prefix(self, state_dict, prefix):
        '''Old style model is stored with all parameter names sharing the common prefix 'module.' '''
        print('remove prefix \'{}\''.format(prefix))
        f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
        return {f(key): value for key, value in state_dict.items()}

    def _check_keys(self, model, pretrained_state_dict):
        ckpt_keys = set(pretrained_state_dict.keys())
        model_keys = set(model.state_dict().keys())
        used_pretrained_keys = model_keys & ckpt_keys
        unused_pretrained_keys = ckpt_keys - model_keys
        missing_keys = model_keys - ckpt_keys
        print('Missing keys:{}'.format(len(missing_keys)))
        print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
        print('Used keys:{}'.format(len(used_pretrained_keys)))
        assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
        return True

    def _load_model(self, model, pretrained_path, load_to_cpu):
        print('Loading pretrained model from {}'.format(pretrained_path))
        if load_to_cpu:
            pretrained_dict = torch.load(pretrained_path,
                                         map_location=lambda storage, loc: storage)
        else:
            device = torch.cuda.current_device()
            pretrained_dict = torch.load(pretrained_path,
                                         map_location=lambda storage, loc: storage.cuda(device))
        if "state_dict" in pretrained_dict.keys():
            pretrained_dict = self._remove_prefix(pretrained_dict['state_dict'], 'module.')
        else:
            pretrained_dict = self._remove_prefix(pretrained_dict, 'module.')
        self._check_keys(model, pretrained_dict)
        model.load_state_dict(pretrained_dict, strict=False)
        return model

    def _forward(self, img_raw):
        # img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            print("img is None")
            return None, None, None

        img = np.float32(img_raw)
        if self.resize != 1:
            img = cv2.resize(img, None, None, fx=self.resize, fy=self.resize,
                             interpolation=cv2.INTER_LINEAR)
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)

        loc, conf, landms = self.net(img)  # forward pass

        # decode boxes
        boxes = decode(loc.data.squeeze(0), self.prior_data, self.cfg['variance'])
        boxes = boxes * self.scale / self.resize
        boxes = boxes.cpu().numpy()

        # scores
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # landmarks
        landms = decode_landm(landms.data.squeeze(0), self.prior_data, self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]
        dets = np.concatenate((dets, landms), axis=1)

        boxes_list = []
        scores_list = []
        landmarks_list = []
        for b in dets:
            if b[4] < self.vis_thres:
                continue
            s = b[4]
            b = list(map(int, b))
            boxes_list.append([b[0], b[1], b[2], b[3]])
            scores_list.append(s)
            landmarks_list.append([b[5], b[6], b[7], b[8], b[9],
                                   b[10], b[11], b[12], b[13], b[14]])
        return boxes_list, scores_list, landmarks_list

    def __call__(self, img_raw):
        return self._forward(img_raw)
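A hypothetical call to Inference (the weight path is a placeholder). One design point worth noting: because __init__ builds the priors and the box-rescaling tensor once for a fixed 1280x720 input, frames passed to __call__ must already be 1280x720, otherwise boxes and landmarks are decoded at the wrong scale:

infer = Inference(weight_path='weights/Resnet50_Final.pth', network='resnet50')
frame = cv2.imread('frame.jpg')  # expected to already be 1280x720
boxes, scores, landmarks = infer(frame)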
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model


torch.set_grad_enabled(False)
cfg = None
if args.network == "mobile0.25":
    cfg = cfg_mnetv1
elif args.network == "mobilenetv2":
    cfg = cfg_mnetv2
elif args.network == "mobilenetv3":
    cfg = cfg_mnetv3
elif args.network == "efficientnetb0":
    cfg = cfg_efnetb0

# net and model
model = RetinaFace(cfg=cfg, phase='test')
model = load_model(model, args.trained_model, args.cpu)
model.eval()
print('Finished loading model!')
print(model)
# cudnn.benchmark = True
device = torch.device("cpu")
model = model.to(device)

# trace the model with a fixed-size dummy input and serialize it
example = torch.rand(1, 3, 640, 640)
traced_script_module = torch.jit.trace(model, example)
traced_script_module.save("face.pt")
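The traced module saved above can be reloaded without the RetinaFace class definition; a minimal sketch using torch.jit.load, the standard counterpart to save (the tuple unpacking assumes the test-phase forward returns loc, conf, landms as in the other scripts here):

model = torch.jit.load('face.pt')
loc, conf, landms = model(torch.rand(1, 3, 640, 640))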
def wxf(imgpath):
    print(imgpath)
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    # print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    image_path = imgpath
    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    img = np.float32(img_raw)

    target_size = 1600
    max_size = 2150
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    resize = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(resize * im_size_max) > max_size:
        resize = float(max_size) / float(im_size_max)
    if args.origin_size:
        resize = 1
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)

    im_height, im_width, _ = img.shape
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2]])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1]
    # order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    # dets = dets[:args.keep_top_k, :]
    # landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)

    # name = rlsb.sb(imgpath)
    # save image
    if args.save_image:
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            # text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            # cx = b[0]
            # cy = b[1] + 12
            # cv2.putText(img_raw, text, (cx, cy),
            #             cv2.FONT_HERSHEY_SCRIPT_COMPLEX, 0.5, (255, 255, 255))
            # # landms
            # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
            # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
            # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
            # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
            # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

        # save image
        if not os.path.exists("./results/"):
            os.makedirs("./results/")
        name = "./results/" + "wxf" + ".jpg"
        cv2.imwrite(name, img_raw)
    return name
name="", op_dict=None, producer_op_list=None) return graph if __name__ == '__main__': torch.set_grad_enabled(False) cfg = cfg_mnet net = RetinaFace(cfg=cfg, phase='test') net = load_model(net, "./converted_models/mobilenet/mobilenet0.25_Final.pth", True) net.eval() print('Finish loading model!') #print(net) #cudnn.benchmark = True device = torch.device("cpu") net = net.to(device) img_raw = cv2.imread("./Face_Detector_ncnn/sample.jpg") #img = np.ones((3,240,320), dtype=np.float32) img = np.float32(img_raw) long_side = 320 im_shape = img.shape im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) resize = float(long_side) / float(im_size_min) if np.round(resize * im_size_max) > long_side:
def detect(img_path):
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    # net = FaceBoxes(phase='test', size=None, num_classes=2)
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    # print('Finished loading model!')
    # print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    _t = {'forward_pass': Timer(), 'misc': Timer()}
    resize = 1

    # testing begin
    # for i, img_name in enumerate(test_dataset):
    #     image_path = testset_folder + img_name + '.jpg'
    #     img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if type(img_path) is not np.ndarray:
        img = Image.open(img_path)
        if img.mode == 'L':
            img = img.convert('RGB')
        img_raw = np.array(img)
    else:
        img_raw = img_path
    # img_raw = img_path

    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    _t['forward_pass'].tic()
    loc, conf, landms = net(img)  # forward pass
    _t['forward_pass'].toc()

    _t['misc'].tic()
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    # priorbox = PriorBox1(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2]])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    # order = scores.argsort()[::-1][:args.top_k]
    order = scores.argsort()[::-1]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    # dets = dets[:args.keep_top_k, :]
    # landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    _t['misc'].toc()

    # save dets
    # if args.dataset == "FDDB":
    #     fw.write('{:s}\n'.format(img_name))
    #     fw.write('{:.1f}\n'.format(dets.shape[0]))
    #     for k in range(dets.shape[0]):
    #         xmin = dets[k, 0]
    #         ymin = dets[k, 1]
    #         xmax = dets[k, 2]
    #         ymax = dets[k, 3]
    #         score = dets[k, 4]
    #         w = xmax - xmin + 1
    #         h = ymax - ymin + 1
    #         # fw.write('{:.3f} {:.3f} {:.3f} {:.3f} {:.10f}\n'.format(xmin, ymin, w, h, score))
    #         fw.write('{:d} {:d} {:d} {:d} {:.10f}\n'.format(int(xmin), int(ymin), int(w), int(h), score))

    print('forward_pass_time: {:.4f}s misc: {:.4f}s'.format(
        _t['forward_pass'].average_time, _t['misc'].average_time))

    # if type(img_path) is not np.ndarray:
    #     img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
    # else:
    #     img_raw = img_path
    # # show image
    # if args.save_image:
    #     for b in dets:
    #         if b[4] < args.vis_thres:
    #             continue
    #         text = "{:.4f}".format(b[4])
    #         b = list(map(int, b))
    #         cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
    #         cx = b[0]
    #         cy = b[1] + 12
    #         cv2.putText(img_raw, text, (cx, cy),
    #                     cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
    #         # landms
    #         cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
    #         cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
    #         cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
    #         cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
    #         cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
    #     # save image
    #     if not os.path.exists("./results/"):
    #         os.makedirs("./results/")
    #     name = "./results/" + str(i) + ".jpg"
    #     cv2.imwrite(name, img_raw)

    return dets, img_path