def main():
    """Export a trained RetinaFace model to ONNX as ``FaceDetector.onnx``.

    Reads the command-line options via ``get_args()``, builds the network
    for the requested backbone, loads the checkpoint and traces the model
    with a random square input of side ``args.long_side``.

    Raises:
        ValueError: if ``args.network`` is not a supported backbone.
    """
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        # Fail fast instead of handing cfg=None to RetinaFace.
        raise ValueError("Unsupported network: {!r}".format(args.network))

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # ------------------------ export -----------------------------
    output_onnx = "FaceDetector.onnx"
    print("==> Exporting model to ONNX format at '{}'".format(output_onnx))
    input_names = ["input0"]
    output_names = ["output0"]
    inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device)

    # Use the public exporter; the private ``torch.onnx._export`` is not a
    # stable API and its return value was never used here.
    torch.onnx.export(
        net,
        inputs,
        output_onnx,
        export_params=True,
        verbose=False,
        input_names=input_names,
        output_names=output_names,
    )
def validate(model_path, network='mobile0.25'):
    """Evaluate a RetinaFace checkpoint and return its precision scores.

    Args:
        model_path: path of the checkpoint handed to ``load_model``.
        network: backbone name, ``"mobile0.25"`` or ``"resnet50"``.

    Returns:
        A list whose first element is the mean of the values returned by
        ``evaluation`` and whose remaining elements are those values.

    Raises:
        ValueError: if ``network`` is not one of the supported names.
    """
    backbone_cfgs = {"mobile0.25": cfg_mnet, "resnet50": cfg_re50}
    if network not in backbone_cfgs:
        raise ValueError(network)
    cfg = backbone_cfgs[network]

    detector = load_model(RetinaFace(cfg=cfg, phase='test'), model_path,
                          args.cpu)
    print('Finished loading model!')

    cudnn.benchmark = True
    target_device = torch.device("cpu" if args.cpu else "cuda")
    detector = detector.to(target_device)
    detector.eval()

    with torch.no_grad():
        preds = predict(detector, cfg)
    # Drop the local reference to the model before running the evaluation.
    del detector

    aps = evaluation(preds, './widerface_evaluate/ground_truth/')
    return [np.mean(aps)] + aps
def __init__(self, on_gpu=False):
    """Build a ResNet-50 RetinaFace detector with fixed default settings.

    Args:
        on_gpu: when true, enable cuDNN benchmarking and move the detector
            to ``self.device``; otherwise the detector is kept as loaded.
    """
    self.on_gpu = on_gpu

    # from classifier by Sizykh Ivan
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    # Fixed detector settings (formerly command-line options).
    self.trained_model = './weights/Resnet50_Final.pth'
    self.network = 'resnet50'
    self.cpu = False
    self.confidence_threshold = 0.02
    self.top_k = 5000
    self.nms_threshold = 0.4
    self.keep_top_k = 750
    self.vis_thres = 0.6
    self.resize = 1

    torch.set_grad_enabled(False)
    cfg = {"mobile0.25": cfg_mnet, "resnet50": cfg_re50}.get(self.network)

    # net and model
    model = RetinaFace(cfg=cfg, phase='test')
    model = self.load_model(model=model,
                            pretrained_path=self.trained_model,
                            load_to_cpu=self.cpu)
    model.eval()
    print('Finished loading model!')

    if self.on_gpu:
        cudnn.benchmark = True
        model = model.to(self.device)
    self.detector = model
    self.cfg = cfg
def model_cfg(trained_model, cfg, device, cpu):
    """Create a RetinaFace test-phase network and place it on ``device``.

    Args:
        trained_model: checkpoint path passed to ``load_model``.
        cfg: backbone configuration passed to ``RetinaFace``.
        device: target device for the returned network.
        cpu: forwarded to ``load_model`` as ``load_to_cpu``.

    Returns:
        The loaded network in eval mode, moved to ``device``.
    """
    detector = load_model(RetinaFace(cfg=cfg, phase='test'),
                          trained_model,
                          load_to_cpu=cpu)
    detector.eval()
    print('Finished loading model!')
    cudnn.benchmark = True
    return detector.to(device)
def __init__(self,
             on_gpu=False,
             confidence_threshold=0.02,
             top_k=5000,
             nms_threshold=0.4,
             keep_top_k=750,
             vis_thres=0.6,
             network='resnet50'):
    """Build a RetinaFace detector for the requested backbone.

    Args:
        on_gpu: when true, enable cuDNN benchmarking and move the detector
            to ``self.device``.
        confidence_threshold: stored on the instance.
        top_k: stored on the instance.
        nms_threshold: stored on the instance.
        keep_top_k: stored on the instance.
        vis_thres: stored on the instance.
        network: backbone name, ``"resnet50"`` or ``"mobile0.25"``.

    Raises:
        ValueError: if ``network`` is not a supported backbone. Previously
            any other value selected the mobilenet weights together with
            ``cfg=None``.
    """
    # Backbone name -> (config, checkpoint path).
    backbones = {
        'resnet50': (cfg_re50, './weights/Resnet50_Final.pth'),
        'mobile0.25': (cfg_mnet, './weights/mobilenet0.25_Final.pth'),
    }
    if network not in backbones:
        raise ValueError("Unsupported network: {!r}".format(network))
    cfg, weights_path = backbones[network]

    self.on_gpu = on_gpu

    # from classifier by Sizykh Ivan
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    self.network = network
    # NOTE(review): the checkpoint is never requested on CPU here, even
    # when on_gpu is False -- confirm load_model copes without CUDA.
    self.cpu = False
    self.confidence_threshold = confidence_threshold
    self.top_k = top_k
    self.nms_threshold = nms_threshold
    self.keep_top_k = keep_top_k
    self.vis_thres = vis_thres
    self.trained_model = weights_path
    self.resize = 1

    torch.set_grad_enabled(False)

    # net and model
    detector = RetinaFace(cfg=cfg, phase='test')
    detector = self.load_model(model=detector,
                               pretrained_path=self.trained_model,
                               load_to_cpu=self.cpu)
    detector.eval()
    print('Finished loading model!')

    if self.on_gpu:
        cudnn.benchmark = True
        self.detector = detector.to(self.device)
    else:
        self.detector = detector
    self.cfg = cfg
def load_net():
    """Load the RetinaFace network described by the module-level ``args``.

    Returns:
        A ``(net, device, cfg)`` tuple: the network in eval mode on its
        target device, that device, and the backbone configuration
        (``None`` when ``args.network`` is not a known backbone name).
    """
    torch.set_grad_enabled(False)
    cfg = {"mobile0.25": cfg_mnet, "resnet50": cfg_re50}.get(args.network)

    # net and model
    model = load_model(RetinaFace(cfg=cfg, phase='test'),
                       args.trained_model, args.cpu)
    model.eval()
    print('Finished loading model!')

    cudnn.benchmark = True
    target = torch.device("cpu" if args.cpu else "cuda")
    return model.to(target), target, cfg
def initFaceDetector():
    """Initialise the module-level face detector state.

    Calls ``init_args()`` and then sets the globals ``cfg``, ``net`` and
    ``device``. The second value returned by ``load_model`` is discarded.
    """
    global args, net, device, cfg

    init_args()
    torch.set_grad_enabled(False)

    cfg = {"mobile0.25": cfg_mnet, "resnet50": cfg_re50}.get(args.network)

    net = RetinaFace(cfg=cfg, phase='test')
    net, _forest = load_model(net, args.trained_model, args.cpu,
                              args.forest_path)
    net.eval()

    cudnn.benchmark = True
    device = torch.device("cuda" if not args.cpu else "cpu")
    net = net.to(device)
def __init__(self,
             threshold=0.5,
             network="mobile0.25",
             device=torch.device("cuda")):
    """Build a RetinaFace detector for ``network`` on ``device``.

    Args:
        threshold: stored as ``self.threshold``.
        network: backbone name, ``"mobile0.25"`` or ``"resnet50"``.
        device: device the network is moved to.

    Raises:
        ValueError: if ``network`` is not a supported backbone.
    """
    # Backbone name -> (config, checkpoint path). The checkpoint now
    # follows the backbone; previously the mobilenet weights were loaded
    # even when the resnet50 architecture was requested.
    backbones = {
        "mobile0.25": (cfg_mnet, "./weights/mobilenet0.25_Final.pth"),
        "resnet50": (cfg_re50, "./weights/Resnet50_Final.pth"),
    }
    if network not in backbones:
        raise ValueError("Unsupported network: {!r}".format(network))
    cfg, weights_path = backbones[network]

    torch.set_grad_enabled(False)

    # Only ask for a CUDA-side load when the target device is not the CPU,
    # so a CPU device does not force the checkpoint through CUDA first.
    load_to_cpu = torch.device(device).type == "cpu"

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, weights_path, load_to_cpu)
    net.eval()

    cudnn.benchmark = True
    self.device = device
    self.net = net.to(self.device)

    self._t = {'forward_pass': Timer(), 'misc': Timer()}
    self.cfg = cfg
    self.threshold = threshold
def main():
    """Load the detector and crop faces for the configured datasets.

    The network is built from the module-level ``args``. ``crop_face`` is
    then invoked once per ``(data_dir, target_dir)`` pair, each time with
    a 0.05 scale on every side.
    """
    torch.set_grad_enabled(False)
    cfg = {"mobile0.25": cfg_mnet, "resnet50": cfg_re50}.get(args.network)

    # net and model
    net = load_model(RetinaFace(cfg=cfg, phase='test'), args.trained_model,
                     args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)

    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # (source directory, output directory) pairs, processed in order.
    crop_jobs = (
        ('/workspace/data/public/FR/ms1m_database_100k_final/base',
         '/workspace/data/public/FR/ms1m_large_range_crop'),
        ('/workspace/data/public/FR/VGGFACE2_Cleandata/train',
         '/workspace/data/public/FR/VGGFACE2_range_crop'),
    )
    for data_dir, target_dir in crop_jobs:
        crop_face(net,
                  device,
                  cfg,
                  data_dir,
                  target_dir,
                  left_scale=0.05,
                  right_scale=0.05,
                  up_scale=0.05,
                  low_scale=0.05)
# 加载测试模型 if os.access(ops.landmarks_model, os.F_OK): # checkpoint chkpt = torch.load(ops.landmarks_model, map_location=device) landmarks_model.load_state_dict(chkpt) print('load landmarks model : {}'.format(ops.landmarks_model)) #--------------------------------------------------------------------------- 构建人脸检测模型 cfg = None if ops.detect_network == "mobile0.25": cfg = cfg_mnet elif ops.detect_network == "resnet50": cfg = cfg_re50 # net and model detect_model = RetinaFace(cfg=cfg, phase='test') detect_model = detect_model.to(device) if os.access(ops.detect_model, os.F_OK): # checkpoint chkpt = torch.load(ops.detect_model, map_location=device) detect_model.load_state_dict(chkpt) print('load detect model : {}'.format(ops.detect_model)) detect_model.eval() if use_cuda: cudnn.benchmark = True print('loading model done ~') #-------------------------------------------------------------------------- run vedio video_capture = cv2.VideoCapture(ops.test_path) with torch.no_grad(): idx = 0
def main():
    """Run RetinaFace over an ``<event>/<image>`` folder tree and save results.

    For every image under ``args.dataset_folder`` the detections are
    written to ``<args.save_folder>/<event>/<image stem>.txt``: the stem,
    the number of boxes, then one ``x y w h confidence`` line per box.
    When ``args.save_image`` is set, an annotated copy of the image is
    also written to ``./results/<index>.jpg``.

    Raises:
        ValueError: if ``args.network`` is not a supported backbone.
    """
    backbone_cfgs = {
        "mobile0.25": cfg_mnet,
        "resnet18": cfg_re18,
        "resnet34": cfg_re34,
        "resnet50": cfg_re50,
        "Efficientnet-b0": cfg_eff_b0,
        "Efficientnet-b4": cfg_eff_b4,
    }
    cfg = backbone_cfgs.get(args.network)
    if cfg is None:
        # Fail fast instead of handing cfg=None to RetinaFace.
        raise ValueError("Unsupported network: {!r}".format(args.network))

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # testing dataset: every file of every sub-directory (event)
    testset_folder = args.dataset_folder
    test_dataset = []
    for event in os.listdir(testset_folder):
        subdir = os.path.join(testset_folder, event)
        for img_name in os.listdir(subdir):
            test_dataset.append([event, os.path.join(subdir, img_name)])
    num_images = len(test_dataset)

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    # testing scale
    target_size = 480
    max_size = 2150

    # testing begin
    for i, (event, img_name) in enumerate(test_dataset):
        if i % 100 == 0:
            torch.cuda.empty_cache()

        img_raw = cv2.imread(img_name, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1

        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img = (img - 127.5) / 128.0
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        _t['forward_pass'].tic()
        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            loc, conf, landms = net(img)  # forward pass
        _t['forward_pass'].toc()

        _t['misc'].tic()
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        # (width, height) of the network input, repeated for 5 landmarks.
        scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # sort by descending score before NMS
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        dets = np.concatenate((dets, landms), axis=1)
        _t['misc'].toc()

        # --------------------------------------------------------------
        # Derive the stem with os.path so names containing extra dots and
        # non-POSIX separators are handled correctly.
        stem = os.path.splitext(os.path.basename(img_name))[0]
        save_name = os.path.join(args.save_folder, event, stem + ".txt")
        os.makedirs(os.path.dirname(save_name), exist_ok=True)
        with open(save_name, "w") as fd:
            fd.write(stem + "\n")
            fd.write(str(len(dets)) + "\n")
            for box in dets:
                x = int(box[0])
                y = int(box[1])
                w = int(box[2]) - int(box[0])
                h = int(box[3]) - int(box[1])
                confidence = str(box[4])
                fd.write(str(x) + " " + str(y) + " " + str(w) + " " +
                         str(h) + " " + confidence + " \n")

        print('im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s'.
              format(i + 1, num_images, _t['forward_pass'].average_time,
                     _t['misc'].average_time))

        # save image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                              (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

            # save image
            os.makedirs("./results/", exist_ok=True)
            name = "./results/" + str(i) + ".jpg"
            cv2.imwrite(name, img_raw)
class Retina_Detector:
    """RetinaFace wrapper that returns integer boxes and landmarks."""

    def __init__(self):
        """Load the network described by ``get_config()``.

        Raises:
            ValueError: if the configured network is not supported.
        """
        torch.set_grad_enabled(False)
        cudnn.benchmark = True
        self.opt = get_config()
        if self.opt.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif self.opt.network == "resnet50":
            self.cfg = cfg_re50
        else:
            # Previously self.cfg was simply left undefined here.
            raise ValueError(
                "Unsupported network: {!r}".format(self.opt.network))
        # net and model
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = self.load_model(self.net, self.opt.trained_model,
                                   self.opt.cpu)
        self.net.eval()
        self.net = self.net.to(self.opt.device)

    def check_keys(self, model, pretrained_state_dict):
        """Report key overlap between ``model`` and a checkpoint.

        Returns ``True``; asserts that at least one checkpoint key matches
        a key of the model's state dict.
        """
        ckpt_keys = set(pretrained_state_dict.keys())
        model_keys = set(model.state_dict().keys())
        used_pretrained_keys = model_keys & ckpt_keys
        unused_pretrained_keys = ckpt_keys - model_keys
        missing_keys = model_keys - ckpt_keys
        print('Missing keys:{}'.format(len(missing_keys)))
        print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
        print('Used keys:{}'.format(len(used_pretrained_keys)))
        assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
        return True

    def remove_prefix(self, state_dict, prefix):
        """Return a copy of ``state_dict`` with ``prefix`` stripped from keys.

        Old style model is stored with all names of parameters sharing
        common prefix 'module.'; keys without the prefix are kept as-is.
        """
        print('remove prefix \'{}\''.format(prefix))

        def strip(key):
            return key.split(prefix, 1)[-1] if key.startswith(prefix) else key

        return {strip(key): value for key, value in state_dict.items()}

    def load_model(self, model, pretrained_path, load_to_cpu):
        """Load the checkpoint at ``pretrained_path`` into ``model``.

        Args:
            model: module receiving the weights (loaded non-strictly).
            pretrained_path: checkpoint file path.
            load_to_cpu: when true, keep tensors on the CPU; otherwise map
                them to the current CUDA device.

        Returns:
            ``model`` with the checkpoint weights loaded.
        """
        print('Loading pretrained model from {}'.format(pretrained_path))
        if load_to_cpu:
            pretrained_dict = torch.load(
                pretrained_path, map_location=lambda storage, loc: storage)
        else:
            device = torch.cuda.current_device()
            pretrained_dict = torch.load(
                pretrained_path,
                map_location=lambda storage, loc: storage.cuda(device))
        # Checkpoints may wrap the weights in a "state_dict" entry.
        if "state_dict" in pretrained_dict.keys():
            pretrained_dict = self.remove_prefix(
                pretrained_dict['state_dict'], 'module.')
        else:
            pretrained_dict = self.remove_prefix(pretrained_dict, 'module.')
        self.check_keys(model, pretrained_dict)
        model.load_state_dict(pretrained_dict, strict=False)
        return model

    def img_process(self, img):
        """Resize ``img`` for detection and return ``(image, scale)``.

        The shorter side is scaled to ``self.cfg["image_size"]`` unless
        that would make the longer side exceed 1080, in which case the
        longer side is scaled to 1080 instead.
        """
        target_size = self.cfg["image_size"]
        max_size = 1080
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
        return im, im_scale

    def detect(self, img):
        """Detect faces in ``img``.

        Returns:
            ``(boxes, lands)``: two lists of integer lists, expressed in
            the coordinates of the input image. Each box row is
            ``x1, y1, x2, y2`` followed by the score truncated to ``int``;
            each landmark row holds the ten landmark coordinates.
        """
        img, imscale = self.img_process(img)
        resize = 1
        img = np.float32(img)

        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.opt.device)
        scale = scale.to(self.opt.device)

        tic = time.time()
        loc, conf, landms = self.net(img)  # forward pass
        print('net forward time: {:.4f}'.format(time.time() - tic))

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.opt.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              self.cfg['variance'])
        # (width, height) of the network input, repeated for 5 landmarks.
        scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
        scale1 = scale1.to(self.opt.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.opt.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:self.opt.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, self.opt.nms_threshold)
        dets = dets[keep, :]
        print("len ", len(dets))
        landms = landms[keep]

        # Map coordinates back to the original image. Only the four box
        # coordinates are rescaled; the score column must not be divided.
        dets[:, :4] /= imscale
        landms /= imscale

        # keep top-K faster NMS
        dets = dets[:self.opt.keep_top_k, :]
        boxes = [list(map(int, x)) for x in dets]
        landms = landms[:self.opt.keep_top_k, :]
        lands = [list(map(int, x)) for x in landms]
        return boxes, lands
def process_video_files(
        network: str,
        trained_model: str,
        decode_gpu: bool,
        is_fp16: bool,
        file_paths: list,
        num_gpu: Optional[int],
        gpu_id: int,
        output_path: Path,
        is_save_boxes: bool,
        is_save_crops: bool,
        num_frames: int,
        resize_coeff: Optional[Tuple],
        confidence_threshold: float,
        num_workers: int,
        nms_threshold: float,
        batch_size: int,
        resize_scale: float,
        min_size: int,
        keep_top_k: int,
) -> None:
    """Decode video files in worker processes and run detection on them.

    Frames are extracted with ``get_frames`` in a process pool; every
    non-empty result is extended with the detection settings and handed to
    ``process_frames`` in this process.

    Args:
        network: backbone name, ``"mobile0.25"`` or ``"resnet50"``.
        trained_model: checkpoint path passed to ``load_model``.
        decode_gpu: forwarded to ``get_frames``.
        is_fp16: convert the network to half precision when true.
        file_paths: video files to process.
        num_gpu: when not ``None``, only the slice of ``file_paths`` given
            by ``split_array(len(file_paths), num_gpu, gpu_id)`` is used.
        gpu_id: index used together with ``num_gpu``.
        output_path, is_save_boxes, is_save_crops, confidence_threshold,
        nms_threshold, batch_size, resize_scale, min_size, keep_top_k:
            forwarded to ``process_frames``.
        num_frames, resize_coeff: forwarded to ``get_frames``.
        num_workers: size of the process pool.

    Raises:
        NotImplementedError: if ``network`` is not supported.
        ValueError: if ``min_size`` is negative.
    """
    torch.set_grad_enabled(False)

    if network == "mobile0.25":
        cfg = cfg_mnet_test
    elif network == "resnet50":
        cfg = cfg_re50_test
    else:
        raise NotImplementedError(
            f"Only mobile0.25 and resnet50 are supported, but we got {network}")

    if min_size < 0:
        # Zero is accepted, so the message says "non-negative".
        raise ValueError(
            f"Min size should be non-negative, but we got {min_size}.")

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, trained_model, load_to_cpu=False)
    net.eval()
    if is_fp16:
        net = net.half()

    device = torch.device("cuda")
    net.to(device)
    print("Finished loading model!")
    cudnn.benchmark = True

    transform = albu.Compose([
        albu.Normalize(
            p=1, mean=(104, 117, 123), std=(1.0, 1.0, 1.0), max_pixel_value=1)
    ], p=1)

    if num_gpu is not None:
        start, end = split_array(len(file_paths), num_gpu, gpu_id)
        file_paths = file_paths[start:end]

    func = partial(get_frames,
                   num_frames=num_frames,
                   resize_coeff=resize_coeff,
                   transform=transform,
                   decode_gpu=decode_gpu)

    # Settings shared by every process_frames call.
    shared_kwargs = {
        "is_fp16": is_fp16,
        "device": device,
        "batch_size": batch_size,
        "cfg": cfg,
        "nms_threshold": nms_threshold,
        "confidence_threshold": confidence_threshold,
        "is_save_crops": is_save_crops,
        "is_save_boxes": is_save_boxes,
        "output_path": output_path,
        "net": net,
        "min_size": min_size,
        "resize_scale": resize_scale,
        "keep_top_k": keep_top_k,
    }

    # A single no_grad context is enough; the original stacked two of them
    # on top of the global set_grad_enabled(False).
    with torch.no_grad():
        with concurrent.futures.ProcessPoolExecutor(num_workers) as executor:
            for result in tqdm(executor.map(func, file_paths),
                               total=len(file_paths),
                               leave=False,
                               desc="Loading data files"):
                if len(result) == 0:
                    continue
                result.update(shared_kwargs)
                process_frames(**result)
def __init__(self,
             input_mode=0,
             output_mode=0,
             record_video=False,
             email_to_share=None,
             channel=0,
             on_gpu=False,
             display=False,
             only_headcount=False,
             send_to_nvr=False,
             parallel=False):
    """Set up the video source, the emotion classifier and the face detector.

    Args:
        input_mode: 0 - webcam, 1 - IP camera, 2 - video file.
        output_mode: 0 - pretty display, 1 - separate graph, 2 - graph
            with black background.
        record_video: stored on the instance.
        email_to_share: passed to ``API`` when results are saved or sent;
            ``None`` disables saving into a sheet.
        channel: camera address (mode 1) or video source (mode 2).
        on_gpu: place the classifier and detector on ``self.device``.
        display: stored on the instance.
        only_headcount: stored on the instance.
        send_to_nvr: stored on the instance; also triggers ``API`` creation.
        parallel: only honoured when ``on_gpu`` is false.

    Raises:
        ValueError: if the parsed ``--network`` option is not supported.
    """
    self.save_into_sheet = email_to_share is not None
    self.on_gpu = on_gpu
    self.send_to_nvr = send_to_nvr
    if self.save_into_sheet or self.send_to_nvr:
        self.api = API(email_to_share)

    self.input_mode = input_mode
    self.output_mode = output_mode  # 0 - pretty display, 1 - separate graph, 2 - graph with black background
    self.record_video = record_video
    self.display = display
    self.only_headcount = only_headcount

    if input_mode == 0:
        self.channel = 0  # webcam
    elif input_mode == 1:  # ip camera
        # Format only the host part, so braces inside the credentials
        # cannot be mistaken for format fields.
        self.channel = (
            'rtsp://' + secrets.ip_camera_login + ':' +
            secrets.ip_camera_password + '@{}'.format(channel) +
            ':554/cam/realmonitor?channel=1&subtype=0&unicast=true&proto=Onvif')
        self.ip = channel
    elif input_mode == 2:  # video
        self.channel = channel

    self.parallel = bool(parallel and not on_gpu)

    # from classifier by Sizykh Ivan
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    self.class_labels = [
        'ANGRY', 'DISGUST', 'FEAR', 'HAPPY', 'SAD', 'SURPRISE', 'NEUTRAL'
    ]
    PATH = "./net_714.pth"
    if self.on_gpu:
        self.classifier = Classifier().to(self.device)
        self.classifier.load_state_dict(torch.load(PATH))
    else:
        self.classifier = Classifier()
        self.classifier.load_state_dict(
            torch.load(PATH, map_location={'cuda:0': 'cpu'}))

    # from detector by Belyakova Katerina
    self.parser = argparse.ArgumentParser(description='Retinaface')
    self.parser.add_argument('-m', '--trained_model',
                             default='./weights/Resnet50_Final.pth',
                             type=str,
                             help='Trained state_dict file path to open')
    self.parser.add_argument(
        '--network', default='resnet50',
        help='Backbone network mobile0.25 or resnet50')
    self.parser.add_argument('--cpu', action="store_true", default=False,
                             help='Use cpu inference')
    self.parser.add_argument('--confidence_threshold', default=0.02,
                             type=float, help='confidence_threshold')
    self.parser.add_argument('--top_k', default=5000, type=int,
                             help='top_k')
    self.parser.add_argument('--nms_threshold', default=0.4, type=float,
                             help='nms_threshold')
    self.parser.add_argument('--keep_top_k', default=750, type=int,
                             help='keep_top_k')
    self.parser.add_argument('-s', '--save_image', action="store_true",
                             default=True, help='show detection results')
    self.parser.add_argument('--vis_thres', default=0.6, type=float,
                             help='visualization_threshold')
    self.parser.add_argument('-v', '--video', default='vid.mp4', type=str)
    # NOTE: this reads the options from the process command line.
    self.parser_args = self.parser.parse_args()

    self.resize = 1

    # Set the parameters for RetinaFace.
    torch.set_grad_enabled(False)
    if self.parser_args.network == "mobile0.25":
        cfg = cfg_mnet
    elif self.parser_args.network == "resnet50":
        cfg = cfg_re50
    else:
        # Fail fast instead of handing cfg=None to RetinaFace.
        raise ValueError(
            "Unsupported network: {!r}".format(self.parser_args.network))

    # net and model
    detector = RetinaFace(cfg=cfg, phase='test')
    detector = self.load_model(
        model=detector,
        pretrained_path=self.parser_args.trained_model,
        load_to_cpu=self.parser_args.cpu)
    detector.eval()
    print('Finished loading model!')
    print(detector)

    if self.on_gpu:
        cudnn.benchmark = True
        self.detector = detector.to(self.device)
    else:
        self.detector = detector
    self.cfg = cfg
def main():
    """Run RetinaFace over a dataset image list and save the detections.

    Images are read from ``data/<dataset>/images/`` using the names in
    ``data/<dataset>/img_list.txt``. Detections are appended to
    ``<save_folder>/<dataset>_dets.txt`` when the dataset is ``"FDDB"``.
    When ``args.save_image`` is set, annotated images are written to
    ``./results/<index>.jpg``.

    Raises:
        ValueError: if ``args.network`` is not a supported backbone.
    """
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        # Fail fast instead of handing cfg=None to RetinaFace.
        raise ValueError("Unsupported network: {!r}".format(args.network))

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    args.save_folder.mkdir(exist_ok=True)
    dets_path = os.path.join(args.save_folder, args.dataset + "_dets.txt")

    # testing dataset
    testset_folder = os.path.join("data", args.dataset, "images/")
    testset_list = os.path.join("data", args.dataset, "img_list.txt")
    with open(testset_list, "r") as fr:
        test_dataset = fr.read().split()
    num_images = len(test_dataset)

    # testing scale
    resize = 1

    _t = {"forward_pass": Timer(), "misc": Timer()}

    # The context manager closes the results file even if an image fails.
    with open(dets_path, "w") as fw:
        # testing begin
        for i, img_name in enumerate(test_dataset):
            image_path = testset_folder + img_name + ".jpg"
            img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
            img = np.float32(img_raw)
            if resize != 1:
                img = cv2.resize(img, None, None, fx=resize, fy=resize,
                                 interpolation=cv2.INTER_LINEAR)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor(
                [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)

            _t["forward_pass"].tic()
            loc, conf, landms = net(img)  # forward pass
            _t["forward_pass"].toc()

            _t["misc"].tic()
            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data,
                                  cfg["variance"])
            # (width, height) of the network input, repeated 5 times.
            scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # sort by descending score before NMS
            order = scores.argsort()[::-1]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            dets = dets[keep, :]
            landms = landms[keep]

            dets = np.concatenate((dets, landms), axis=1)
            _t["misc"].toc()

            # save dets
            if args.dataset == "FDDB":
                fw.write("{:s}\n".format(img_name))
                fw.write("{:.1f}\n".format(dets.shape[0]))
                for k in range(dets.shape[0]):
                    xmin = dets[k, 0]
                    ymin = dets[k, 1]
                    xmax = dets[k, 2]
                    ymax = dets[k, 3]
                    score = dets[k, 4]
                    w = xmax - xmin + 1
                    h = ymax - ymin + 1
                    fw.write("{:d} {:d} {:d} {:d} {:.10f}\n".format(
                        int(xmin), int(ymin), int(w), int(h), score))
            print("im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s".
                  format(i + 1, num_images, _t["forward_pass"].average_time,
                         _t["misc"].average_time))

            # show image
            if args.save_image:
                for b in dets:
                    if b[4] < args.vis_thres:
                        continue
                    text = "{:.4f}".format(b[4])
                    b = list(map(int, b))
                    cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                                  (0, 0, 255), 2)
                    cx = b[0]
                    cy = b[1] + 12
                    cv2.putText(img_raw, text, (cx, cy),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                (255, 255, 255))

                    # landms
                    cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                    cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                    cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                    cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                    cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

                # save image
                os.makedirs("./results/", exist_ok=True)
                name = "./results/" + str(i) + ".jpg"
                cv2.imwrite(name, img_raw)
else: pretrained_dict = remove_prefix(pretrained_dict, 'module.') check_keys(model, pretrained_dict) model.load_state_dict(pretrained_dict, strict=False) return model torch.set_grad_enabled(False) cfg = None if args.network == "mobile0.25": cfg = cfg_mnetv1 elif args.network == "mobilenetv2": cfg = cfg_mnetv2 elif args.network == "mobilenetv3": cfg = cfg_mnetv3 elif args.network == "efficientnetb0": cfg = cfg_efnetb0 # net and model model = RetinaFace(cfg=cfg, phase='test') model = load_model(model, args.trained_model, args.cpu) model.eval() print('Finished loading model!') print(model) #cudnn.benchmark = True device = torch.device("cpu") model = model.to(device) example = torch.rand(1, 3, 640, 640) traced_script_module = torch.jit.trace(model, example) traced_script_module.save("face.pt")
def main():
    """Detect faces in videos and optionally save JSON labels and JPEG crops.

    Every ``*.mp4`` found recursively under ``args.input_path`` (limited to
    ``args.num_videos`` and optionally sharded with ``args.num_gpu`` /
    ``args.gpu_id``) is decoded, a subset of frames is selected, and each
    selected frame is passed through RetinaFace. Detections are filtered by
    ``args.confidence_threshold`` and de-duplicated with NMS.

    Raises:
        NotImplementedError: for an unsupported ``args.network`` or
            ``args.video_decoder``.
        ValueError: when ``args.num_frames`` is an integer below 1.
    """
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise NotImplementedError(f"Only mobile0.25 and resnet50 are suppoted.")

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    if args.fp16:
        net = net.half()

    print("Finished loading model!")
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    file_paths = sorted(args.input_path.rglob("*.mp4"))[: args.num_videos]

    # Optional sharding: each worker handles its own slice of the file list.
    if args.num_gpu is not None:
        start, end = split_array(len(file_paths), args.num_gpu, args.gpu_id)
        file_paths = file_paths[start:end]

    output_path = args.output_path

    if args.save_boxes:
        output_label_path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)

    if args.save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    if args.video_decoder == "cpu":
        decode_device = cpu(0)
    elif args.video_decoder == "gpu":
        decode_device = gpu(0)
    else:
        raise NotImplementedError(f"Only CPU and GPU devices are supported by decard, but got {args.video_decoder}")

    transform = albu.Compose([albu.Normalize(p=1, mean=(104, 117, 123), std=(1.0, 1.0, 1.0), max_pixel_value=1)], p=1)

    with torch.no_grad():
        for video_path in tqdm(file_paths):
            labels = []
            video_id = video_path.stem

            with video_reader(str(video_path), ctx=decode_device) as video:
                len_video = len(video)

                if args.num_frames is None:
                    # No limit requested: use every frame. The previous code
                    # called range(None) here, which raises TypeError.
                    frame_ids = list(range(len_video))
                elif args.num_frames == 1:
                    frame_ids = [0]
                elif args.num_frames > 1:
                    # Spread the requested number of frames across the video.
                    if len_video < args.num_frames:
                        step = 1
                    else:
                        step = len_video // args.num_frames
                    frame_ids = list(range(0, len_video, step))[: args.num_frames]
                else:
                    raise ValueError(f"Expect None or integer > 1 for args.num_frames, but got {args.num_frames}")

                frames = video.get_batch(frame_ids)

                if args.video_decoder == "cpu":
                    frames = frames.asnumpy()
                elif args.video_decoder == "gpu":
                    frames = dlpack.from_dlpack(frames.to_dlpack())

                if args.video_decoder == "gpu":
                    # Drop the decoder handle and cached GPU memory before inference.
                    del video
                    torch.cuda.empty_cache()
                    gc.collect()

            num_frames = len(frames)
            image_height = frames.shape[1]
            image_width = frames.shape[2]

            # Scale factors for the 5 landmark (x, y) pairs and for the box corners.
            scale1 = torch.Tensor([image_width, image_height] * 5)
            scale1 = scale1.to(device)

            scale = torch.Tensor([image_width, image_height, image_width, image_height])
            scale = scale.to(device)

            priorbox = PriorBox(cfg, image_size=(image_height, image_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data

            if args.resize_coeff is not None:
                target_size = min(args.resize_coeff)
                max_size = max(args.resize_coeff)

                image_size_min = min([image_width, image_height])
                image_size_max = max([image_width, image_height])

                resize = float(target_size) / float(image_size_min)
                # Keep the longer side from exceeding max_size.
                if np.round(resize * image_size_max) > max_size:
                    resize = float(max_size) / float(image_size_max)
            else:
                resize = 1

            for pred_id in range(num_frames):
                frame = frames[pred_id]

                torched_image = prepare_image(frame, transform, args.video_decoder).to(device)

                if args.fp16:
                    torched_image = torched_image.half()

                loc, conf, land = net(torched_image)  # forward pass

                frame_id = frame_ids[pred_id]

                boxes = decode(loc.data[0], prior_data, cfg["variance"])
                boxes *= scale / resize
                boxes = boxes.cpu().numpy()

                scores = conf[0].data.cpu().numpy()[:, 1]

                landmarks = decode_landm(land.data[0], prior_data, cfg["variance"])
                landmarks *= scale1 / resize
                landmarks = landmarks.cpu().numpy()

                # ignore low scores
                valid_index = np.where(scores > args.confidence_threshold)[0]
                boxes = boxes[valid_index]
                landmarks = landmarks[valid_index]
                scores = scores[valid_index]

                # sort by descending score before NMS
                order = scores.argsort()[::-1]
                boxes = boxes[order]
                landmarks = landmarks[order]
                scores = scores[order]

                # do NMS; detection columns: x_min, y_min, x_max, y_max, score
                detection = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
                keep = py_cpu_nms(detection, args.nms_threshold)

                detection = detection[keep, :]
                landmarks = landmarks[keep].astype(int)

                if detection.shape[0] == 0:
                    continue

                bboxes = detection[:, :4].astype(int)
                # float64 so the score is written by json.dump as a plain float
                confidence = detection[:, 4].astype(np.float64)

                for crop_id in range(len(detection)):
                    bbox = bboxes[crop_id]

                    labels.append(
                        {
                            "frame_id": int(frame_id),
                            "crop_id": crop_id,
                            "bbox": bbox.tolist(),
                            "score": confidence[crop_id],
                            "landmarks": landmarks[crop_id].tolist(),
                        }
                    )

                    if args.save_crops:
                        x_min, y_min, x_max, y_max = bbox

                        # Negative indices would wrap around when slicing.
                        x_min = max(0, x_min)
                        y_min = max(0, y_min)

                        crop = frame[y_min:y_max, x_min:x_max]

                        target_folder = output_image_path / f"{video_id}"
                        target_folder.mkdir(exist_ok=True, parents=True)

                        crop_file_path = target_folder / f"{frame_id}_{crop_id}.jpg"

                        # Do not overwrite crops left by an earlier run.
                        if crop_file_path.exists():
                            continue

                        cv2.imwrite(
                            str(crop_file_path),
                            cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                            [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                        )

            if args.save_boxes:
                result = {
                    "file_path": str(video_path),
                    "file_id": video_id,
                    "bboxes": labels,
                }

                with open(output_label_path / f"{video_id}.json", "w") as f:
                    json.dump(result, f, indent=2)
cfg = cfg_mnet elif args.network == "resnet50": cfg = cfg_re50 elif args.network == "mobile0.25_highway": cfg = cfg_mnet_highway # load pre-trained model if args.network == "mobile0.25": from models.retinaface import RetinaFace model = RetinaFace(cfg=cfg, phase='test') elif args.network == "mobile0.25_highway": from models.retinaface_highway import RetinaFaceHighway model = RetinaFaceHighway(cfg=cfg, phase='test') model = load_model(model, args.trained_model, args.cpu) model.to(device) def fine_tuner(masked_model, epochs=5): train(masked_model, cfg, resume_epoch=cfg['epoch'] - epochs) def evaluator(masked_model, level='average'): evaluate(masked_model, cfg) cmd = 'cd ./widerface_evaluate \ && python3 setup.py build_ext --inplace \ && python3 evaluation.py -e {} \ && cd ..'.format(args.experiment_data_dir) os.system(cmd) with open(
from models.retinaface import RetinaFace
from utils.net_utils import load_model, image_process, process_face_data
# import torch2trt.converters.cat

# Module-level setup: build the MobileNet-0.25 RetinaFace once at import time.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.set_grad_enabled(False)
cfg = cfg_mnet
retina_trained_model = "./weights/mobilenet0.25_Final.pth"
use_cpu = False
# cfg = cfg_re50
retina_net = RetinaFace(cfg=cfg, phase='test')
retina_net = load_model(retina_net, retina_trained_model, use_cpu)
retina_net.eval()
cudnn.benchmark = True
retina_net = retina_net.to(device)


def main(img_path):
    """Convert the detector with torch2trt and time one forward pass.

    Args:
        img_path: Path of the image handed to ``cv2.imread``.
    """
    frame = cv2.imread(img_path)
    tensor, width, height, scale = image_process(frame, device)
    print(tensor.shape)

    # The conversion happens before the timer starts, so only the forward
    # call of the converted model is measured.
    trt_model = torch2trt(retina_net, [tensor], fp16_mode=True, max_workspace_size=100000)

    started = time.time()
    loc, conf, landms = trt_model(tensor)
    print('net forward time: {:.4f}'.format(time.time() - started))

    # The last argument is the resize factor; the image is used at original size.
    result_data = process_face_data(cfg, tensor, height, width, loc, scale, conf, landms, 1)
def main():
    """Run RetinaFace 100 times on ``./curve/test.jpg`` and print forward times.

    Each iteration re-reads the image, runs the network, decodes boxes and
    landmarks, applies score filtering, top-k and NMS, and (when
    ``args.save_image`` is set) draws the detections and writes ``test.jpg``.

    Raises:
        ValueError: if ``args.network`` is not a supported backbone.
        FileNotFoundError: if the test image cannot be read.
    """
    args = get_args()
    torch.set_grad_enabled(False)

    # Fail early on an unknown backbone instead of handing cfg=None to the model.
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise ValueError("Unsupported network: {!r}".format(args.network))

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1
    image_path = "./curve/test.jpg"

    # testing begin
    for _ in range(100):
        # Re-read every iteration: the drawing code below modifies img_raw.
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: {}".format(image_path))

        img = np.float32(img_raw)

        im_height, im_width = img.shape[:2]
        scale = torch.Tensor([im_width, im_height, im_width, im_height])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)

        tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        print("net forward time: {:.4f}".format(time.time() - tic))

        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"])
        # (width, height) repeated once per landmark point
        scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        # columns: x1, y1, x2, y2, score, then 5 landmark (x, y) pairs
        dets = np.concatenate((dets, landms), axis=1)

        # show image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

            # save image
            name = "test.jpg"
            cv2.imwrite(name, img_raw)
class FaceDetector():
    """RetinaFace-based detector returning ``FaceDetection`` objects.

    Configuration comes from the module-level ``args`` namespace.
    """

    def __init__(self):
        # TODO: add initialization logic
        torch.set_grad_enabled(False)

        network = args.network
        self.cfg = None
        if network == "mobile0.25":
            self.cfg = cfg_mnet
        elif network == "resnet50":
            self.cfg = cfg_re50
        elif network == "resnet18":
            self.cfg = cfg_re18
        elif network == "resnet34":
            self.cfg = cfg_re34

        # net and model
        # NOTE(review): checkpoint loading is disabled below, so no trained
        # weights from args.trained_model are applied here; confirm this is
        # intended.
        model = RetinaFace(cfg=self.cfg, phase='test')
        # model = load_model(model, args.trained_model, args.cpu)
        self.net = model
        self.net.eval()
        print('Finished loading model!')
        print(self.net)

        cudnn.benchmark = True
        self.device = torch.device("cpu" if args.cpu else "cuda")
        self.net = self.net.to(self.device)
        self.resize = 1

    def detect_image(self, img) -> List[FaceDetection]:
        """Detect faces in one image and return those above ``args.vis_thres``."""
        # TODO: add detect logic for single image
        print(np.shape(img))
        started = time.time()

        image = np.float32(img)
        im_height, im_width, _ = image.shape
        scale = torch.Tensor([im_width, im_height, im_width, im_height])

        # Subtract per-channel constants, move channels first, add batch axis.
        image -= (104, 117, 123)
        batch = torch.from_numpy(image.transpose(2, 0, 1)).unsqueeze(0)
        batch = batch.to(self.device)
        scale = scale.to(self.device)

        loc, conf, landms = self.net(batch)  # forward pass

        priors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
        prior_data = priors.to(self.device).data

        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = (boxes * scale / self.resize).cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # ignore low scores
        selected = np.where(scores > args.confidence_threshold)[0]
        boxes, scores = boxes[selected], scores[selected]

        # keep top-K before NMS
        ranking = scores.argsort()[::-1][:args.top_k]
        boxes, scores = boxes[ranking], scores[ranking]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        kept = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[kept, :][:args.keep_top_k, :]

        # Build the result list; the score keeps its float value while the
        # box corners are truncated to integers.
        detections = []
        for row in dets:
            score = row[4]
            if score < args.vis_thres:
                continue
            left, top, right, bottom = (int(value) for value in row[:4])
            detections.append(FaceDetection(left, top, right, bottom, 0, score))

        print('net forward time: {:.4f}'.format(time.time() - started))
        return detections

    def detect_images(self, imgs) -> List[List[FaceDetection]]:
        """Run ``detect_image`` on every image, preserving input order."""
        return [self.detect_image(single) for single in imgs]

    def visualize(self, image, detection_list: List[FaceDetection], color=(0,0,255), thickness=5):
        """Return a copy of ``image`` with one rectangle per detection."""
        canvas = image.copy()
        for item in detection_list:
            box = item.bbox
            top_left = (box.left, box.top)
            bottom_right = (box.right, box.bottom)
            cv2.rectangle(canvas, top_left, bottom_right, color,
                          thickness=thickness, lineType=cv2.LINE_AA)
        return canvas
def run(args):
    """Evaluate a MobileNet RetinaFace over an image list and dump detections.

    For every path listed in ``args.test_list_dir`` the image is resized,
    passed through the network, and post-processed (score filter, top-k,
    NMS). Boxes are written to a per-image text file under
    ``<save_folder>/txt`` and a line per image is appended to
    ``<save_folder>/vis_bbox.txt``.

    Args:
        args: Namespace providing ``trained_model``, ``cpu``, ``test_list_dir``,
            ``save_folder``, ``origin_size``, ``confidence_threshold``,
            ``top_k``, ``nms_threshold``, ``keep_top_k``, ``vis_thres`` and
            ``save_image_all``.
    """
    # net and load
    cfg = cfg_mnet
    net = RetinaFace(cfg=cfg, phase='test')
    new_state_dict = load_normal(args.trained_model)
    net.load_state_dict(new_state_dict)
    print('Finished loading model!')
    print(net)
    torch.set_grad_enabled(False)
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # Profile on the same device as the model; an unconditional .cuda() here
    # broke CPU-only runs.
    dummy_input = torch.randn(1, 3, 270, 480).to(device)
    flops, params = profile(net, inputs=(dummy_input, ))
    print('flops:', flops, 'params:', params)

    # testing dataset
    with open(args.test_list_dir, 'r') as fr:
        test_dataset = fr.read().split()
    test_dataset.sort()

    _t = {'forward_pass': Timer(), 'misc': Timer()}

    # testing scale
    target_size = 1600
    max_size = 2150

    # testing begin
    os.makedirs(args.save_folder, exist_ok=True)

    net.eval()

    # The context manager closes vis_bbox.txt even if an image fails mid-run.
    with open(os.path.join(args.save_folder, 'vis_bbox.txt'), 'w') as f_:
        for i, image_path in enumerate(test_dataset):
            # Keep the part of the path that follows 'datasets' plus one separator.
            img_name = image_path[image_path.find('datasets') + 9:]
            img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
            img = np.float32(img_raw)

            im_shape = img.shape
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])
            resize = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(resize * im_size_max) > max_size:
                resize = float(max_size) / float(im_size_max)
            if args.origin_size:
                resize = 1

            if resize != 1:
                img = cv2.resize(img, None, None, fx=resize, fy=resize,
                                 interpolation=cv2.INTER_LINEAR)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor([im_width, im_height, im_width, im_height])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)

            _t['forward_pass'].tic()
            loc, conf, landms = net(img)  # forward pass
            _t['forward_pass'].toc()

            _t['misc'].tic()
            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data,
                                  cfg['variance'])
            # (width, height) repeated once per landmark point
            scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            _t['misc'].toc()

            # ----------------------------------------------------------------
            # Per-image text file: name, box count, then "x y w h score" lines.
            save_name = os.path.join(args.save_folder, 'txt',
                                     img_name)[:-4] + '.txt'
            dirname = os.path.dirname(save_name)
            if not os.path.isdir(dirname):
                os.makedirs(dirname)
            with open(save_name, "w") as fd:
                bboxs = dets
                file_name = os.path.basename(save_name)[:-4] + "\n"
                bboxs_num = str(len(bboxs)) + "\n"
                fd.write(file_name)
                fd.write(bboxs_num)
                for box in bboxs:
                    x = int(box[0])
                    y = int(box[1])
                    w = int(box[2]) - int(box[0])
                    h = int(box[3]) - int(box[1])
                    confidence = str(box[4])
                    line = str(x) + " " + str(y) + " " + str(w) + " " + str(
                        h) + " " + confidence + " \n"
                    fd.write(line)

            print('im_detect: {:d}/{:d}'
                  ' forward_pass_time: {:.4f}s'
                  ' misc: {:.4f}s'
                  ' img_shape:{:}'.format(i + 1, len(test_dataset),
                                          _t['forward_pass'].average_time,
                                          _t['misc'].average_time, img.shape))

            # save bbox-image
            line_write = save_image(dets, args.vis_thres, img_raw,
                                    args.save_folder, img_name,
                                    save_all=args.save_image_all)
            f_.write(line_write)
            f_.flush()
class RetinaDetector:
    """RetinaFace wrapper that maps a frame to a list of detection dicts."""

    def __init__(self, network, confidence=0.02, top_k=5000, nms_thresh=0.4, keep_top_k=750, vis_thresh=0.6):
        """Load the backbone selected by ``network`` and store the thresholds.

        ``network == "resnet"`` picks the ResNet-50 config and weights; any
        other value falls back to MobileNet-0.25.
        """
        torch.set_grad_enabled(False)

        self.confidence = confidence
        self.top_k = top_k
        self.nms_thresh = nms_thresh
        self.keep_top_k = keep_top_k
        self.vis_thresh = vis_thresh

        # Stored as a plain string first; replaced by a torch.device below.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        load_to_cpu = self.device == "cpu"

        if network == "resnet":
            self.cfg = cfg_re50
            model_path = "weights/Resnet50_Final.pth"
        else:
            self.cfg = cfg_mnet
            model_path = "weights/mobilenet0.25_Final.pth"

        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = load_model(self.net, model_path, load_to_cpu)
        self.net.eval()
        print('Finished loading model!')
        print(self.net)
        cudnn.benchmark = True
        self.device = torch.device(self.device)
        self.net = self.net.to(self.device)

    def detect(self, frame):
        """Return one dict per detection with box, confidence and landmarks."""
        resize = 1

        pixels = np.float32(frame)
        im_height, im_width, _ = pixels.shape
        scale = torch.Tensor([im_width, im_height, im_width, im_height])

        # Subtract per-channel constants, move channels first, add batch axis.
        pixels -= (104, 117, 123)
        img = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf, landms = self.net(img)

        priors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
        prior_data = priors.to(self.device).data

        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = (boxes * scale / resize).cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
        # (width, height) repeated once per landmark point
        scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
        scale1 = scale1.to(self.device)
        landms = (landms * scale1 / resize).cpu().numpy()

        # ignore low scores
        selected = np.where(scores > self.confidence)[0]
        boxes, landms, scores = boxes[selected], landms[selected], scores[selected]

        # keep top-K before NMS
        ranking = scores.argsort()[::-1][:self.top_k]
        boxes, landms, scores = boxes[ranking], landms[ranking], scores[ranking]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        kept = py_cpu_nms(dets, self.nms_thresh)
        dets = dets[kept, :]
        landms = landms[kept]

        # keep top-K after NMS
        dets = dets[:self.keep_top_k, :]
        landms = landms[:self.keep_top_k, :]

        # columns: x1, y1, x2, y2, score, then 5 landmark (x, y) pairs
        dets = np.concatenate((dets, landms), axis=1)

        landmark_keys = (
            "p1_x", "p1_y", "p2_x", "p2_y", "p3_x", "p3_y",
            "p4_x", "p4_y", "p5_x", "p5_y",
        )
        results = []
        for det in dets:
            results.append({
                "point": {
                    "x1": int(det[0]),
                    "y1": int(det[1]),
                    "x2": int(det[2]),
                    "y2": int(det[3]),
                },
                "confidence": det[4],
                "landmark": {
                    key: int(value)
                    for key, value in zip(landmark_keys, det[5:15])
                },
            })
        return results

    def write_bbox(self, frame, results, confidence=True, landmark=True):
        """Draw detections at or above ``vis_thresh`` on a copy of ``frame``.

        Args:
            frame: Image to annotate; it is copied, not modified.
            results: Detection dicts as produced by ``detect``.
            confidence: When true, also draw the score text.
            landmark: When true, also draw the five landmark points.
        """
        canvas = np.copy(frame)
        red = (0, 0, 255)
        landmark_pairs = (
            ("p1_x", "p1_y"),
            ("p2_x", "p2_y"),
            ("p3_x", "p3_y"),
            ("p4_x", "p4_y"),
            ("p5_x", "p5_y"),
        )

        for item in results:
            if item["confidence"] < self.vis_thresh:
                continue

            label = "{:.4f}".format(item["confidence"])
            corner = item["point"]
            cv2.rectangle(canvas, (corner["x1"], corner["y1"]),
                          (corner["x2"], corner["y2"]), red, 2)

            if confidence:
                # Offset by 12 px so the text sits inside the box's top edge.
                anchor = (corner["x1"], corner["y1"] + 12)
                cv2.putText(canvas, label, anchor,
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            if landmark:
                points = item["landmark"]
                for key_x, key_y in landmark_pairs:
                    cv2.circle(canvas, (points[key_x], points[key_y]), 1, red, 4)

        return canvas
class RetinaFaceDetector:
    """ResNet-50 RetinaFace wrapper returning squared boxes, landmarks, scores."""

    def __init__(self, device, pretrained_model):
        """Build the network, load ``pretrained_model`` and move it to ``device``."""
        self.device = device
        self.cfg = {
            'name': 'Resnet50',
            'min_sizes': [[16, 32], [64, 128], [256, 512]],
            'steps': [8, 16, 32],
            'variance': [0.1, 0.2],
            'clip': False,
            'loc_weight': 2.0,
            'gpu_train': True,
            'batch_size': 24,
            'ngpu': 4,
            'epoch': 100,
            'decay1': 70,
            'decay2': 90,
            'image_size': 840,
            'pretrain': True,
            'return_layers': {
                'layer2': 1,
                'layer3': 2,
                'layer4': 3
            },
            'in_channel': 256,
            'out_channel': 256
        }
        self.net = RetinaFace(cfg=self.cfg, phase='test')
        # NOTE(review): the third argument is the device object; confirm what
        # load_model expects in that position.
        self.net = load_model(self.net, pretrained_model, device)
        self.net.eval()
        self.net = self.net.to(device)

    @staticmethod
    def _square_boxes(bboxes):
        """Expand each box to a square and shift it back inside the top/left edge.

        Returns an integer array of shape ``(len(bboxes), 4)``; an empty input
        yields shape ``(0, 4)``.
        """
        squared = []
        for box in bboxes:
            x0, y0, x1, y1 = tuple(box.astype(int))
            height, width = y1 - y0, x1 - x0
            distance = max(height, width)
            # Pad the shorter side symmetrically up to the longer one.
            if height < distance:
                gap = distance - height
                y0 -= gap / 2
                y1 += gap / 2
            elif width < distance:
                gap = distance - width
                x0 -= gap / 2
                x1 += gap / 2
            # Shift the box so that it starts at 0 when it crossed the edge.
            if y0 < 0:
                y1 -= y0
                y0 = 0
            if x0 < 0:
                x1 -= x0
                x0 = 0
            squared.append([x0, y0, x1, y1])
        # reshape keeps a consistent (N, 4) layout even when N == 0
        return np.array(squared).astype(int).reshape(-1, 4)

    def predict(self, image, confidence_threshold=0.02, top_k=5000, nms_threshold=0.4, keep_top_k=750):
        """Detect faces in ``image``.

        Args:
            image: Image array with three axes (height, width, channels).
            confidence_threshold: Minimum score kept before NMS.
            top_k: Maximum number of candidates passed to NMS.
            nms_threshold: Threshold handed to ``py_cpu_nms``.
            keep_top_k: Maximum number of detections kept after NMS.

        Returns:
            Tuple ``(boxes, landmarks, scores)``: integer boxes of shape
            ``(N, 4)`` expanded to squares, landmarks of shape ``(N, 5, 2)``
            and the matching scores.
        """
        # Note: this disables autograd for the whole process, not just this call.
        torch.set_grad_enabled(False)
        img = np.float32(image)
        resize = 1

        im_height, im_width, _ = img.shape
        scale = torch.Tensor([im_width, im_height, im_width, im_height])
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)
        scale = scale.to(self.device)

        loc, conf, landms = self.net(img)  # forward pass

        priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              self.cfg['variance'])
        # (width, height) repeated once per landmark point
        scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:keep_top_k, :]
        landms = landms[:keep_top_k, :]

        # columns: x1, y1, x2, y2, score, then 5 landmark (x, y) pairs
        dets = np.concatenate((dets, landms), axis=1)
        bboxes, landmarks, confident_scores = dets[:, :4], dets[:, 5:], dets[:, 4]

        boxes = self._square_boxes(bboxes)
        return boxes, landmarks.reshape(-1, 5, 2), confident_scores
class Inference(object):
    """RetinaFace inference with priors precomputed for a fixed input size.

    Prior boxes and the box scale are built once for
    ``input_height`` x ``input_width`` (720 x 1280), so images passed to the
    instance are expected to have that size after ``resize`` is applied.
    """

    def __init__(self, weight_path, network, use_cpu=False):
        """Store settings, load the model and precompute scale and priors.

        Args:
            weight_path: Path of the checkpoint to load.
            network: ``"mobile0.25"`` or ``"resnet50"``.
            use_cpu: Run on CPU when true, otherwise on CUDA.
        """
        self.weight_path = weight_path
        self.network = network
        self.use_cpu = use_cpu
        self.resize = 1
        self.confidence_threshold = 0.02
        self.nms_threshold = 0.4
        self.vis_thres = 0.5
        self.input_height = 720
        self.input_width = 1280

        self._initialize_weight()
        # Derived from the configured input size so the two cannot drift apart.
        self.scale = torch.Tensor([
            self.input_width, self.input_height,
            self.input_width, self.input_height
        ]).to(self.device)
        self.prior_data = self._initialize_priorbox(self.cfg,
                                                    self.input_height,
                                                    self.input_width)

    def _initialize_weight(self):
        """Select the config, build the network and load its weights.

        Raises:
            ValueError: if ``self.network`` is not a supported backbone.
        """
        if self.network == "mobile0.25":
            self.cfg = cfg_mnet
        elif self.network == "resnet50":
            self.cfg = cfg_re50
        else:
            # Fail early instead of handing cfg=None to the model constructor.
            raise ValueError(
                "Unsupported network: {!r}".format(self.network))

        self.net = RetinaFace(cfg=self.cfg, phase='test')
        self.net = self._load_model(self.net, self.weight_path, self.use_cpu)
        self.net.eval()
        print('Finished loading model!')
        print(self.net)
        cudnn.benchmark = True
        self.device = torch.device("cpu" if self.use_cpu else "cuda")
        print("self. device : ", self.device)
        self.net = self.net.to(self.device)

    def _initialize_priorbox(self, cfg, im_height, im_width):
        """Return prior-box data for the given image size on ``self.device``."""
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(self.device)
        return priors.data

    def _remove_prefix(self, state_dict, prefix):
        ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
        print('remove prefix \'{}\''.format(prefix))

        def strip(name):
            # Only a leading occurrence of the prefix is removed.
            return name.split(prefix, 1)[-1] if name.startswith(prefix) else name

        return {strip(key): value for key, value in state_dict.items()}

    def _check_keys(self, model, pretrained_state_dict):
        """Print key overlap statistics and assert that some keys match."""
        ckpt_keys = set(pretrained_state_dict.keys())
        model_keys = set(model.state_dict().keys())
        used_pretrained_keys = model_keys & ckpt_keys
        unused_pretrained_keys = ckpt_keys - model_keys
        missing_keys = model_keys - ckpt_keys
        print('Missing keys:{}'.format(len(missing_keys)))
        print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
        print('Used keys:{}'.format(len(used_pretrained_keys)))
        assert len(
            used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
        return True

    def _load_model(self, model, pretrained_path, load_to_cpu):
        """Load a checkpoint into ``model`` (non-strict) and return the model."""
        print('Loading pretrained model from {}'.format(pretrained_path))
        if load_to_cpu:
            pretrained_dict = torch.load(
                pretrained_path, map_location=lambda storage, loc: storage)
        else:
            device = torch.cuda.current_device()
            pretrained_dict = torch.load(
                pretrained_path,
                map_location=lambda storage, loc: storage.cuda(device))
        # Checkpoints may wrap the weights under a "state_dict" entry.
        if "state_dict" in pretrained_dict.keys():
            pretrained_dict = self._remove_prefix(
                pretrained_dict['state_dict'], 'module.')
        else:
            pretrained_dict = self._remove_prefix(pretrained_dict, 'module.')
        self._check_keys(model, pretrained_dict)
        model.load_state_dict(pretrained_dict, strict=False)
        return model

    def _forward(self, img_raw):
        """Detect faces in ``img_raw``.

        Returns:
            ``(boxes, scores, landmarks)`` lists for detections scoring at
            least ``vis_thres``, or ``(None, None, None)`` when ``img_raw``
            is None.
        """
        if img_raw is None:
            print("img is None")
            return None, None, None

        img = np.float32(img_raw)
        if self.resize != 1:
            img = cv2.resize(img,
                             None,
                             None,
                             fx=self.resize,
                             fy=self.resize,
                             interpolation=cv2.INTER_LINEAR)

        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(self.device)

        loc, conf, landms = self.net(img)  # forward pass

        # decode boxes against the precomputed priors
        boxes = decode(loc.data.squeeze(0), self.prior_data,
                       self.cfg['variance'])
        boxes = boxes * self.scale / self.resize
        boxes = boxes.cpu().numpy()

        # scores
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        # landmarks; (width, height) repeated once per landmark point
        landms = decode_landm(landms.data.squeeze(0), self.prior_data,
                              self.cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
        scale1 = scale1.to(self.device)
        landms = landms * scale1 / self.resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > self.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # sort by descending score before NMS
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, self.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        dets = np.concatenate((dets, landms), axis=1)

        boxes_list = []
        scores_list = []
        landmarks_list = []
        for b in dets:
            if b[4] < self.vis_thres:
                continue
            s = b[4]  # keep the float score before truncating the row to ints
            b = list(map(int, b))
            boxes_list.append(b[0:4])
            scores_list.append(s)
            landmarks_list.append(b[5:15])

        return boxes_list, scores_list, landmarks_list

    def __call__(self, img_raw):
        """Alias for ``_forward``."""
        return self._forward(img_raw)
def wxf(img):
    """Run RetinaFace on a video source and display boxes until 'q' is pressed.

    Args:
        img: Source handed to ``cv2.VideoCapture``.

    Raises:
        ValueError: if ``args.network`` is not a supported backbone.
    """
    cap = cv2.VideoCapture(img)
    cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))

    # try/finally guarantees the capture and the window are released even if
    # model loading or a frame fails.
    try:
        torch.set_grad_enabled(False)
        if args.network == "mobile0.25":
            cfg = cfg_mnet
        elif args.network == "resnet50":
            cfg = cfg_re50
        else:
            # Fail early instead of handing cfg=None to the model constructor.
            raise ValueError("Unsupported network: {!r}".format(args.network))

        # net and model
        net = RetinaFace(cfg=cfg, phase='test')
        net = load_model(net, args.trained_model, args.cpu)
        net.eval()
        cudnn.benchmark = True
        device = torch.device("cpu" if args.cpu else "cuda")
        net = net.to(device)

        target_size = 1600
        max_size = 2150

        while True:
            ret, imgre = cap.read()
            if not ret:
                print('Video open error.')
                break

            frame = np.float32(imgre)

            im_shape = frame.shape
            im_size_min = np.min(im_shape[0:2])
            im_size_max = np.max(im_shape[0:2])
            resize = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(resize * im_size_max) > max_size:
                resize = float(max_size) / float(im_size_max)
            if args.origin_size:
                resize = 1

            if resize != 1:
                frame = cv2.resize(frame, None, None, fx=resize, fy=resize,
                                   interpolation=cv2.INTER_LINEAR)
            im_height, im_width, _ = frame.shape
            scale = torch.Tensor([im_width, im_height, im_width, im_height])
            frame -= (104, 117, 123)
            frame = frame.transpose(2, 0, 1)
            batch = torch.from_numpy(frame).unsqueeze(0)
            batch = batch.to(device)
            scale = scale.to(device)

            loc, conf, landms = net(batch)  # forward pass

            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data,
                                  cfg['variance'])
            # (width, height) repeated once per landmark point
            scale1 = torch.Tensor([batch.shape[3], batch.shape[2]] * 5)
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # sort by descending score before NMS (no top-k cut here)
            order = scores.argsort()[::-1]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            dets = dets[keep, :]
            landms = landms[keep]

            dets = np.concatenate((dets, landms), axis=1)

            # Only the bounding box is drawn; score text and landmark dots
            # are not rendered in this viewer.
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                b = list(map(int, b))
                cv2.rectangle(imgre, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)

            cv2.imshow('wyfRetinaface', imgre)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
# if args.cpu: # pretrained_dict = torch.load( # args.trained_model, map_location=lambda storage, loc: storage) # else: # device = torch.cuda.current_device() # pretrained_dict = torch.load( # args.trained_model, map_location=lambda storage, loc: storage.cuda(device)) # net.load_state_dict(pretrained_dict) # net.eval() net = load_model(net, args.trained_model, args.cpu) net.eval() print('Finished loading model!') cudnn.benchmark = True device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") net.to(device) resize = 1 # Hyperparameter img_raw = cv.imread("demo.jpg", cv.IMREAD_COLOR) img = np.float32(img_raw) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device)
def main():
    """Run batched RetinaFace detection over every ``*.jpg`` under ``args.input_path``.

    Command-line options come from ``get_args()``. For each image the
    detections above ``args.confidence_threshold`` are filtered with NMS and,
    depending on the flags:

    * ``args.save_boxes``: one ``<file_id>.json`` per image is written to
      ``<output_path>/labels`` with the boxes, scores and landmarks;
    * ``args.save_crops``: every detected box is cropped from the raw image
      and written to ``<output_path>/images/<file_id>/``.

    Images without any detection produce no output files.

    Raises:
        NotImplementedError: if ``args.network`` is neither ``mobile0.25``
            nor ``resnet50``.
    """
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise NotImplementedError(
            "Only mobile0.25 and resnet50 are suppoted.")

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    if args.fp16:
        net = net.half()

    print("Finished loading model!")
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    file_paths = sorted(args.input_path.rglob("*.jpg"))

    # Optionally restrict this process to its own slice of the file list.
    if args.num_gpu is not None:
        start, end = split_array(len(file_paths), args.num_gpu, args.gpu_id)
        file_paths = file_paths[start:end]

    output_path = args.output_path

    if args.save_boxes:
        output_label_path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)

    if args.save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    transform = albu.Compose(
        [
            albu.Normalize(
                p=1,
                mean=(104, 117, 123),
                std=(1.0, 1.0, 1.0),
                max_pixel_value=1,
            )
        ],
        p=1,
    )

    test_loader = DataLoader(
        InferenceDataset(file_paths, args.origin_size, transform=transform),
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
    )

    with torch.no_grad():
        for raw_input in tqdm(test_loader):
            torched_images = raw_input["torched_image"]
            if args.fp16:
                torched_images = torched_images.half()

            resizes = raw_input["resize"]
            # The batch holds one path per image and is indexed below, so it
            # must stay a sequence (wrapping the whole batch in Path() fails).
            image_paths = raw_input["image_path"]
            raw_images = raw_input["raw_image"]

            # Resume support: with one image per batch, skip images whose
            # label file already exists.
            if (args.batch_size == 1 and args.save_boxes and
                    (output_label_path /
                     f"{Path(image_paths[0]).stem}.json").exists()):
                continue

            loc, conf, land = net(torched_images.to(device))  # forward pass

            batch_size = torched_images.shape[0]
            image_height, image_width = torched_images.shape[2:]

            # (w, h) repeated for the 10 landmark values and the 4 box values.
            scale1 = torch.Tensor([image_width, image_height] * 5).to(device)
            scale = torch.Tensor([image_width, image_height] * 2).to(device)

            priorbox = PriorBox(cfg, image_size=(image_height, image_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data

            for batch_id in range(batch_size):
                # Reset per image so a JSON file never contains detections
                # that belong to earlier images of the same batch.
                labels = []

                image_path = image_paths[batch_id]
                file_id = Path(image_path).stem
                raw_image = raw_images[batch_id]
                resize = resizes[batch_id].float()

                boxes = decode(loc.data[batch_id], prior_data, cfg["variance"])
                boxes *= scale / resize
                scores = conf[batch_id][:, 1]

                landmarks = decode_landm(
                    land.data[batch_id], prior_data, cfg["variance"])
                landmarks *= scale1 / resize

                # ignore low scores
                valid_index = torch.where(
                    scores > args.confidence_threshold)[0]
                boxes = boxes[valid_index]
                landmarks = landmarks[valid_index]
                scores = scores[valid_index]

                # highest score first
                order = scores.argsort(descending=True)
                boxes = boxes[order]
                landmarks = landmarks[order]
                scores = scores[order]

                # do NMS
                keep = nms(boxes, scores, args.nms_threshold)
                boxes = boxes[keep, :].int()
                landmarks = landmarks[keep].int()

                # No detections: nothing is written for this image.
                if boxes.shape[0] == 0:
                    continue

                scores = scores[keep].cpu().numpy().astype(np.float64)

                for crop_id, bbox in enumerate(boxes):
                    bbox = bbox.cpu().numpy()

                    labels += [{
                        "crop_id": crop_id,
                        "bbox": bbox.tolist(),
                        "score": scores[crop_id],
                        "landmarks": landmarks[crop_id].tolist(),
                    }]

                    if args.save_crops:
                        x_min, y_min, x_max, y_max = bbox
                        # Clamp the top-left corner so slicing never wraps
                        # around with negative indices.
                        x_min = max(0, x_min)
                        y_min = max(0, y_min)

                        crop = raw_image[y_min:y_max,
                                         x_min:x_max].cpu().numpy()

                        target_folder = output_image_path / f"{file_id}"
                        target_folder.mkdir(exist_ok=True, parents=True)

                        crop_file_path = (
                            target_folder / f"{file_id}_{crop_id}.jpg")
                        if crop_file_path.exists():
                            continue

                        cv2.imwrite(
                            str(crop_file_path),
                            cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                            [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                        )

                if args.save_boxes:
                    result = {
                        "file_path": str(image_path),
                        "file_id": file_id,
                        "bboxes": labels,
                    }

                    with open(output_label_path / f"{file_id}.json", "w") as f:
                        json.dump(result, f, indent=2)
if __name__ == '__main__':
    # Script entry point: build the detector selected by args.network and
    # prepare the input tensor for a single test image.
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        # Fail early instead of handing cfg=None to RetinaFace.
        raise ValueError(
            f"Unsupported network {args.network!r}; "
            "expected 'mobile0.25' or 'resnet50'.")

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1

    for i in range(1):
        image_path = "nano.jpg"
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            # imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image {image_path!r}")
        print("The original image shape is ", img_raw.shape)

        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape
        print("The image shape is ", img.shape)

        # (width, height, width, height) of the input image.
        scale = torch.Tensor([im_width, im_height, im_width, im_height])

        # Subtract the per-channel offsets, then reorder HWC -> CHW and add
        # a leading batch axis.
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
def wxf(imgpath):
    """Detect faces in the image at ``imgpath`` and save an annotated copy.

    The network selected by ``args.network`` is built and its weights are
    loaded on every call. The image is rescaled (unless ``args.origin_size``
    is set) so that its shorter side is 1600 px while its longer side does
    not exceed 2150 px, passed through the network, and the detections are
    thresholded by ``args.confidence_threshold`` and filtered with
    ``py_cpu_nms``.

    Args:
        imgpath: path of the image to process.

    Returns:
        ``"./results/wxf.jpg"`` (the annotated image written to disk) when
        ``args.save_image`` is set, otherwise ``None``.

    Raises:
        ValueError: if ``args.network`` is not a supported backbone name.
        FileNotFoundError: if the image cannot be read.
    """
    print(imgpath)
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        # Fail early instead of handing cfg=None to RetinaFace.
        raise ValueError(
            f"Unsupported network {args.network!r}; "
            "expected 'mobile0.25' or 'resnet50'.")

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    # print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    image_path = imgpath
    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img_raw is None:
        # imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image {image_path!r}")

    img = np.float32(img_raw)

    target_size = 1600
    max_size = 2150
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    if args.origin_size:
        resize = 1
    else:
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)

    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)

    im_height, im_width, _ = img.shape
    # (w, h) repeated for the 4 box values and the 10 landmark values.
    scale = torch.Tensor([im_width, im_height] * 2)
    scale1 = torch.Tensor([im_width, im_height] * 5)

    # Subtract the per-channel offsets, then reorder HWC -> CHW and add a
    # leading batch axis.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    scale1 = scale1.to(device)

    loc, conf, landms = net(img)  # forward pass

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    # Decode the predictions and map them back to original-image pixels.
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # sort by descending score before NMS (no top-K cut is applied)
    order = scores.argsort()[::-1]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
        np.float32, copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    # Each row: 4 box values, score, then the 10 landmark values.
    dets = np.concatenate((dets, landms), axis=1)

    if not args.save_image:
        # Nothing is drawn or written, so there is no output path to report.
        return None

    for b in dets:
        if b[4] < args.vis_thres:
            continue
        b = list(map(int, b))
        # Only the box is drawn; score text and landmark points are
        # intentionally left out.
        cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)

    # save image
    os.makedirs("./results/", exist_ok=True)
    name = "./results/" + "wxf" + ".jpg"
    cv2.imwrite(name, img_raw)
    return name