def main():
    """Detect faces in one image, then save and display the annotated result.

    Uses ``args.model_path`` (checkpoint), ``args.input`` (image to read) and
    ``args.output`` (where the annotated image is written) as returned by
    ``arg_parse()``. The model and the input batch are moved to the GPU, so a
    CUDA device is required.

    Raises:
        FileNotFoundError: if ``args.input`` cannot be read as an image.
    """
    args = arg_parse()

    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load the trained weights. The first seven characters of every
    # checkpoint key are dropped before matching (presumably the 'module.'
    # prefix added by DataParallel -- TODO confirm); keys that the model does
    # not have are skipped.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read the image. cv2.imread returns None instead of raising on failure.
    bgr_img = cv2.imread(args.input)
    if bgr_img is None:
        raise FileNotFoundError('Cannot read image: {}'.format(args.input))
    img = torch.from_numpy(cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB))
    img = img.permute(2, 0, 1)  # h, w, c -> c, h, w

    # Run the network on a batch of one.
    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_scores = get_detections(input_img,
                                                 RetinaFace,
                                                 score_threshold=0.5,
                                                 iou_threshold=0.3)

    # Back to an h, w, c uint8 array; swapping the channels a second time
    # restores the BGR order the image was read in.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX
    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, score in zip(boxes, picked_scores[j]):
            # OpenCV drawing calls need plain Python ints, not tensor scalars.
            x1, y1, x2, y2 = [int(box[k]) for k in range(4)]
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
            cv2.putText(img,
                        text=str(score.item())[:5],
                        org=(x1, y1),
                        fontFace=font,
                        fontScale=0.5,
                        thickness=1,
                        lineType=cv2.LINE_AA,
                        color=(255, 255, 255))

    # Save and display the processed image.
    cv2.imwrite(args.output, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
def main():
    """Detect faces and five-point landmarks in one image and display them.

    Uses ``args.model_path`` (checkpoint), ``args.image_path`` (image read
    with skimage) and ``args.scale`` (the image is shrunk by this factor
    unless it equals 1.0). The model and the input batch are moved to the
    GPU, so a CUDA device is required. The annotated image is only shown in
    a window; it is not written to disk.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model. The first seven characters of every checkpoint key
    # are dropped before matching (presumably the 'module.' prefix added by
    # DataParallel -- TODO confirm); keys the model does not have are skipped.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img)
    img = img.permute(2, 0, 1)  # h, w, c -> c, h, w

    if not args.scale == 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks, picked_scores = \
        eval_widerface.get_detections(input_img,
                                      RetinaFace,
                                      score_threshold=0.5,
                                      iou_threshold=0.3)

    # Back to an h, w, c uint8 array with the channel order swapped for
    # OpenCV display.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX
    # One colour per landmark, in the order the (x, y) pairs are stored.
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255),
                       (255, 255, 0))

    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark, score in zip(boxes, picked_landmarks[j],
                                        picked_scores[j]):
            # OpenCV drawing calls need plain Python ints, not tensor scalars.
            x1, y1, x2, y2 = [int(box[k]) for k in range(4)]
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), thickness=2)
            for k, color in enumerate(landmark_colors):
                center = (int(landmark[2 * k]), int(landmark[2 * k + 1]))
                cv2.circle(img, center, radius=1, color=color, thickness=2)
            cv2.putText(img,
                        text=str(score.item())[:5],
                        org=(x1, y1),
                        fontFace=font,
                        fontScale=0.5,
                        thickness=1,
                        lineType=cv2.LINE_AA,
                        color=(255, 255, 255))

    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
def main():
    """Train RetinaFace, log losses to TensorBoard and save checkpoints.

    Reads ``save_path``, ``data_path``, ``batch``, ``epochs``, ``verbose``,
    ``eval_step`` and ``save_step`` from ``get_args()``. Label files are
    expected at ``<data_path>/train/label.txt`` and
    ``<data_path>/val/label.txt``. The model is moved to the GPU, so a CUDA
    device is required.
    """
    args = get_args()

    # makedirs creates save_path and its 'log' sub-directory in one call and
    # also works when save_path is nested or already exists.
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    writer = SummaryWriter(log_dir=log_path)
    try:
        data_path = args.data_path
        train_path = os.path.join(data_path, 'train/label.txt')
        val_path = os.path.join(data_path, 'val/label.txt')

        dataset_train = TrainDataset(train_path,
                                     transform=transforms.Compose(
                                         [Resizer(), PadToSquare()]))
        dataloader_train = DataLoader(dataset_train,
                                      num_workers=8,
                                      batch_size=args.batch,
                                      collate_fn=collater,
                                      shuffle=True)
        dataset_val = ValDataset(val_path,
                                 transform=transforms.Compose(
                                     [Resizer(), PadToSquare()]))
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    batch_size=args.batch,
                                    collate_fn=collater)

        total_batch = len(dataloader_train)

        # Create torchvision model
        return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
        retinaface = torchvision_model.create_retinaface(return_layers)
        retinaface = retinaface.cuda()
        retinaface = torch.nn.DataParallel(retinaface).cuda()
        retinaface.training = True

        optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)

        print('Start to train.')

        # Counts logging events; multiplied by args.verbose it gives the
        # TensorBoard step of each logged point.
        iteration = 0

        for epoch in range(args.epochs):
            retinaface.train()

            # Training
            for iter_num, data in enumerate(dataloader_train):
                optimizer.zero_grad()
                classification_loss, bbox_regression_loss, \
                    ldm_regression_loss = retinaface(
                        [data['img'].cuda().float(), data['annot']])
                # .mean() reduces each returned loss tensor to a scalar.
                classification_loss = classification_loss.mean()
                bbox_regression_loss = bbox_regression_loss.mean()
                ldm_regression_loss = ldm_regression_loss.mean()

                loss = (classification_loss + bbox_regression_loss +
                        ldm_regression_loss)

                loss.backward()
                optimizer.step()

                if iter_num % args.verbose == 0:
                    log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                        epoch, args.epochs, iter_num, total_batch)
                    table_data = [
                        ['loss name', 'value'],
                        ['total_loss', str(loss.item())],
                        ['classification',
                         str(classification_loss.item())],
                        ['bbox', str(bbox_regression_loss.item())],
                        ['landmarks', str(ldm_regression_loss.item())],
                    ]
                    log_str += AsciiTable(table_data).table
                    print(log_str)

                    # write the log to tensorboard
                    writer.add_scalars(
                        'losses:', {
                            'total_loss': loss.item(),
                            'cls_loss': classification_loss.item(),
                            'bbox_loss': bbox_regression_loss.item(),
                            'ldm_loss': ldm_regression_loss.item()
                        }, iteration * args.verbose)
                    iteration += 1

            # Eval
            if epoch % args.eval_step == 0:
                print('-------- RetinaFace Pytorch --------')
                print('Evaluating epoch {}'.format(epoch))
                recall, precision = eval_widerface.evaluate(
                    dataloader_val, retinaface)
                print('Recall:', recall)
                print('Precision:', precision)

            # Save model
            if (epoch + 1) % args.save_step == 0:
                torch.save(
                    retinaface.state_dict(), args.save_path +
                    '/model_epoch_{}.pt'.format(epoch + 1))
    finally:
        # Flush pending TensorBoard events even if training is interrupted.
        writer.close()
def main(nummmmmm):
    """Run 68-point landmark detection live on the default webcam.

    Frames are read from capture device 0, annotated with the detected boxes
    and landmarks, and shown in a window until 'q' is pressed or a frame
    cannot be read. The time spent on each frame is printed in seconds.

    Args:
        nummmmmm: Unused; kept so existing callers keep working.
    """
    import time

    # No option is read below; the call is kept for its command-line
    # handling.
    get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load trained model. The first seven characters of every checkpoint key
    # are dropped before matching (presumably the 'module.' prefix added by
    # DataParallel -- TODO confirm); keys the model does not have are skipped.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load('stage_5_68_full_model_epoch_121.pt',
                                map_location='cpu')
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.to(device)
    RetinaFace.eval()

    video = cv2.VideoCapture(0)
    try:
        while True:
            start = time.time()
            ret, img = video.read()
            if not ret:
                # No frame delivered (camera missing or closed): stop instead
                # of handing None to cvtColor.
                break
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)  # h, w, c -> c, h, w
            resized_img = img.float()

            # The batch must live on the same device as the model.
            input_img = resized_img.unsqueeze(0).to(device)
            picked_boxes, picked_landmarks = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            # Back to an h, w, c uint8 array in BGR order for display.
            np_img = resized_img.cpu().permute(1, 2, 0).numpy()
            img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    # OpenCV drawing calls need plain Python ints.
                    x1, y1, x2, y2 = [int(box[k]) for k in range(4)]
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                                  thickness=2)
                    # 136 values = 68 (x, y) landmark pairs.
                    for i in range(0, 136, 2):
                        cv2.circle(img,
                                   (int(landmark[i]), int(landmark[i + 1])),
                                   radius=1,
                                   color=(0, 0, 255),
                                   thickness=2)

            cv2.imshow('RetinaFace-Pytorch', img)
            print(time.time() - start)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if a frame fails.
        video.release()
        cv2.destroyAllWindows()
def main():
    """Train RetinaFace and keep the checkpoint with the best precision.

    Reads ``save_path``, ``data_path``, ``model_path``, ``batch``, ``epochs``,
    ``verbose``, ``eval_step`` and ``save_step`` from ``get_args()``. When
    ``args.model_path`` is not None the model is initialised from that
    checkpoint. Losses, recall and precision are logged to TensorBoard under
    ``<save_path>/log``. The model is moved to the GPU, so a CUDA device is
    required.
    """
    precision_global = 0
    args = get_args()

    # makedirs creates save_path and its 'log' sub-directory in one call and
    # also works when save_path is nested or already exists.
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    writer = SummaryWriter(log_dir=log_path)
    try:
        data_path = args.data_path
        train_path = os.path.join(data_path, 'retina-train-splitTrain.txt')
        val_path = os.path.join(data_path, "retina-train-splitTest.txt")

        dataset_train = TrainDataset(train_path,
                                     transform=transforms.Compose(
                                         [Resizer(), PadToSquare()]))
        dataloader_train = DataLoader(dataset_train,
                                      num_workers=6,
                                      batch_size=args.batch,
                                      collate_fn=collater,
                                      shuffle=True)
        dataset_val = ValDataset(val_path,
                                 transform=transforms.Compose(
                                     [Resizer(), PadToSquare()]))
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    batch_size=args.batch,
                                    collate_fn=collater)

        total_batch = len(dataloader_train)

        # Create torchvision model
        return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
        retinaface = torchvision_model.create_retinaface(return_layers)

        # Load trained model. The first seven characters of every checkpoint
        # key are dropped before matching (presumably the 'module.' prefix
        # added by DataParallel -- TODO confirm).
        if args.model_path is not None:
            retina_dict = retinaface.state_dict()
            pre_state_dict = torch.load(args.model_path)
            pretrained_dict = {
                k[7:]: v
                for k, v in pre_state_dict.items() if k[7:] in retina_dict
            }
            retinaface.load_state_dict(pretrained_dict)

        retinaface = retinaface.cuda()
        retinaface = torch.nn.DataParallel(retinaface).cuda()
        retinaface.training = True

        optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)

        print('Start to train.')

        # Counts logging events; multiplied by args.verbose it gives the
        # TensorBoard step of each logged point.
        iteration = 0

        for epoch in range(args.epochs):
            retinaface.train()

            # Training
            for iter_num, data in enumerate(dataloader_train):
                optimizer.zero_grad()
                classification_loss, bbox_regression_loss, \
                    ldm_regression_loss = retinaface(
                        [data['img'].cuda().float(), data['annot']])
                # .mean() reduces each returned loss tensor to a scalar.
                classification_loss = classification_loss.mean()
                bbox_regression_loss = bbox_regression_loss.mean()
                ldm_regression_loss = ldm_regression_loss.mean()

                loss = (classification_loss + bbox_regression_loss +
                        ldm_regression_loss)

                loss.backward()
                optimizer.step()

                if iter_num % args.verbose == 0:
                    log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                        epoch, args.epochs, iter_num, total_batch)
                    table_data = [
                        ['loss name', 'value'],
                        ['total_loss', str(loss.item())],
                        ['classification',
                         str(classification_loss.item())],
                        ['bbox', str(bbox_regression_loss.item())],
                        ['landmarks', str(ldm_regression_loss.item())],
                    ]
                    log_str += AsciiTable(table_data).table
                    print(log_str)

                    # write the log to tensorboard
                    step = iteration * args.verbose
                    writer.add_scalar('losses:', loss.item(), step)
                    writer.add_scalar('class losses:',
                                      classification_loss.item(), step)
                    writer.add_scalar('box losses:',
                                      bbox_regression_loss.item(), step)
                    writer.add_scalar('landmark losses:',
                                      ldm_regression_loss.item(), step)
                    iteration += 1

            # Eval
            if epoch % args.eval_step == 0:
                print('-------- RetinaFace Pytorch --------')
                print('Evaluating epoch {}'.format(epoch))
                recall, precision = eval_widerface.evaluate(
                    dataloader_val, retinaface)
                # Keep a separate checkpoint whenever precision improves.
                if precision_global < precision:
                    precision_global = precision
                    torch.save(
                        retinaface.state_dict(), args.save_path +
                        '/model_Best_epoch_{}.pt'.format(epoch + 1))
                print('Recall:', recall)
                print('Precision:', precision, "best Precision:",
                      precision_global)
                writer.add_scalar('Recall:', recall, epoch * args.eval_step)
                writer.add_scalar('Precision:', precision,
                                  epoch * args.eval_step)

            # Save model
            if (epoch + 1) % args.save_step == 0:
                torch.save(
                    retinaface.state_dict(), args.save_path +
                    '/model_epoch_{}.pt'.format(epoch + 1))
    finally:
        # Flush pending TensorBoard events even if training is interrupted.
        writer.close()
def main():
    """Detect faces and landmarks in a video, writing and showing the result.

    Uses ``args.model_path`` (checkpoint), ``args.video_path`` (input),
    ``args.save_path`` (annotated output video, MJPG at 25 fps) and
    ``args.scale`` (frames are shrunk by this factor unless it equals 1.0).
    Processing stops when no more frames can be read or 'q' is pressed. The
    model and the input batch are moved to the GPU, so a CUDA device is
    required.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model. The first seven characters of every checkpoint key
    # are dropped before matching (presumably the 'module.' prefix added by
    # DataParallel -- TODO confirm); keys the model does not have are skipped.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read video
    cap = cv2.VideoCapture(args.video_path)
    codec = cv2.VideoWriter_fourcc(*'MJPG')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    fps = 25.0
    # Created on the first frame so its size matches what is really written.
    out = None

    font = cv2.FONT_HERSHEY_SIMPLEX
    # One colour per landmark, in the order the (x, y) pairs are stored.
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255),
                       (255, 255, 0))

    try:
        while True:
            ret, img = cap.read()
            if not ret:
                print('Video open error.')
                break

            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)  # h, w, c -> c, h, w

            if not args.scale == 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks, picked_scores = \
                eval_widerface.get_detections(input_img,
                                              RetinaFace,
                                              score_threshold=0.5,
                                              iou_threshold=0.3)

            # Back to an h, w, c uint8 array. The permuted view may not be
            # C-contiguous, which OpenCV drawing functions require.
            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = np.ascontiguousarray(np_img.astype(np.uint8))

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark, score in zip(boxes, picked_landmarks[j],
                                                picked_scores[j]):
                    # OpenCV drawing calls need plain Python ints.
                    x1, y1, x2, y2 = [int(box[k]) for k in range(4)]
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                                  thickness=2)
                    for k, color in enumerate(landmark_colors):
                        center = (int(landmark[2 * k]),
                                  int(landmark[2 * k + 1]))
                        cv2.circle(img,
                                   center,
                                   radius=1,
                                   color=color,
                                   thickness=2)
                    cv2.putText(img,
                                text=str(score.item())[:5],
                                org=(x1, y1),
                                fontFace=font,
                                fontScale=0.5,
                                thickness=1,
                                lineType=cv2.LINE_AA,
                                color=(255, 255, 255))

            if out is None:
                # Size the writer from the processed frame: with
                # args.scale != 1.0 it differs from the capture size, and a
                # size mismatch makes VideoWriter drop the frame.
                frame_h, frame_w = img.shape[:2]
                out = cv2.VideoWriter(args.save_path, codec, fps,
                                      (frame_w, frame_h))
            out.write(img)
            cv2.imshow('RetinaFace-Pytorch', img)
            key = cv2.waitKey(1)
            if key == ord('q'):
                print('Now quit.')
                break
    finally:
        # Release the capture, finalize the output file and close the window
        # even if processing a frame raises.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main():
    """Run detection on every image listed in a text file and log the boxes.

    Each line of ``args.text_path`` is copied to ``out.txt`` and printed. A
    line starting with '#' names the image after its first two characters;
    any other line names an image without its ``.jpg`` extension. The path is
    ``args.image_path_prefix`` plus that name with '/' replaced by a
    backslash. For every image that exists, ``out.txt`` receives the number
    of detected faces followed by one ``x y w h score`` line per face.
    ``args.scale`` shrinks the image unless it equals 1.0. The model and the
    input batch are moved to the GPU, so a CUDA device is required.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model. The first seven characters of every checkpoint key
    # are dropped before matching (presumably the 'module.' prefix added by
    # DataParallel -- TODO confirm); keys the model does not have are skipped.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    font = cv2.FONT_HERSHEY_SIMPLEX
    # One colour per landmark, in the order the (x, y) pairs are stored.
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255),
                       (255, 255, 0))

    # Both files are closed on every exit path, including errors.
    with open("out.txt", 'w') as o, open(args.text_path, 'r') as f:
        for line in f.readlines():
            o.write(line)
            print(line)
            line = line.rstrip()
            if line.startswith('#'):
                path = args.image_path_prefix + line[2:].replace('/', '\\')
            else:
                path = (args.image_path_prefix + line.replace('/', '\\') +
                        ".jpg")

            if not os.path.exists(path):
                continue

            # Read the image as grayscale and replicate it to three channels.
            gray = skimage.io.imread(path, as_gray=True).astype(np.float32)
            bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
            img = (bgr * 255.0).astype(np.uint8)

            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)  # h, w, c -> c, h, w

            if not args.scale == 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks, picked_scores = \
                eval_widerface.get_detections(input_img,
                                              RetinaFace,
                                              score_threshold=0.5,
                                              iou_threshold=0.3)

            # Number of faces found in this image.
            if picked_boxes is None or picked_boxes[0] is None:
                o.write("0" + '\n')
            else:
                o.write(str(picked_boxes[0].shape[0]) + '\n')

            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark, score in zip(boxes, picked_landmarks[j],
                                                picked_scores[j]):
                    # Convert the box once; written as x, y, width, height.
                    coords = box.cpu().detach().numpy()
                    record = " ".join([
                        str(coords[0]),
                        str(coords[1]),
                        str(coords[2] - coords[0]),
                        str(coords[3] - coords[1]),
                        str(score.item()),
                    ]) + '\n'
                    print(record)
                    o.write(record)

                    # OpenCV drawing calls need plain Python ints.
                    x1, y1, x2, y2 = [int(v) for v in coords[:4]]
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                                  thickness=2)
                    for k, color in enumerate(landmark_colors):
                        center = (int(landmark[2 * k]),
                                  int(landmark[2 * k + 1]))
                        cv2.circle(img,
                                   center,
                                   radius=1,
                                   color=color,
                                   thickness=2)
                    cv2.putText(img,
                                text=str(score.item())[:5],
                                org=(x1, y1),
                                fontFace=font,
                                fontScale=0.5,
                                thickness=1,
                                lineType=cv2.LINE_AA,
                                color=(255, 255, 255))

            # Saving and displaying the annotated image is disabled:
            # image_name = path.split('\\')[-1]
            # save_path = os.path.join(args.save_path, image_name)
            # cv2.imwrite(save_path, img)
            # cv2.imshow('RetinaFace-Pytorch', img)
            # cv2.waitKey()
def main(nummmmmm):
    """Run 68-point landmark detection on dataset samples 100..149.

    Each sample of ``./widerface/train/label.txt`` in that index range is
    passed through the model, annotated with the detected boxes and
    landmarks, and written to ``./RetinaFace-Pytorch<index>.jpg`` at 640x640.
    The time spent in detection is printed in seconds.

    Args:
        nummmmmm: Unused; kept so existing callers keep working.
    """
    import time

    # No option is read below; the call is kept for its command-line
    # handling.
    get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load trained model. map_location lets a checkpoint saved on a GPU load
    # on a CPU-only machine, matching the device fallback above. The first
    # seven characters of every key are dropped before matching (presumably
    # the 'module.' prefix added by DataParallel -- TODO confirm).
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(
        '/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_51.pt',
        map_location='cpu')
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace.to(device)
    RetinaFace.eval()

    dataset_val = TrainDataset('./widerface/train/label.txt',
                               transform=transforms.Compose(
                                   [Resizer(640), PadToSquare()]))

    for qq in range(100, 150):
        img = dataset_val[qq]['img']
        img = img.permute(2, 0, 1)  # h, w, c -> c, h, w
        resized_img = img.float()
        input_img = resized_img.unsqueeze(0).to(device)

        start = time.time()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.9, iou_threshold=0.2)
        print(time.time() - start)

        # Back to an h, w, c uint8 array with the channel order swapped for
        # OpenCV.
        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is None:
                continue
            for box, landmark in zip(boxes, picked_landmarks[j]):
                # OpenCV drawing calls need plain Python ints.
                x1, y1, x2, y2 = [int(box[k]) for k in range(4)]
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255),
                              thickness=2)
                # 136 values = 68 (x, y) landmark pairs.
                for i in range(0, 136, 2):
                    cv2.circle(img, (int(landmark[i]), int(landmark[i + 1])),
                               radius=1,
                               color=(0, 0, 255),
                               thickness=2)

        cv2.imwrite('./RetinaFace-Pytorch{}.jpg'.format(qq),
                    cv2.resize(img, (640, 640)))
def _detect_faces(frame, scale, face_net):
    """Run RetinaFace on one h, w, c uint8 frame; return canvas and detections.

    The frame is shrunk by ``scale`` unless it equals 1.0. Returns the
    (possibly resized) frame as a C-contiguous uint8 array to draw on, plus
    the boxes and landmarks from ``eval_widerface.get_detections``.
    """
    img = torch.from_numpy(frame)
    img = img.permute(2, 0, 1)  # h, w, c -> c, h, w

    if not scale == 1.0:
        size1 = int(img.shape[1] / scale)
        size2 = int(img.shape[2] / scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, face_net, score_threshold=0.5, iou_threshold=0.3)

    # The permuted view may not be C-contiguous, which OpenCV drawing
    # functions require.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    canvas = np.ascontiguousarray(np_img.astype(np.uint8))
    return canvas, picked_boxes, picked_landmarks


def _draw_head_poses(img, picked_boxes, picked_landmarks, pose_net,
                     transformations, idx_tensor, draw_landmarks):
    """Estimate the head pose of every detected face and draw it on ``img``.

    For each box the face crop is fed to ``pose_net``; the pose axes and the
    box are drawn in place. The five landmarks are drawn only when
    ``draw_landmarks`` is true.
    """
    # One colour per landmark, in the order the (x, y) pairs are stored.
    landmark_colors = ((0, 0, 255), (0, 255, 0), (255, 0, 0), (0, 255, 255),
                       (255, 255, 0))

    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark in zip(boxes, picked_landmarks[j]):
            # Crop face, clipped to the image bounds.
            x_min = max(int(box[0]), 0)
            x_max = min(int(box[2]), img.shape[1])
            y_min = max(int(box[1]), 0)
            y_max = min(int(box[3]), img.shape[0])

            # Skip boxes that are empty after clipping.
            if not x_min < x_max or not y_min < y_max:
                continue

            bbox_height = abs(y_max - y_min)

            face_img = img[y_min:y_max, x_min:x_max]
            face_img = Image.fromarray(face_img)

            # Transform, then add the batch dimension.
            face_img = transformations(face_img)
            face_img = face_img.unsqueeze(0).cuda()

            yaw, pitch, roll = pose_net(face_img)

            # dim=1 normalises over the bins of each sample; it is stated
            # explicitly because the implicit-dim form is deprecated.
            yaw_predicted = F.softmax(yaw, dim=1)
            pitch_predicted = F.softmax(pitch, dim=1)
            roll_predicted = F.softmax(roll, dim=1)

            # Get continuous predictions in degrees: expected bin index
            # times 3, shifted by -99.
            yaw_predicted = torch.sum(
                yaw_predicted.data[0] * idx_tensor) * 3 - 99
            pitch_predicted = torch.sum(
                pitch_predicted.data[0] * idx_tensor) * 3 - 99
            roll_predicted = torch.sum(
                roll_predicted.data[0] * idx_tensor) * 3 - 99

            utils.draw_axis(img,
                            yaw_predicted,
                            pitch_predicted,
                            roll_predicted,
                            tdx=(x_min + x_max) / 2,
                            tdy=(y_min + y_max) / 2,
                            size=bbox_height / 2)

            # The unclipped box is drawn; OpenCV needs plain Python ints.
            cv2.rectangle(img, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (255, 0, 255),
                          thickness=2)

            if draw_landmarks:
                for k, color in enumerate(landmark_colors):
                    center = (int(landmark[2 * k]), int(landmark[2 * k + 1]))
                    cv2.circle(img,
                               center,
                               radius=1,
                               color=color,
                               thickness=2)


def main():
    """Detect faces with RetinaFace and estimate their head pose with Hopenet.

    Uses ``args.f_model`` (RetinaFace checkpoint), ``args.p_model`` (Hopenet
    checkpoint), ``args.scale`` and ``args.type``. When ``args.type`` is
    'image', ``args.image_path`` is annotated and shown. Otherwise
    ``args.video_path`` is processed frame by frame, shown, and written to
    ``args.out`` (MJPG at 25 fps) until the video ends or 'q' is pressed.
    Both models are moved to the GPU, so a CUDA device is required.

    Raises:
        FileNotFoundError: in image mode, if ``args.image_path`` cannot be
            read as an image.
    """
    args = get_args()

    # Create retinaface. The first seven characters of every checkpoint key
    # are dropped before matching (presumably the 'module.' prefix added by
    # DataParallel -- TODO confirm); keys the model does not have are skipped.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.f_model)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()
    print('Retinaface create success.')

    # Create hopenet
    Hopenet = hopenet.Hopenet(torchvision.models.resnet.Bottleneck,
                              [3, 4, 6, 3], 66)
    saved_state_dict = torch.load(args.p_model)
    Hopenet.load_state_dict(saved_state_dict)
    Hopenet = Hopenet.cuda()
    Hopenet.eval()
    print('Hopenet create success.')

    # Bin indices 0..65 used to turn bin probabilities into an expectation.
    idx_tensor = torch.FloatTensor(list(range(66))).cuda()

    # transforms.Resize is the current name of the removed transforms.Scale.
    transformations = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    if args.type == 'image':
        # cv2.imread returns None instead of raising on failure.
        frame = cv2.imread(args.image_path)
        if frame is None:
            raise FileNotFoundError('Cannot read image: {}'.format(
                args.image_path))

        img, picked_boxes, picked_landmarks = _detect_faces(
            frame, args.scale, RetinaFace)
        _draw_head_poses(img,
                         picked_boxes,
                         picked_landmarks,
                         Hopenet,
                         transformations,
                         idx_tensor,
                         draw_landmarks=False)

        cv2.imshow('RetinaFace-Hopenet', img)
        cv2.waitKey()
        return

    # Read video
    cap = cv2.VideoCapture(args.video_path)
    codec = cv2.VideoWriter_fourcc(*'MJPG')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    fps = 25.0
    # Created on the first frame so its size matches what is really written.
    out = None

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print('Video open error.')
                break

            img, picked_boxes, picked_landmarks = _detect_faces(
                frame, args.scale, RetinaFace)
            _draw_head_poses(img,
                             picked_boxes,
                             picked_landmarks,
                             Hopenet,
                             transformations,
                             idx_tensor,
                             draw_landmarks=True)

            if out is None:
                # Size the writer from the processed frame: with
                # args.scale != 1.0 it differs from the capture size, and a
                # size mismatch makes VideoWriter drop the frame.
                frame_h, frame_w = img.shape[:2]
                out = cv2.VideoWriter(args.out, codec, fps,
                                      (frame_w, frame_h))
            out.write(img)
            cv2.imshow('RetinaFace-Pytorch', img)
            key = cv2.waitKey(1)
            if key == ord('q'):
                print('Now quit.')
                break
    finally:
        # Release the capture, finalize the output file and close the window
        # even if processing a frame raises.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
def main():
    """Fine-tune (or only evaluate) a RetinaFace model from a fixed checkpoint.

    Reads ``save_path``, ``batch``, ``epochs``, ``verbose``, ``eval_step``,
    ``save_step`` and ``training`` from ``get_args()``. Both the training and
    the evaluation loader read ``./widerface/train/label.txt``. When
    ``args.training`` equals False the model is evaluated once and the
    function returns. Otherwise it trains with a cosine learning-rate
    schedule, appends loss and evaluation figures to ``aaa.txt`` and saves
    checkpoints under ``args.save_path``. The model is moved to the GPU, so a
    CUDA device is required.
    """
    args = get_args()

    # makedirs creates save_path and its 'log' sub-directory in one call and
    # also works when save_path is nested or already exists.
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    dataset_train = TrainDataset('./widerface/train/label.txt',
                                 transform=transforms.Compose([
                                     RandomErasing(),
                                     RandomFlip(),
                                     Rotate(),
                                     Color(),
                                     Resizer(),
                                     PadToSquare()
                                 ]))
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  batch_size=args.batch,
                                  collate_fn=collater,
                                  shuffle=True)
    dataset_val = TrainDataset('./widerface/train/label.txt',
                               transform=transforms.Compose(
                                   [Resizer(640), PadToSquare()]))
    dataloader_val = DataLoader(dataset_val,
                                num_workers=8,
                                batch_size=args.batch,
                                collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)
    retinaface = torch.nn.DataParallel(retinaface.cuda()).cuda()
    retinaface.training = True
    base_lr = 1e-7

    retinaface.load_state_dict(
        torch.load(
            "/versa/elvishelvis/RetinaYang/out/stage_5_68_full_model_epoch_121.pt"
        ))
    lr = base_lr

    # Freeze every parameter whose name contains 'Landmark'.
    for name, value in retinaface.named_parameters():
        if 'Landmark' in name:
            value.requires_grad = False

    # Cosine decay from base_lr at epoch 0 towards 0 at args.epochs.
    def lr_cos(n):
        return 0.5 * (1 + np.cos(n / args.epochs * np.pi)) * base_lr

    # Group 0 holds the frozen parameters at three times the learning rate
    # and group 1 holds the trainable ones.
    # NOTE(review): frozen parameters receive no gradient, so group 0
    # presumably has no effect -- confirm the intended split.
    params = [p for p in retinaface.parameters() if p.requires_grad]
    body = [p for p in retinaface.parameters() if not p.requires_grad]
    optimizer = torch.optim.Adam([{
        'params': body,
        'lr': lr * 3
    }, {
        'params': params,
        'lr': lr
    }])

    # Evaluate the current model only, then stop.
    if args.training == False:  # noqa: E712 - keeps the original comparison
        print("not pretrain")
        recall, precision, landmakr, miss = eval_widerface.evaluate(
            dataloader_val, retinaface)
        print('Recall:', recall)
        print('Precision:', precision)
        print("landmark: ", str(landmakr))
        print("miss: " + str(miss))
        return

    print('Start to train.')

    for epoch in range(args.epochs):
        # Apply the cosine schedule. The value used to be computed and then
        # discarded, so the optimizer always ran at base_lr.
        lr = lr_cos(epoch)
        optimizer.param_groups[0]['lr'] = lr * 3
        optimizer.param_groups[1]['lr'] = lr

        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            classification_loss, bbox_regression_loss, \
                ldm_regression_loss = retinaface(
                    [data['img'].cuda().float(), data['annot']])
            # .mean() reduces each returned loss tensor to a scalar.
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            loss = (classification_loss + 0.15 * bbox_regression_loss +
                    0.25 * ldm_regression_loss)

            loss.backward()
            optimizer.step()

            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [
                    ['loss name', 'value'],
                    ['total_loss', str(loss.item())],
                    ['classification',
                     str(classification_loss.item())],
                    ['bbox', str(bbox_regression_loss.item())],
                    ['landmarks', str(ldm_regression_loss.item())],
                ]
                log_str += AsciiTable(table_data).table
                print(log_str)

        # Eval
        if epoch % args.eval_step == 0:
            # Losses of the last training batch of this epoch.
            with open("aaa.txt", 'a') as f:
                f.write('-------- RetinaFace Pytorch --------' + '\n')
                f.write('Evaluating epoch {}'.format(epoch) + '\n')
                f.write('total_loss:' + str(loss.item()) + '\n')
                f.write('classification' + str(classification_loss.item()) +
                        '\n')
                f.write('bbox' + str(bbox_regression_loss.item()) + '\n')
                f.write('landmarks' + str(ldm_regression_loss.item()) + '\n')

            print('-------- RetinaFace Pytorch --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision, landmakr, miss = eval_widerface.evaluate(
                dataloader_val, retinaface)
            print('Recall:', recall)
            print('Precision:', precision)
            print("landmark: ", str(landmakr))
            print("miss: " + str(miss))

            with open("aaa.txt", 'a') as f:
                f.write(
                    '-------- RetinaFace Pytorch --------(not pretrain)' +
                    '\n')
                f.write('Evaluating epoch {}'.format(epoch) + '\n')
                f.write('Recall:' + str(recall) + '\n')
                f.write('Precision:' + str(precision) + '\n')
                f.write("landmark: " + str(landmakr) + '\n')
                f.write("miss: " + str(miss) + '\n')

        # Save model
        if epoch % args.save_step == 0:
            torch.save(
                retinaface.state_dict(), args.save_path +
                '/stage_5_68_full_model_epoch_{}.pt'.format(epoch + 1))
def main():
    """Detect faces in one image, draw boxes and landmarks, then save and show it.

    Reads ``args.model_path``, ``args.image_path`` and ``args.save_path`` from
    ``get_args()``. The image is padded to a square, resized to 640x640 and fed
    to RetinaFace on the GPU; the annotated 640x640 image is written into
    ``args.save_path`` under the input file's name and displayed in a window
    until a key is pressed.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model. The first 7 characters of every checkpoint key are
    # dropped (presumably the 'module.' prefix of a DataParallel checkpoint)
    # and only keys that exist in this model are kept.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image and move channels first: (h, w, c) -> (c, h, w)
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img)
    img = img.permute(2, 0, 1)

    padded_img, _ = pad_to_square(img, 0)
    resized_img = resize(padded_img.float(), (640, 640))
    input_img = resized_img.unsqueeze(0).cuda()
    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # Back to (h, w, c) uint8 and swap the channel order for OpenCV
    # (COLOR_BGR2RGB is the same swap as RGB -> BGR).
    np_img = resized_img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    # One colour per landmark point; each landmark row holds five (x, y) pairs.
    landmark_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0)]

    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark in zip(boxes, picked_landmarks[j]):
            cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                          (0, 0, 255), thickness=2)
            for i, color in enumerate(landmark_colors):
                cv2.circle(img, (landmark[2 * i], landmark[2 * i + 1]),
                           radius=1, color=color, thickness=2)

    # basename handles the platform's path separators, unlike split('/').
    image_name = os.path.basename(args.image_path)
    save_path = os.path.join(args.save_path, image_name)
    cv2.imwrite(save_path, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
def main():
    """Fine-tune RetinaFace from a fixed checkpoint with a cosine learning-rate decay.

    The dataset is split 70/30 into train and validation subsets. Every
    ``args.eval_step`` epochs the model is evaluated and the metrics are
    appended to ``bbb.txt``; every ``args.save_step`` epochs the weights are
    saved under ``args.save_path``.
    """
    args = get_args()

    # makedirs also creates missing parents and tolerates existing directories.
    os.makedirs(args.save_path, exist_ok=True)
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    dataset_train = TrainDataset(
        transform=transforms.Compose([Rotate(), Resizer(), Color()]))
    len_train_set = int(len(dataset_train) * 0.7)
    len_val_set = len(dataset_train) - len_train_set
    train_set, val_set = random_split(dataset_train,
                                      [len_train_set, len_val_set])
    dataloader_train = DataLoader(train_set, num_workers=8,
                                  batch_size=args.batch,
                                  collate_fn=collater, shuffle=True)
    dataloader_val = DataLoader(val_set, num_workers=8,
                                batch_size=args.batch, collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)
    retinaface = retinaface.cuda()

    base_lr = 1e-4
    optimizer = optim.Adam(retinaface.parameters(), lr=base_lr)

    retinaface = torch.nn.DataParallel(retinaface).cuda()
    retinaface.load_state_dict(torch.load("./out/mnas_epoch__ori111124.pt"))

    def lr_cos(n):
        # Half-cosine decay from base_lr (epoch 0) towards 0 (epoch args.epochs).
        return 0.5 * (1 + np.cos((n) / (args.epochs) * np.pi)) * base_lr

    print('Start to train.')

    for epoch in range(args.epochs):
        lr = lr_cos(epoch)
        # Push the scheduled rate into the optimizer; computing it alone
        # leaves Adam running at base_lr for the whole run.
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print("Current lr is {}".format(lr))
        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                [data['img'].cuda().float(), data['annot']])
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            # NOTE(review): the bbox term is logged below but is not part of
            # the optimised loss here - confirm this is intentional.
            loss = classification_loss + 0.1 * ldm_regression_loss

            loss.backward()
            optimizer.step()

            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [
                    ['loss name', 'value'],
                    ['total_loss', str(loss.item())],
                    ['classification', str(classification_loss.item())],
                    ['bbox', str(bbox_regression_loss.item())],
                    ['landmarks', str(ldm_regression_loss.item())],
                ]
                table = AsciiTable(table_data)
                log_str += table.table
                print(log_str)

        # Eval
        if epoch % args.eval_step == 0:
            print('-------- RetinaFace Pytorch --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision, landmark_metric, miss = eval_widerface.evaluate(
                dataloader_val, retinaface)
            print('Recall:', recall)
            print('Precision:', precision)
            print("landmark: ", str(landmark_metric))
            print("miss: " + str(miss))

            # The with-statement closes the file; no explicit close() needed.
            with open("bbb.txt", 'a') as f:
                f.write('-------- RetinaFace Pytorch --------(pretrain)' + '\n')
                f.write('Evaluating epoch {}'.format(epoch) + '\n')
                f.write('Recall:' + str(recall) + '\n')
                f.write('Precision:' + str(precision) + '\n')
                f.write("landmark: " + str(landmark_metric) + '\n')
                f.write("miss: " + str(miss) + '\n')

        # Save model (the numeric offset in the file name is kept as-is so
        # checkpoint names stay compatible with earlier runs).
        if (epoch + 1) % args.save_step == 0:
            torch.save(
                retinaface.state_dict(),
                os.path.join(
                    args.save_path,
                    'pretrain{}.pt'.format(epoch + 1 + 5 + 1112222211100)))
def _run_training(args, writer):
    """Train RetinaFace, logging losses and evaluation metrics to ``writer``."""
    data_path = args.data_path
    train_path = os.path.join(data_path, 'train/label.txt')
    val_path = os.path.join(data_path, 'val/label.txt')

    dataloader_train, dataloader_test = load_data(train_path, args.batch_size,
                                                  split_train_test=True)
    dataloader_val = load_data(val_path, args.batch_size)

    total_batch = len(dataloader_train)

    # Create torchvision model
    retinaface = torchvision_model.create_retinaface().cuda()
    retinaface = torch.nn.DataParallel(retinaface).cuda()

    optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)

    print('Start to train.')

    # Counts logging events; multiplied by args.verbose it approximates the
    # number of batches seen and is used as the TensorBoard step.
    iteration = 0

    for epoch in range(args.epochs):
        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                [data['img'].cuda().float(), data['annot']])
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            loss = (classification_loss + bbox_regression_loss
                    + 0.5 * ldm_regression_loss)

            loss.backward()
            optimizer.step()

            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [
                    ['loss name', 'value'],
                    ['total_loss', str(loss.item())],
                    ['classification', str(classification_loss.item())],
                    ['bbox', str(bbox_regression_loss.item())],
                    ['landmarks', str(ldm_regression_loss.item())],
                ]
                table = AsciiTable(table_data)
                log_str += table.table
                print("train loses:")
                print(log_str)

                # write the log to tensorboard
                step = iteration * args.verbose
                writer.add_scalar('losses:', loss.item(), step)
                writer.add_scalar('class losses:',
                                  classification_loss.item(), step)
                writer.add_scalar('box losses:',
                                  bbox_regression_loss.item(), step)
                writer.add_scalar('landmark losses:',
                                  ldm_regression_loss.item(), step)
                iteration += 1

        # NOTE(review): run once per epoch on the held-out split of the
        # training data - confirm this matches the intended cadence.
        validate(dataloader_test, retinaface)

        # Eval
        if epoch % args.eval_step == 0:
            print('-------- RetinaFace --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision = eval_widerface.evaluate(
                dataloader_val, retinaface)
            print('Recall:', recall)
            print('Precision:', precision)

            writer.add_scalar('Recall:', recall, epoch * args.eval_step)
            writer.add_scalar('Precision:', precision,
                              epoch * args.eval_step)

        # Save model
        if (epoch + 1) % args.save_step == 0:
            torch.save(retinaface.state_dict(),
                       args.save_path + '/model_epoch_{}.pt'.format(epoch + 1))


def main():
    """Train RetinaFace and record progress to TensorBoard.

    Creates ``args.save_path`` and its ``log`` sub-directory, opens a
    ``SummaryWriter`` there, and runs the training loop. The writer is closed
    even when training is interrupted by an exception, so events that were
    already logged are flushed to disk.
    """
    args = get_args()

    # makedirs also creates missing parents and tolerates existing directories.
    os.makedirs(args.save_path, exist_ok=True)
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    writer = SummaryWriter(log_dir=log_path)
    try:
        _run_training(args, writer)
    finally:
        writer.close()
def main():
    """Run RetinaFace live on the default webcam and display the detections.

    Each captured frame is passed through the detector and shown with boxes
    and landmarks drawn on it. The loop ends when the camera stops delivering
    frames or when ``q`` is pressed in the display window; the capture device
    and the window are released on exit.
    """
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model. The first 7 characters of every checkpoint key are
    # dropped (presumably the 'module.' prefix of a DataParallel checkpoint)
    # and only keys that exist in this model are kept.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # One colour per landmark point; each landmark row holds five (x, y) pairs.
    landmark_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0)]

    vc = cv2.VideoCapture(0)
    try:
        # Read video frames in a loop
        while True:
            rval, img_raw = vc.read()
            if not rval:
                # Camera unavailable or stream ended.
                break

            # Use the captured frame (BGR from OpenCV) as the network input,
            # converted to RGB and channels-first: (h, w, c) -> (c, h, w).
            img = torch.from_numpy(cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB))
            img = img.permute(2, 0, 1)

            if not args.scale == 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            # Back to (h, w, c) uint8 in OpenCV's BGR order for drawing; the
            # detections are in the coordinates of this (possibly resized) image.
            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                  (0, 0, 255), thickness=2)
                    for i, color in enumerate(landmark_colors):
                        cv2.circle(img,
                                   (landmark[2 * i], landmark[2 * i + 1]),
                                   radius=1, color=color, thickness=2)

            cv2.imshow('RetinaFace-Pytorch', img)
            # A 1 ms wait keeps the window responsive without stalling the
            # stream; waitKey() with no delay would block on every frame.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        vc.release()
        cv2.destroyAllWindows()
def main():
    """Time RetinaFace over 100 CelebA images and dump the annotated results.

    Images ``118400.jpg`` .. ``118499.jpg`` are read from a fixed directory,
    resized to 320x320 and run through the detector. Each annotated image is
    written to the working directory as ``RetinaFace-Pytorch<n>.jpg`` (upscaled
    to 640x640), and the total elapsed wall-clock time is printed at the end.
    """
    import time

    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model. The first 7 characters of every checkpoint key are
    # dropped (presumably the 'module.' prefix of a DataParallel checkpoint)
    # and only keys that exist in this model are kept.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    # Inference only: switch to evaluation mode like the other detection
    # entry points do, so train-time layer behaviour is disabled.
    RetinaFace.eval()

    start = time.time()
    for qq in range(400, 500):
        img = skimage.io.imread(
            "/versa/elvishelvis/RetinaFace_Pytorch/CelebA/Img/img_celeba.7z/img_celeba/118{}.jpg"
            .format(str(qq)))
        print(img.shape)

        # Channels first: (h, w, c) -> (c, h, w)
        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)
        resized_img = resize(img.float(), (320, 320))
        input_img = resized_img.unsqueeze(0).cuda()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        # Back to (h, w, c) uint8 and swap the channel order for OpenCV.
        np_img = resized_img.cpu().permute(1, 2, 0).numpy()
        img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

        for j, boxes in enumerate(picked_boxes):
            if boxes is None:
                continue
            for box, landmark in zip(boxes, picked_landmarks[j]):
                cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                              (0, 0, 255), thickness=2)
                # Five (x, y) landmark pairs per face.
                for i in range(0, 10, 2):
                    cv2.circle(img, (landmark[i], landmark[i + 1]),
                               radius=1, color=(0, 0, 255), thickness=2)

        cv2.imwrite('RetinaFace-Pytorch{}.jpg'.format(qq),
                    cv2.resize(img, (640, 640)))

    print(time.time() - start)
# Loaded detectors keyed by checkpoint path, so repeated detect_img() calls
# (e.g. once per video frame) do not rebuild the network and re-read the weights.
_RETINAFACE_MODELS = {}


def _load_retinaface(model_path):
    """Return an eval-mode, GPU-resident RetinaFace loaded from ``model_path`` (cached)."""
    model = _RETINAFACE_MODELS.get(model_path)
    if model is None:
        # Create torchvision model
        return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
        model = torchvision_model.create_retinaface(return_layers)

        # Load trained model. The first 7 characters of every checkpoint key
        # are dropped (presumably the 'module.' prefix of a DataParallel
        # checkpoint) and only keys that exist in this model are kept.
        retina_dict = model.state_dict()
        pre_state_dict = torch.load(model_path)
        pretrained_dict = {
            k[7:]: v for k, v in pre_state_dict.items() if k[7:] in retina_dict
        }
        model.load_state_dict(pretrained_dict)
        model = model.cuda()
        model.eval()
        _RETINAFACE_MODELS[model_path] = model
    return model


def detect_img(img):
    """Detect faces in ``img`` and return a copy annotated with the results.

    Args:
        img: Image array in OpenCV's BGR channel order, shape (h, w, 3).

    Returns:
        A uint8 BGR image with, for every detected face, its bounding box,
        five landmark points and confidence score drawn on it. When
        ``args.scale`` is not 1.0 the returned image has the rescaled size
        ``(h / scale, w / scale)``.
    """
    args = get_args()
    RetinaFace = _load_retinaface(args.model_path)

    # Convert to RGB and channels-first: (h, w, c) -> (c, h, w)
    img = torch.from_numpy(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    img = img.permute(2, 0, 1)

    if not args.scale == 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks, picked_scores = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # Back to (h, w, c) uint8 in BGR order for drawing with OpenCV.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)

    font = cv2.FONT_HERSHEY_SIMPLEX
    # One colour per landmark point; each landmark row holds five (x, y) pairs.
    landmark_colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0),
                       (0, 255, 255), (255, 255, 0)]

    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark, score in zip(boxes, picked_landmarks[j],
                                        picked_scores[j]):
            cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                          (255, 0, 255), thickness=2)
            for i, color in enumerate(landmark_colors):
                cv2.circle(img, (landmark[2 * i], landmark[2 * i + 1]),
                           radius=1, color=color, thickness=2)
            # Score label, truncated to five characters, at the box's top-left.
            cv2.putText(img,
                        text=str(score.item())[:5],
                        org=(box[0], box[1]),
                        fontFace=font,
                        fontScale=0.5,
                        thickness=1,
                        lineType=cv2.LINE_AA,
                        color=(255, 255, 255))

    return img