def get_model(model_path):
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = create_retinaface(return_layers)

    # Load trained model. Checkpoints saved through nn.DataParallel prefix
    # every key with 'module.', so strip the first 7 characters of each key.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()
    return RetinaFace
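# Usage sketch for get_model (the checkpoint path below is a hypothetical
# placeholder, and eval_widerface.get_detections is assumed to be imported
# as in the scripts further down):
#
#   model = get_model('./out/model_epoch_30.pt')
#   with torch.no_grad():
#       boxes, landmarks = eval_widerface.get_detections(
#           input_img, model, score_threshold=0.5, iou_threshold=0.3)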
def main():
    args = get_args()

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    log_path = os.path.join(args.save_path, 'log')
    if not os.path.exists(log_path):
        os.mkdir(log_path)
    writer = SummaryWriter(log_dir=log_path)

    data_path = args.data_path
    train_path = os.path.join(data_path, 'train/label.txt')
    val_path = os.path.join(data_path, 'val/label.txt')
    # dataset_train = TrainDataset(train_path, transform=transforms.Compose([RandomCroper(), RandomFlip()]))
    dataset_train = TrainDataset(train_path,
                                 transform=transforms.Compose(
                                     [Resizer(), PadToSquare()]))
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  batch_size=args.batch,
                                  collate_fn=collater,
                                  shuffle=True)
    # dataset_val = ValDataset(val_path, transform=transforms.Compose([RandomCroper()]))
    dataset_val = ValDataset(val_path,
                             transform=transforms.Compose(
                                 [Resizer(), PadToSquare()]))
    dataloader_val = DataLoader(dataset_val,
                                num_workers=8,
                                batch_size=args.batch,
                                collate_fn=collater)
    total_batch = len(dataloader_train)

    # Create the model
    # if args.depth == 18:
    #     retinaface = model.resnet18(num_classes=2, pretrained=True)
    # elif args.depth == 34:
    #     retinaface = model.resnet34(num_classes=2, pretrained=True)
    # elif args.depth == 50:
    #     retinaface = model.resnet50(num_classes=2, pretrained=True)
    # elif args.depth == 101:
    #     retinaface = model.resnet101(num_classes=2, pretrained=True)
    # elif args.depth == 152:
    #     retinaface = model.resnet152(num_classes=2, pretrained=True)
    # else:
    #     raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)
    retinaface = torch.nn.DataParallel(retinaface).cuda()
    retinaface.training = True

    optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)
    # optimizer = optim.SGD(retinaface.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.0005)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 30, 60], gamma=0.1)

    print('Start to train.')

    epoch_loss = []
    iteration = 0

    for epoch in range(args.epochs):
        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                [data['img'].cuda().float(), data['annot']])
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            # loss = classification_loss + 1.0 * bbox_regression_loss + 0.5 * ldm_regression_loss
            loss = classification_loss + bbox_regression_loss + ldm_regression_loss

            loss.backward()
            optimizer.step()

            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [
                    ['loss name', 'value'],
                    ['total_loss', str(loss.item())],
                    ['classification', str(classification_loss.item())],
                    ['bbox', str(bbox_regression_loss.item())],
                    ['landmarks', str(ldm_regression_loss.item())],
                ]
                table = AsciiTable(table_data)
                log_str += table.table
                print(log_str)

                # Write the log to TensorBoard. `iteration` counts verbose
                # intervals, so the global step is iteration * args.verbose.
                writer.add_scalar('losses:', loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('class losses:', classification_loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('box losses:', bbox_regression_loss.item(),
                                  iteration * args.verbose)
                writer.add_scalar('landmark losses:', ldm_regression_loss.item(),
                                  iteration * args.verbose)
                iteration += 1

        # Eval
        if epoch % args.eval_step == 0:
            print('-------- RetinaFace Pytorch --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision = eval_widerface.evaluate(dataloader_val,
                                                        retinaface)
            print('Recall:', recall)
            print('Precision:', precision)

            # `epoch` is already the global step for per-epoch metrics.
            writer.add_scalar('Recall:', recall, epoch)
            writer.add_scalar('Precision:', precision, epoch)

        # Save model
        if (epoch + 1) % args.save_step == 0:
            torch.save(retinaface.state_dict(),
                       os.path.join(args.save_path,
                                    'model_epoch_{}.pt'.format(epoch + 1)))

    writer.close()
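# The training script above relies on a get_args() helper that is not shown
# in this section. A minimal sketch covering only the flags the script
# actually reads; names are taken from the code, but defaults and help text
# are assumptions, not the repository's exact CLI:
import argparse

def get_args():
    parser = argparse.ArgumentParser(description='Train RetinaFace')
    parser.add_argument('--data_path', type=str,
                        help='dataset root containing train/ and val/')
    parser.add_argument('--save_path', type=str, default='./out',
                        help='where to write logs and checkpoints')
    parser.add_argument('--batch', type=int, default=32, help='batch size')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of training epochs')
    parser.add_argument('--verbose', type=int, default=20,
                        help='log every N iterations')
    parser.add_argument('--eval_step', type=int, default=1,
                        help='evaluate every N epochs')
    parser.add_argument('--save_step', type=int, default=10,
                        help='save a checkpoint every N epochs')
    return parser.parse_args()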
def main():
    args = get_args()

    # Create RetinaFace
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model; strip the 'module.' prefix left by nn.DataParallel.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.f_model)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()
    print('RetinaFace created successfully.')

    # Create Hopenet: ResNet-50 backbone with 66 bins per angle.
    Hopenet = hopenet.Hopenet(torchvision.models.resnet.Bottleneck,
                              [3, 4, 6, 3], 66)
    saved_state_dict = torch.load(args.p_model)
    Hopenet.load_state_dict(saved_state_dict)
    Hopenet = Hopenet.cuda()
    Hopenet.eval()
    print('Hopenet created successfully.')

    idx_tensor = [idx for idx in range(66)]
    idx_tensor = torch.FloatTensor(idx_tensor).cuda()

    transformations = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    if args.type == 'image':
        img = cv2.imread(args.image_path)
        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)

        if not args.scale == 1.0:
            size1 = int(img.shape[1] / args.scale)
            size2 = int(img.shape[2] / args.scale)
            img = resize(img.float(), (size1, size2))

        input_img = img.unsqueeze(0).float().cuda()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        np_img = img.cpu().permute(1, 2, 0).numpy()
        img = np_img.astype(np.uint8)

        for j, boxes in enumerate(picked_boxes):
            if boxes is None:
                continue
            for box, landmark in zip(boxes, picked_landmarks[j]):
                # Crop face
                x_min = int(box[0])
                x_max = int(box[2])
                y_min = int(box[1])
                y_max = int(box[3])

                # Clip to image bounds
                x_min = max(x_min, 0)
                x_max = min(x_max, img.shape[1])
                y_min = max(y_min, 0)
                y_max = min(y_max, img.shape[0])
                if not x_min < x_max or not y_min < y_max:
                    continue
                bbox_height = abs(y_max - y_min)

                face_img = img[y_min:y_max, x_min:x_max]
                face_img = Image.fromarray(face_img)

                # Transform
                face_img = transformations(face_img)
                img_shape = face_img.size()
                face_img = face_img.view(1, img_shape[0], img_shape[1],
                                         img_shape[2])
                face_img = face_img.cuda()

                yaw, pitch, roll = Hopenet(face_img)
                yaw_predicted = F.softmax(yaw, dim=1)
                pitch_predicted = F.softmax(pitch, dim=1)
                roll_predicted = F.softmax(roll, dim=1)

                # Get continuous predictions in degrees: expected bin index
                # times 3 degrees per bin, shifted into [-99, 99].
                yaw_predicted = torch.sum(
                    yaw_predicted.data[0] * idx_tensor) * 3 - 99
                pitch_predicted = torch.sum(
                    pitch_predicted.data[0] * idx_tensor) * 3 - 99
                roll_predicted = torch.sum(
                    roll_predicted.data[0] * idx_tensor) * 3 - 99

                utils.draw_axis(img, yaw_predicted, pitch_predicted,
                                roll_predicted,
                                tdx=(x_min + x_max) / 2,
                                tdy=(y_min + y_max) / 2,
                                size=bbox_height / 2)
                cv2.rectangle(img, (int(box[0]), int(box[1])),
                              (int(box[2]), int(box[3])),
                              (255, 0, 255), thickness=2)
                # cv2.circle(img, (landmark[0], landmark[1]), radius=1, color=(0, 0, 255), thickness=2)
                # cv2.circle(img, (landmark[2], landmark[3]), radius=1, color=(0, 255, 0), thickness=2)
                # cv2.circle(img, (landmark[4], landmark[5]), radius=1, color=(255, 0, 0), thickness=2)
                # cv2.circle(img, (landmark[6], landmark[7]), radius=1, color=(0, 255, 255), thickness=2)
                # cv2.circle(img, (landmark[8], landmark[9]), radius=1, color=(255, 255, 0), thickness=2)

        cv2.imshow('RetinaFace-Hopenet', img)
        key = cv2.waitKey()
    else:
        # Read video
        cap = cv2.VideoCapture(args.video_path)
        codec = cv2.VideoWriter_fourcc(*'MJPG')
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = 25.0
        out = cv2.VideoWriter(args.out, codec, fps, (width, height))

        while True:
            ret, img = cap.read()
            if not ret:
                print('End of video (or frame read failure).')
                break

            img = torch.from_numpy(img)
            img = img.permute(2, 0, 1)

            if not args.scale == 1.0:
                size1 = int(img.shape[1] / args.scale)
                size2 = int(img.shape[2] / args.scale)
                img = resize(img.float(), (size1, size2))

            input_img = img.unsqueeze(0).float().cuda()
            picked_boxes, picked_landmarks = eval_widerface.get_detections(
                input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

            np_img = img.cpu().permute(1, 2, 0).numpy()
            img = np_img.astype(np.uint8)

            for j, boxes in enumerate(picked_boxes):
                if boxes is None:
                    continue
                for box, landmark in zip(boxes, picked_landmarks[j]):
                    # Crop face
                    x_min = int(box[0])
                    x_max = int(box[2])
                    y_min = int(box[1])
                    y_max = int(box[3])

                    # Clip to image bounds
                    x_min = max(x_min, 0)
                    x_max = min(x_max, img.shape[1])
                    y_min = max(y_min, 0)
                    y_max = min(y_max, img.shape[0])
                    if not x_min < x_max or not y_min < y_max:
                        continue
                    bbox_height = abs(y_max - y_min)

                    face_img = img[y_min:y_max, x_min:x_max]
                    face_img = Image.fromarray(face_img)

                    # Transform
                    face_img = transformations(face_img)
                    img_shape = face_img.size()
                    face_img = face_img.view(1, img_shape[0], img_shape[1],
                                             img_shape[2])
                    face_img = face_img.cuda()

                    yaw, pitch, roll = Hopenet(face_img)
                    yaw_predicted = F.softmax(yaw, dim=1)
                    pitch_predicted = F.softmax(pitch, dim=1)
                    roll_predicted = F.softmax(roll, dim=1)

                    # Get continuous predictions in degrees.
                    yaw_predicted = torch.sum(
                        yaw_predicted.data[0] * idx_tensor) * 3 - 99
                    pitch_predicted = torch.sum(
                        pitch_predicted.data[0] * idx_tensor) * 3 - 99
                    roll_predicted = torch.sum(
                        roll_predicted.data[0] * idx_tensor) * 3 - 99

                    utils.draw_axis(img, yaw_predicted, pitch_predicted,
                                    roll_predicted,
                                    tdx=(x_min + x_max) / 2,
                                    tdy=(y_min + y_max) / 2,
                                    size=bbox_height / 2)
                    cv2.rectangle(img, (int(box[0]), int(box[1])),
                                  (int(box[2]), int(box[3])),
                                  (255, 0, 255), thickness=2)
                    # cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (255, 0, 255), thickness=2)
                    cv2.circle(img, (int(landmark[0]), int(landmark[1])),
                               radius=1, color=(0, 0, 255), thickness=2)
                    cv2.circle(img, (int(landmark[2]), int(landmark[3])),
                               radius=1, color=(0, 255, 0), thickness=2)
                    cv2.circle(img, (int(landmark[4]), int(landmark[5])),
                               radius=1, color=(255, 0, 0), thickness=2)
                    cv2.circle(img, (int(landmark[6]), int(landmark[7])),
                               radius=1, color=(0, 255, 255), thickness=2)
                    cv2.circle(img, (int(landmark[8]), int(landmark[9])),
                               radius=1, color=(255, 255, 0), thickness=2)

            out.write(img)
            cv2.imshow('RetinaFace-Pytorch', img)
            key = cv2.waitKey(1)
            if key == ord('q'):
                print('Quitting.')
                break

        cap.release()
        out.release()
        cv2.destroyAllWindows()
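# For reference, the '* 3 - 99' step above decodes Hopenet's 66 classification
# bins into a continuous angle: the softmax expectation over bin indices is
# scaled by 3 degrees per bin and shifted so the range becomes [-99, 99].
# A standalone sketch of the same computation (the helper name is ours, not
# the repository's; assumes torch and torch.nn.functional as used above):
import torch
import torch.nn.functional as F

def bins_to_degrees(logits, idx_tensor):
    """Expected angle in degrees from (1, 66) pose-bin logits."""
    probs = F.softmax(logits, dim=1)                # bin probabilities
    expectation = torch.sum(probs[0] * idx_tensor)  # expected bin index
    return expectation * 3 - 99                     # 3 deg/bin, centered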
def main():
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model; strip the 'module.' prefix left by nn.DataParallel.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read image (skimage loads RGB)
    img = skimage.io.imread(args.image_path)
    img = torch.from_numpy(img)
    img = img.permute(2, 0, 1)

    if not args.scale == 1.0:
        size1 = int(img.shape[1] / args.scale)
        size2 = int(img.shape[2] / args.scale)
        img = resize(img.float(), (size1, size2))

    input_img = img.unsqueeze(0).float().cuda()
    picked_boxes, picked_landmarks = eval_widerface.get_detections(
        input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

    # Convert RGB back to BGR for OpenCV drawing and saving.
    np_img = img.cpu().permute(1, 2, 0).numpy()
    img = cv2.cvtColor(np_img.astype(np.uint8), cv2.COLOR_RGB2BGR)

    for j, boxes in enumerate(picked_boxes):
        if boxes is None:
            continue
        for box, landmark in zip(boxes, picked_landmarks[j]):
            cv2.rectangle(img, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])),
                          (0, 0, 255), thickness=2)
            cv2.circle(img, (int(landmark[0]), int(landmark[1])),
                       radius=1, color=(0, 0, 255), thickness=2)
            cv2.circle(img, (int(landmark[2]), int(landmark[3])),
                       radius=1, color=(0, 255, 0), thickness=2)
            cv2.circle(img, (int(landmark[4]), int(landmark[5])),
                       radius=1, color=(255, 0, 0), thickness=2)
            cv2.circle(img, (int(landmark[6]), int(landmark[7])),
                       radius=1, color=(0, 255, 255), thickness=2)
            cv2.circle(img, (int(landmark[8]), int(landmark[9])),
                       radius=1, color=(255, 255, 0), thickness=2)

    image_name = args.image_path.split('/')[-1]
    save_path = os.path.join(args.save_path, image_name)
    cv2.imwrite(save_path, img)
    cv2.imshow('RetinaFace-Pytorch', img)
    cv2.waitKey()
def main():
    args = get_args()

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    RetinaFace = torchvision_model.create_retinaface(return_layers)

    # Load trained model; strip the 'module.' prefix left by nn.DataParallel.
    retina_dict = RetinaFace.state_dict()
    pre_state_dict = torch.load(args.model_path)
    pretrained_dict = {
        k[7:]: v
        for k, v in pre_state_dict.items() if k[7:] in retina_dict
    }
    RetinaFace.load_state_dict(pretrained_dict)
    RetinaFace = RetinaFace.cuda()
    RetinaFace.eval()

    # Read video
    cap = cv2.VideoCapture(args.video_path)
    codec = cv2.VideoWriter_fourcc(*'MJPG')
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = 25.0
    out = cv2.VideoWriter(args.save_path, codec, fps, (width, height))

    while True:
        ret, img = cap.read()
        if not ret:
            print('End of video (or frame read failure).')
            break

        img = torch.from_numpy(img)
        img = img.permute(2, 0, 1)

        if not args.scale == 1.0:
            size1 = int(img.shape[1] / args.scale)
            size2 = int(img.shape[2] / args.scale)
            img = resize(img.float(), (size1, size2))

        input_img = img.unsqueeze(0).float().cuda()
        picked_boxes, picked_landmarks = eval_widerface.get_detections(
            input_img, RetinaFace, score_threshold=0.5, iou_threshold=0.3)

        np_img = img.cpu().permute(1, 2, 0).numpy()
        img = np_img.astype(np.uint8)

        for j, boxes in enumerate(picked_boxes):
            if boxes is None:
                continue
            for box, landmark in zip(boxes, picked_landmarks[j]):
                cv2.rectangle(img, (int(box[0]), int(box[1])),
                              (int(box[2]), int(box[3])),
                              (0, 0, 255), thickness=2)
                cv2.circle(img, (int(landmark[0]), int(landmark[1])),
                           radius=1, color=(0, 0, 255), thickness=2)
                cv2.circle(img, (int(landmark[2]), int(landmark[3])),
                           radius=1, color=(0, 255, 0), thickness=2)
                cv2.circle(img, (int(landmark[4]), int(landmark[5])),
                           radius=1, color=(255, 0, 0), thickness=2)
                cv2.circle(img, (int(landmark[6]), int(landmark[7])),
                           radius=1, color=(0, 255, 255), thickness=2)
                cv2.circle(img, (int(landmark[8]), int(landmark[9])),
                           radius=1, color=(255, 255, 0), thickness=2)

        out.write(img)
        cv2.imshow('RetinaFace-Pytorch', img)
        key = cv2.waitKey(1)
        if key == ord('q'):
            print('Quitting.')
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()
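# Example invocation of the video detection script. The script filename is
# assumed, flag names are inferred from the args the script reads, and the
# paths are placeholders:
#
#   python video_detect.py --model_path ./out/model_epoch_30.pt \
#       --video_path ./input.avi --save_path ./out.avi --scale 1.0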