def main():
    """Build (or resume) an EyeNet model and run training.

    Relies on module-level names: `args` (parsed CLI namespace), `device`,
    and the `train` entry point.

    Raises:
        Exception: when starting fresh and `args.out` already exists, to
            avoid clobbering a previous checkpoint.
    """
    learning_rate = 4 * 1e-4

    if args.start_from:
        # Resume: architecture hyper-parameters and best loss come from
        # the checkpoint so the rebuilt model matches the saved weights.
        start_from = torch.load(args.start_from, map_location=device)
        nstack = start_from['nstack']
        nfeatures = start_from['nfeatures']
        nlandmarks = start_from['nlandmarks']
        best_val_loss = start_from['best_val_loss']
    elif os.path.exists(args.out):
        raise Exception(f'Out file {args.out} already exists.')
    else:
        start_from = None
        nstack = args.nstack
        nfeatures = args.nfeatures
        nlandmarks = args.nlandmarks
        best_val_loss = float('inf')

    # Model/optimizer construction was duplicated verbatim in both branches;
    # build once here, then restore state only when resuming.
    eyenet = EyeNet(nstack=nstack, nfeatures=nfeatures,
                    nlandmarks=nlandmarks).to(device)
    optimizer = torch.optim.Adam(eyenet.parameters(), lr=learning_rate)
    if start_from is not None:
        eyenet.load_state_dict(start_from['model_state_dict'])
        optimizer.load_state_dict(start_from['optimizer_state_dict'])

    train(eyenet=eyenet,
          optimizer=optimizer,
          nepochs=args.nepochs,
          best_val_loss=best_val_loss,
          checkpoint_fn=args.out)
def validate(eyenet: EyeNet, val_loader: DataLoader) -> float:
    """Return the mean combined loss of `eyenet` over `val_loader`.

    Uses the same loss weighting as training (heatmap term scaled by 1000).
    Runs under `torch.no_grad()`; no gradients are accumulated.
    """
    batch_losses = []
    with torch.no_grad():
        for batch in val_loader:
            imgs = batch['img'].float().to(device)
            gt_heatmaps = batch['heatmaps'].to(device)
            gt_landmarks = batch['landmarks'].to(device)
            gt_gaze = batch['gaze'].float().to(device)

            hm_pred, lm_pred, gz_pred = eyenet.forward(imgs)
            hm_loss, lm_loss, gz_loss = eyenet.calc_loss(
                hm_pred, gt_heatmaps, lm_pred, gt_landmarks, gz_pred, gt_gaze)

            combined = 1000 * hm_loss + lm_loss + gz_loss
            batch_losses.append(combined.item())

    return np.mean(batch_losses)
def init_model(transform):
    """Load the face detector, landmark predictor and EyeNet checkpoint.

    Populates module-level globals consumed by the rest of the pipeline.
    `transform` is currently unused; kept for interface compatibility.

    Returns:
        (None, None) — placeholder pair expected by the caller.
    """
    # Fix: `device` was assigned as a LOCAL here, while other functions in
    # this project read a module-level `device`; it must be in the global
    # declaration so the assignment is visible to them.
    global face_cascade, landmarks_detector, checkpoint, nstack, nfeatures, \
        nlandmarks, eyenet, device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    dirname = pathToProject
    face_cascade = cv2.CascadeClassifier(
        os.path.join(dirname, 'lbpcascade_frontalface_improved.xml'))
    landmarks_detector = dlib.shape_predictor(
        os.path.join(dirname, 'shape_predictor_5_face_landmarks.dat'))
    # NOTE(review): checkpoint is loaded from the current working directory,
    # unlike the other assets which are resolved against `dirname` — confirm
    # this is intentional.
    checkpoint = torch.load('checkpoint.pt', map_location=device)
    nstack = checkpoint['nstack']
    nfeatures = checkpoint['nfeatures']
    nlandmarks = checkpoint['nlandmarks']
    eyenet = EyeNet(nstack=nstack, nfeatures=nfeatures,
                    nlandmarks=nlandmarks).to(device)
    eyenet.load_state_dict(checkpoint['model_state_dict'])
    return None, None
import os

import cv2
import numpy as np
import torch  # fix: torch is used throughout this script but was never imported
from matplotlib import pyplot as plt

from datasets.mpii_gaze import MPIIGaze
from models.eyenet import EyeNet
from util.preprocess import gaussian_2d
import util.gaze

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

dataset = MPIIGaze()

# Restore the trained model; architecture hyper-parameters are stored
# alongside the weights in the checkpoint.
checkpoint = torch.load('checkpoint.pt', map_location=device)
nstack = checkpoint['nstack']
nfeatures = checkpoint['nfeatures']
nlandmarks = checkpoint['nlandmarks']
eyenet = EyeNet(nstack=nstack, nfeatures=nfeatures,
                nlandmarks=nlandmarks).to(device)
eyenet.load_state_dict(checkpoint['model_state_dict'])

# Evaluation loop: no gradients needed.
with torch.no_grad():
    errors = []
    print('N', len(dataset))
    for i, sample in enumerate(dataset):
        print(i)
        # Wrap single image in a batch dimension before the forward pass.
        x = torch.tensor([sample['img']]).float().to(device)
        heatmaps_pred, landmarks_pred, gaze_pred = eyenet.forward(x)
        gaze = sample['gaze'].reshape((1, 2))
        gaze_pred = np.asarray(gaze_pred.cpu().numpy())
        # NOTE(review): the loop body appears truncated at this chunk
        # boundary; error accumulation into `errors` presumably follows.
def train_epoch(epoch: int, eyenet: EyeNet, optimizer, train_loader: DataLoader,
                val_loader: DataLoader, best_val_loss: float, checkpoint_fn: str,
                writer: SummaryWriter):
    """Run one training epoch; checkpoint whenever validation improves.

    Every 20 batches: dumps debug heatmap/eye images, runs validation,
    and saves a checkpoint to `checkpoint_fn` if validation loss improved.

    Returns:
        float: the best validation loss observed so far (possibly updated).
    """
    N = len(train_loader)
    for i_batch, sample_batched in enumerate(train_loader):
        # Global step index so TensorBoard curves are continuous across epochs.
        i_batch += N * epoch

        imgs = sample_batched['img'].float().to(device)
        heatmaps_pred, landmarks_pred, gaze_pred = eyenet.forward(imgs)

        heatmaps = sample_batched['heatmaps'].to(device)
        landmarks = sample_batched['landmarks'].float().to(device)
        gaze = sample_batched['gaze'].float().to(device)

        heatmaps_loss, landmarks_loss, gaze_loss = eyenet.calc_loss(
            heatmaps_pred, heatmaps, landmarks_pred, landmarks, gaze_pred, gaze)
        # Heatmap term is scaled up to balance the combined objective
        # (same weighting used in validate()).
        loss = 1000 * heatmaps_loss + landmarks_loss + gaze_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i_batch % 20 == 0:
            # Perf fix: these visualization tensors were previously computed
            # on EVERY batch but only written every 20th; compute them inside
            # the guard. Channels 8:16 presumably select the iris landmarks'
            # heatmaps — TODO confirm against EyeNet's channel layout.
            hm = np.mean(heatmaps[-1, 8:16].cpu().detach().numpy(), axis=0)
            hm_pred = np.mean(
                heatmaps_pred[-1, -1, 8:16].cpu().detach().numpy(), axis=0)
            norm_hm = cv2.normalize(hm, None, alpha=0, beta=1,
                                    norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
            norm_hm_pred = cv2.normalize(hm_pred, None, alpha=0, beta=1,
                                         norm_type=cv2.NORM_MINMAX,
                                         dtype=cv2.CV_32F)
            cv2.imwrite('true.jpg', norm_hm * 255)
            cv2.imwrite('pred.jpg', norm_hm_pred * 255)
            cv2.imwrite('eye.jpg', sample_batched['img'].numpy()[-1] * 255)

        writer.add_scalar("Training heatmaps loss", heatmaps_loss.item(), i_batch)
        writer.add_scalar("Training landmarks loss", landmarks_loss.item(), i_batch)
        writer.add_scalar("Training gaze loss", gaze_loss.item(), i_batch)
        writer.add_scalar("Training loss", loss.item(), i_batch)

        if i_batch > 0 and i_batch % 20 == 0:
            val_loss = validate(eyenet=eyenet, val_loader=val_loader)
            writer.add_scalar("validation loss", val_loss, i_batch)
            print('Epoch', epoch, 'Validation loss', val_loss)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # Save architecture hyper-parameters with the weights so the
                # model can be rebuilt from the checkpoint alone.
                torch.save({
                    'nstack': eyenet.nstack,
                    'nfeatures': eyenet.nfeatures,
                    'nlandmarks': eyenet.nlandmarks,
                    'best_val_loss': best_val_loss,
                    'model_state_dict': eyenet.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, checkpoint_fn)
    return best_val_loss
# --- Module-level setup: configure webcam capture and load models. ---
webcam.set(cv2.CAP_PROP_FRAME_WIDTH, 960)
webcam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
webcam.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
webcam.set(cv2.CAP_PROP_FPS, 60)
dirname = os.path.dirname(__file__)
face_cascade = cv2.CascadeClassifier(
    os.path.join(dirname, 'lbpcascade_frontalface_improved.xml'))
landmarks_detector = dlib.shape_predictor(
    os.path.join(dirname, 'shape_predictor_5_face_landmarks.dat'))
# NOTE(review): checkpoint is loaded from the current working directory,
# unlike the cascade/predictor assets resolved against `dirname` — confirm.
checkpoint = torch.load('checkpoint.pt', map_location=device)
nstack = checkpoint['nstack']
nfeatures = checkpoint['nfeatures']
nlandmarks = checkpoint['nlandmarks']
eyenet = EyeNet(nstack=nstack, nfeatures=nfeatures,
                nlandmarks=nlandmarks).to(device)
eyenet.load_state_dict(checkpoint['model_state_dict'])


def main():
    """Webcam capture loop (body continues beyond this chunk; truncated here)."""
    current_face = None
    landmarks = None
    alpha = 0.95  # presumably an EMA smoothing factor for detections — TODO confirm
    left_eye = None
    right_eye = None
    while True:
        _, frame_bgr = webcam.read()
        orig_frame = frame_bgr.copy()
        frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        # NOTE(review): `frame` is already RGB here, so COLOR_BGR2GRAY applies
        # BGR channel weights to RGB data; converting from `frame_bgr` would be
        # the conventional choice — confirm intent before changing.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)