# Shared imports for the loaders/runners below. The project-local helpers
# (ModelTrainer, HGModelTrainer, the FaceLandmarks* datasets, Menpo,
# ImageTransform, draw_landmarks, count_parameters, evaluate_split,
# visualize_split, mkdir_if_not_exists) come from this repository's own modules.
import argparse
import json
import multiprocessing
import os

import cv2
import dlib
import h5py
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from torchvision import transforms


def load_hg(self, path, location):
    """Load a trained hourglass checkpoint and return (model, config)."""
    data = torch.load(path, map_location=location)
    state_dict = data['state_dict']
    config = data['config']

    hg = HGModelTrainer.create_net(config, verbose=False)
    hg.load_state_dict(state_dict)
    hg.eval()
    hg = hg.to(location)

    return hg, config
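# Usage sketch for load_hg (hypothetical checkpoint path; load_hg is written
# as a method but never touches `self`, so any placeholder instance works):
def _example_load_hg():
    hg, hg_config = load_hg(None, "checkpoints/hg_final.torch", torch.device("cpu"))
    dummy = torch.zeros(1, 3, 128, 128)  # the 128x128 input size used throughout this file
    with torch.no_grad():
        output = hg(dummy)
    return output, hg_config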
def count_model_parameters(model):
    """Load a checkpoint on the CPU and return (config_id, #trainable parameters)."""
    location = torch.device("cpu")
    data = torch.load(model, map_location=location)
    state_dict = data['state_dict']
    config = data['config']
    config_id = config["config_id"]

    net = ModelTrainer.create_net(config, verbose=False)
    net.load_state_dict(state_dict)
    net.eval()
    net.to(location)

    _, trainable_parameters, _ = count_parameters(net)
    return config_id, trainable_parameters
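# Usage sketch for count_model_parameters (hypothetical checkpoint path):
def _example_count_parameters():
    config_id, n_params = count_model_parameters("checkpoints/model_final.torch")
    print("config %s has %d trainable parameters" % (config_id, n_params))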
def run_hg(model, images, gpu):
    """Run a trained model on a list of image files and return the predicted landmarks."""
    # Load model
    location = "cpu" if gpu is None else "cuda:%d" % gpu
    data = torch.load(model, map_location=location)
    state_dict = data['state_dict']
    config = data['config']

    net = ModelTrainer.create_net(config, verbose=False)
    net.load_state_dict(state_dict)
    net.eval()
    net.to(location)

    # Define transformations that normalize the image
    normMean, normStd = FaceLandmarksTrainingData.TRAIN_MEAN, FaceLandmarksTrainingData.TRAIN_STD
    normTransform = transforms.Normalize(normMean, normStd)
    transform = transforms.Compose([
        ImageTransform(transforms.ToPILImage()),
        ImageTransform(transforms.ToTensor()),
        ImageTransform(normTransform)
    ])

    # Read images and apply the transformations to them
    imgs = []
    for img_fn in images:
        img = cv2.imread(img_fn)[:, :, ::-1]  # BGR -> RGB
        img = cv2.resize(img, (128, 128))     # resize returns a fresh contiguous array
        imgs.append(transform({"image": torch.tensor(img).permute(2, 0, 1)})["image"])

    # Run the network on all images in one batch (can run out of memory if too many are used)
    imgs = torch.stack(imgs).to(location)
    with torch.no_grad():
        predictions, *_ = net(imgs)

    return predictions.detach()
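# Usage sketch for the batch run_hg above (hypothetical file names; gpu=None
# keeps everything on the CPU, gpu=0 would select cuda:0):
def _example_run_hg_batch():
    predictions = run_hg("checkpoints/model_final.torch",
                         ["faces/img1.jpg", "faces/img2.jpg"],
                         gpu=None)
    print(predictions.shape)  # one set of landmarks per input image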
def run_hg(model, image, bb=False):
    """Detect faces in a single image, run the landmark model on each face crop,
    and display the annotated image. If bb is True, also draw the detection boxes."""
    location = torch.device("cpu")
    data = torch.load(model, map_location=location)
    state_dict = data['state_dict']
    config = data['config']

    net = ModelTrainer.create_net(config, verbose=False)
    net.load_state_dict(state_dict)
    net.eval()
    net.to(location)

    normMean, normStd = FaceLandmarksTrainingData.TRAIN_MEAN, FaceLandmarksTrainingData.TRAIN_STD
    normTransform = transforms.Normalize(normMean, normStd)
    transform = transforms.Compose([
        ImageTransform(transforms.ToPILImage()),
        ImageTransform(transforms.ToTensor()),
        ImageTransform(normTransform)
    ])

    img = cv2.imread(image)
    imgs = []
    positions = []

    # TODO find a way to detect the whole head
    dnnFaceDetector = dlib.cnn_face_detection_model_v1("../other/mmod_human_face_detector.dat")
    faceRects = dnnFaceDetector(img, 0)
    for rect in faceRects:
        x1 = rect.rect.left()
        y1 = rect.rect.top()
        x2 = rect.rect.right()
        y2 = rect.rect.bottom()
        w = x2 - x1
        h = y2 - y1

        # Shrink the longer side so the crop is square
        if h > w:
            y2 -= h - w
        if w > h:
            x2 -= w - h

        positions.append((x1, y1, x2, y2))
        # .copy() makes the reversed-channel (BGR -> RGB) view contiguous
        # before it is handed to torch
        face = cv2.resize(img[y1:y2, x1:x2], (128, 128))[:, :, ::-1].copy()
        imgs.append(transform({"image": torch.tensor(face).permute(2, 0, 1)})["image"])

        if bb:
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)

    imgs = torch.stack(imgs)
    out = net(imgs)[0]
    for coords, (x1, y1, x2, y2) in zip(out.detach().numpy(), positions):
        img[y1:y2, x1:x2] = draw_landmarks(img[y1:y2, x1:x2], coords, size=1)

    cv2.imshow("landmarks", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
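# Usage sketch for the detector-based run_hg above (hypothetical paths; note
# that it needs ../other/mmod_human_face_detector.dat on disk and opens a GUI
# window, so it is meant for interactive use):
def _example_run_hg_interactive():
    run_hg("checkpoints/model_final.torch", "faces/group_photo.jpg", bb=True)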
# Script-level setup: load the model named on the command line and build the
# evaluation transform.
parser = argparse.ArgumentParser()
parser.add_argument("model")
parser.add_argument("menpo")
args = parser.parse_args()

model = args.model
location = "cuda:0"
data = torch.load(model, map_location=location)
state_dict = data['state_dict']
config = data['config']

num_workers = 8
batch_size = config['batch_size']
pin_memory = True

net = ModelTrainer.create_net(config, verbose=False)
net.load_state_dict(state_dict)
net.eval()
net = net.to(location)

normMean, normStd = FaceLandmarksTrainingData.TRAIN_MEAN, FaceLandmarksTrainingData.TRAIN_STD
normTransform = transforms.Normalize(normMean, normStd)
transform = transforms.Compose([
    ImageTransform(transforms.ToPILImage()),
    ImageTransform(transforms.ToTensor()),
    ImageTransform(normTransform)
])
def visualize(model, dataset, target, gpu=None, splits=("easy", "hard"), landmarks_in_heatmaps=True):
    location = 'cpu' if gpu is None else "cuda:%d" % gpu
    if location != 'cpu':
        # This fixes the problem that pytorch always allocates memory on GPU 0,
        # even if GPU 0 is not in the list of GPUs to use
        torch.cuda.set_device(torch.device(location))

        # cudnn.benchmark improves training speed when input sizes do not change
        # https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
        # It selects the best algorithms as the training iterates over the dataset
        cudnn.benchmark = True
    print("Location: ", location)

    data = torch.load(model, map_location=location)
    state_dict = data['state_dict']
    config = data['config']

    num_workers = multiprocessing.cpu_count()
    batch_size = config['batch_size'] if gpu is not None else num_workers
    pin_memory = gpu is not None
    print("Workers: ", num_workers)
    print("Batchsize: ", batch_size)

    net = ModelTrainer.create_net(config, verbose=False)
    net.load_state_dict(state_dict)
    net.eval()
    net = net.to(location)

    mkdir_if_not_exists(target)

    normMean, normStd = FaceLandmarksTrainingData.TRAIN_MEAN, FaceLandmarksTrainingData.TRAIN_STD
    normTransform = transforms.Normalize(normMean, normStd)
    transform = transforms.Compose([
        ImageTransform(transforms.ToPILImage()),
        # Augmentations are deliberately disabled for visualization:
        #ImageAndLabelTransform(RandomHorizontalFlip()),
        #ImageAndLabelTransform(RandomRotation(min_angle=-0, max_angle=0, retain_scale=False)),
        ImageTransform(transforms.ToTensor()),
        ImageTransform(normTransform)
    ])

    with h5py.File(dataset, 'r') as f:
        if "easy" in splits:
            print("Run on easy")
            easy_d = FaceLandmarksEasyTestData(f, transform=transform)
            easy_loader = DataLoader(dataset=easy_d, shuffle=False, num_workers=num_workers,
                                     pin_memory=pin_memory, batch_size=batch_size)
            visualize_split(net, easy_loader, os.path.join(target, "easy"), location,
                            landmarks_in_heatmaps=landmarks_in_heatmaps)

        if "hard" in splits:
            print("Run on hard")
            hard_d = FaceLandmarksHardTestData(f, transform=transform)
            hard_loader = DataLoader(dataset=hard_d, shuffle=False, num_workers=num_workers,
                                     pin_memory=pin_memory, batch_size=batch_size)
            visualize_split(net, hard_loader, os.path.join(target, "hard"), location,
                            landmarks_in_heatmaps=landmarks_in_heatmaps)

        if "train" in splits:
            print("Run on train")
            train = FaceLandmarksTrainingData(f, transform=transform)
            train_loader = DataLoader(dataset=train, shuffle=False, num_workers=num_workers,
                                      pin_memory=pin_memory, batch_size=batch_size)
            visualize_split(net, train_loader, os.path.join(target, "train"), location,
                            landmarks_in_heatmaps=landmarks_in_heatmaps)
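# Usage sketch for visualize (hypothetical paths; writes annotated images into
# <target>/easy, <target>/hard and <target>/train via visualize_split):
def _example_visualize():
    visualize("checkpoints/model_final.torch", "data/300w.h5", "/tmp/visualizations",
              gpu=None, splits=("easy", "hard", "train"))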
def run(model, src_300w, src_menpo, target, gpu=None, override_norm_params=False, bs_factor=1):
    location = 'cpu' if gpu is None else "cuda:%d" % gpu
    if location != 'cpu':
        # This fixes the problem that pytorch always allocates memory on GPU 0,
        # even if GPU 0 is not in the list of GPUs to use
        torch.cuda.set_device(torch.device(location))

        # cudnn.benchmark improves training speed when input sizes do not change
        # https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
        # It selects the best algorithms as the training iterates over the dataset
        #cudnn.benchmark = True  # disabled for deterministic behavior
    print("Location: ", location)

    data = torch.load(model, map_location=location)
    state_dict = data['state_dict']
    config = data['config']

    n_lm = config["n_lm"]
    if n_lm == 49:
        print("WARNING: this is a 49-landmark model!", n_lm)

    num_workers = multiprocessing.cpu_count()
    batch_size = config['batch_size'] * bs_factor if gpu is not None else num_workers
    pin_memory = gpu is not None
    print("Workers: ", num_workers)
    print("Batchsize: ", batch_size)

    net = ModelTrainer.create_net(config, verbose=False)
    net.load_state_dict(state_dict)
    net.eval()
    net.to(location)

    if target is not None:
        mkdir_if_not_exists(os.path.dirname(target))

    normMean, normStd = FaceLandmarksTrainingData.TRAIN_MEAN, FaceLandmarksTrainingData.TRAIN_STD
    if override_norm_params:
        normMean = tuple(np.array([133.0255852472676, 101.61684197664563, 87.4134193236219]) / 255.0)
        normStd = tuple(np.array([71.91047346327116, 62.94368776888253, 61.56865329427311]) / 255.0)
    normTransform = transforms.Normalize(normMean, normStd)
    transform = transforms.Compose([
        ImageTransform(transforms.ToPILImage()),
        ImageTransform(transforms.ToTensor()),
        ImageTransform(normTransform)
    ])

    with h5py.File(src_300w, 'r') as f:
        print("Run on easy")
        easy_d = FaceLandmarksEasyTestData(f, transform=transform, n_lm=n_lm)
        easy_loader = DataLoader(dataset=easy_d, shuffle=False, num_workers=num_workers,
                                 pin_memory=pin_memory, batch_size=batch_size)
        easy_results = evaluate_split(net, easy_loader, location=location, n_lm=n_lm)

        print("Run on hard")
        hard_d = FaceLandmarksHardTestData(f, transform=transform, n_lm=n_lm)
        hard_loader = DataLoader(dataset=hard_d, shuffle=False, num_workers=num_workers,
                                 pin_memory=pin_memory, batch_size=batch_size)
        hard_results = evaluate_split(net, hard_loader, location=location, n_lm=n_lm)

        print("Run on train")
        train = FaceLandmarksTrainingData(f, transform=transform, n_lm=n_lm)
        train_loader = DataLoader(dataset=train, shuffle=False, num_workers=num_workers,
                                  pin_memory=pin_memory, batch_size=batch_size)
        train_results = evaluate_split(net, train_loader, location=location, n_lm=n_lm)

    with h5py.File(src_menpo, "r") as f:
        print("Run on menpo")
        menpo = Menpo(f, transform=transform, n_lm=n_lm)
        menpo_loader = DataLoader(dataset=menpo, shuffle=False, num_workers=num_workers,
                                  pin_memory=pin_memory, batch_size=batch_size)
        menpo_results = evaluate_split(net, menpo_loader, location=location, n_lm=n_lm)

    res = {
        "easy": easy_results,
        "hard": hard_results,
        "train": train_results,
        "menpo": menpo_results,
        "model_src": model,
        "config": config
    }

    if target is not None:
        with open(target, "w") as out:
            json.dump(res, out)
    else:
        return res
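# Usage sketch for run (hypothetical paths; passing target=None returns the
# result dict instead of writing it to a JSON file):
def _example_run():
    results = run("checkpoints/model_final.torch", "data/300w.h5", "data/menpo.h5",
                  target=None, gpu=0)
    print(results["easy"])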