def forward(self, es, ta):
    if self.mod is None:
        sys.path.append(str(config.lpips_root))
        import PerceptualSimilarity.models as ps
        self.mod = ps.PerceptualLoss()
    if self.clip:
        es = torch.clamp(es, -1, 1)
    out = self.mod(es, ta, normalize=False)
    return out.mean()

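# The forward() above lazily imports the PerceptualSimilarity repo on first use.
# A minimal sketch of the enclosing module: the class name `LPIPSLoss` and the
# `clip` default are assumptions; `config.lpips_root`, `self.mod` and `self.clip`
# are taken from the snippet itself.
import sys
import torch
import torch.nn as nn

class LPIPSLoss(nn.Module):
    def __init__(self, clip=True):
        super().__init__()
        self.mod = None   # LPIPS network, loaded lazily on the first forward() call
        self.clip = clip  # whether to clamp estimates to [-1, 1] before scoring
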
def compute_lpips(gt_path, inp_path, version='0.0', use_gpu=True):
    model = models.PerceptualLoss(model='net-lin', net='alex',
                                  use_gpu=use_gpu, version=version)

    img0_np = util.load_image(gt_path)
    img1_np = util.load_image(inp_path)
    img0 = util.im2tensor(img0_np)
    img1 = util.im2tensor(img1_np)

    if use_gpu:
        img0 = img0.cuda()
        img1 = img1.cuda()

    dist01 = model.forward(img0, img1)
    if use_gpu:
        return dist01.item()
    return dist01

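# Example call to compute_lpips() above, assuming PerceptualSimilarity's `models`
# and `util` are imported as in the function; the image paths are illustrative only.
if __name__ == '__main__':
    score = compute_lpips('imgs/gt/0001.png', 'imgs/pred/0001.png', use_gpu=True)
    print('LPIPS distance: %.4f' % score)
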
def main(args):
    ## Distributed computing

    # utility for synchronization
    def reduce_tensor(tensor):
        rt = tensor.clone()
        torch.distributed.all_reduce(rt, op=torch.distributed.ReduceOp.SUM)
        return rt

    # enable distributed computing
    if args.distributed:
        set_affinity(args.local_rank)
        num_devices = torch.cuda.device_count()
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        world_size = torch.distributed.get_world_size()  # os.environ['WORLD_SIZE']
        print('num_devices', num_devices, 'local_rank', args.local_rank,
              'world_size', world_size)
    else:  # if not args.distributed:
        num_devices, world_size = 1, 1

    ## Model preparation (Conv-LSTM or Conv-TT-LSTM)

    # construct the model with the specified hyper-parameters
    model = ConvLSTMNet(
        input_channels=args.img_channels,
        output_sigmoid=args.use_sigmoid,
        # model architecture
        layers_per_block=(3, 3, 3, 3),
        hidden_channels=(32, 48, 48, 32),
        skip_stride=2,
        # convolutional tensor-train layers
        cell=args.model,
        cell_params={"order": args.model_order,
                     "steps": args.model_steps,
                     "ranks": args.model_ranks},
        # convolutional parameters
        kernel_size=args.kernel_size).cuda()

    if args.distributed:
        if args.use_apex:  # use DDP from apex.parallel
            from apex.parallel import DistributedDataParallel as DDP
            model = DDP(model, delay_allreduce=True)
        else:  # use DDP from nn.parallel
            from torch.nn.parallel import DistributedDataParallel as DDP
            model = DDP(model, device_ids=[args.local_rank])

    PSmodel = PSmodels.PerceptualLoss(model='net-lin', net='alex',
                                      use_gpu=True, gpu_ids=[args.local_rank])

    ## Dataset preparation (KTH, MNIST)
    Dataset = {"KTH": KTH_Dataset, "MNIST": MNIST_Dataset}[args.dataset]
    DATA_DIR = os.path.join("../data",
                            {"MNIST": "mnist", "KTH": "kth"}[args.dataset])

    # batch size for each process
    total_batch_size = args.batch_size
    assert total_batch_size % world_size == 0, \
        'The batch_size is not divisible by world_size.'
    batch_size = total_batch_size // world_size

    total_frames = args.input_frames + args.future_frames

    # dataloader for the validation dataset
    test_data_path = os.path.join(DATA_DIR, args.test_data_file)
    assert os.path.exists(test_data_path), \
        "The test dataset does not exist. " + test_data_path

    test_dataset = Dataset({"path": test_data_path, "unique_mode": True,
                            "num_frames": total_frames,
                            "num_samples": args.test_samples,
                            "height": args.img_height, "width": args.img_width,
                            "channels": args.img_channels, 'training': False})

    test_sampler = torch.utils.data.distributed.DistributedSampler(
        test_dataset, num_replicas=world_size, rank=args.local_rank, shuffle=False)

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, drop_last=True,
        num_workers=num_devices * 4, pin_memory=True, sampler=test_sampler)

    test_samples = len(test_loader) * total_batch_size
    print(test_samples)

    ## Main script for test phase
    MSE_  = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    PSNR_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    SSIM_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    PIPS_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()

    with torch.no_grad():
        model.eval()
        for it, frames in enumerate(test_loader):
            frames = frames.permute(0, 1, 4, 2, 3).cuda()
            inputs = frames[:, :args.input_frames]
            origin = frames[:, -args.future_frames:]

            pred = model(inputs, input_frames=args.input_frames,
                         future_frames=args.future_frames,
                         output_frames=args.future_frames,
                         teacher_forcing=False)

            # accumulate the statistics per frame
            for t in range(-args.future_frames, 0):
                origin_, pred_ = origin[:, t], pred[:, t]
                if args.img_channels == 1:
                    origin_ = origin_.repeat([1, 3, 1, 1])
                    pred_ = pred_.repeat([1, 3, 1, 1])
                dist = PSmodel(origin_, pred_)
                PIPS_[t] += torch.sum(dist).item()

            origin = origin.permute(0, 1, 3, 4, 2).cpu().numpy()
            pred = pred.permute(0, 1, 3, 4, 2).cpu().numpy()

            for t in range(-args.future_frames, 0):
                for i in range(batch_size):
                    origin_, pred_ = origin[i, t], pred[i, t]
                    if args.img_channels == 1:
                        origin_ = np.squeeze(origin_, axis=-1)
                        pred_ = np.squeeze(pred_, axis=-1)
                    MSE_[t]  += skimage.metrics.mean_squared_error(origin_, pred_)
                    PSNR_[t] += skimage.metrics.peak_signal_noise_ratio(origin_, pred_)
                    SSIM_[t] += skimage.metrics.structural_similarity(
                        origin_, pred_, multichannel=args.img_channels > 1)

    if args.distributed:
        MSE  = reduce_tensor(MSE_)  / test_samples
        PSNR = reduce_tensor(PSNR_) / test_samples
        SSIM = reduce_tensor(SSIM_) / test_samples
        PIPS = reduce_tensor(PIPS_) / test_samples
    else:  # if not args.distributed:
        MSE  = MSE_  / test_samples
        PSNR = PSNR_ / test_samples
        SSIM = SSIM_ / test_samples
        PIPS = PIPS_ / test_samples

    if args.local_rank == 0:
        print("MSE: {} (x1e-3)\nPSNR: {}\nSSIM: {}\nLPIPS: {}".format(
            1e3 * torch.mean(MSE).cpu().item(), torch.mean(PSNR).cpu().item(),
            torch.mean(SSIM).cpu().item(), torch.mean(PIPS).cpu().item()))
        print(" MSE:", MSE.cpu().numpy())
        print("PSNR:", PSNR.cpu().numpy())
        print("SSIM:", SSIM.cpu().numpy())
        print("PIPS:", PIPS.cpu().numpy())

def main(args):
    ## Model preparation (Conv-LSTM or Conv-TT-LSTM)

    # whether to use GPU (or CPU)
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # whether to use multi-GPU (or single-GPU)
    multi_gpu = use_cuda and args.multi_gpu and torch.cuda.device_count() > 1
    num_gpus = (torch.cuda.device_count() if multi_gpu else 1) if use_cuda else 0

    # construct the model with the specified hyper-parameters
    model = ConvLSTMNet(
        # input to the model
        input_channels=args.img_channels,
        # architecture of the model
        layers_per_block=(3, 3, 3, 3),
        hidden_channels=(32, 48, 48, 32),
        skip_stride=2,
        # parameters of convolutional tensor-train layers
        cell=args.model,
        cell_params={"order": args.model_order,
                     "steps": args.model_steps,
                     "rank": args.model_rank},
        # parameters of convolutional operations
        kernel_size=args.kernel_size, bias=True,
        # output function and output format
        output_sigmoid=args.use_sigmoid)

    # move the model to the device (CPU, GPU, multi-GPU)
    model.to(device)
    if multi_gpu:
        model = nn.DataParallel(model)

    # load the model parameters from checkpoint
    model.load_state_dict(torch.load(args.checkpoint))

    ## Dataset preparation (Moving-MNIST, KTH)
    Dataset = {"MNIST": MNIST_Dataset, "KTH": KTH_Dataset}[args.dataset]
    DATA_DIR = os.path.join("../../datasets",
                            {"MNIST": "moving-mnist", "KTH": "kth"}[args.dataset])

    # number of total frames
    total_frames = args.input_frames + args.future_frames

    # dataloader for the test set
    test_data_path = os.path.join(DATA_DIR, args.test_data_file)
    test_data = Dataset({"path": test_data_path, "unique_mode": True,
                         "num_frames": total_frames,
                         "num_samples": args.test_samples,
                         "height": args.img_height, "width": args.img_width,
                         "channels": args.img_channels})

    test_data_loader = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=False,
        num_workers=5 * max(num_gpus, 1), drop_last=True)

    test_size = len(test_data_loader) * args.batch_size

    ## Main script for test phase
    model.eval()

    MSE  = np.zeros(args.future_frames, dtype=np.float32)
    PSNR = np.zeros(args.future_frames, dtype=np.float32)
    SSIM = np.zeros(args.future_frames, dtype=np.float32)
    PIPS = np.zeros(args.future_frames, dtype=np.float32)

    PSmodel = PSmodels.PerceptualLoss(model='net-lin', net='alex',
                                      use_gpu=use_cuda, gpu_ids=[0])

    with torch.no_grad():
        for frames in test_data_loader:
            # 5-th order: batch_size x total_frames x channels x height x width
            frames = frames.permute(0, 1, 4, 2, 3).to(device)
            inputs = frames[:, :args.input_frames]
            origin = frames[:, -args.future_frames:]

            pred = model(inputs, input_frames=args.input_frames,
                         future_frames=args.future_frames,
                         output_frames=args.future_frames,
                         teacher_forcing=False)

            # clamp the output to [0, 1]
            pred = torch.clamp(pred, min=0, max=1)

            # accumulate the statistics per frame
            for t in range(-args.future_frames, 0):
                origin_, pred_ = origin[:, t], pred[:, t]
                if args.img_channels == 1:
                    origin_ = origin_.repeat([1, 3, 1, 1])
                    pred_ = pred_.repeat([1, 3, 1, 1])
                dist = PSmodel(origin_, pred_)
                PIPS[t] += torch.sum(dist).item() / test_size

            origin = origin.permute(0, 1, 3, 4, 2).cpu().numpy()
            pred = pred.permute(0, 1, 3, 4, 2).cpu().numpy()

            for t in range(-args.future_frames, 0):
                for i in range(args.batch_size):
                    origin_, pred_ = origin[i, t], pred[i, t]
                    if args.img_channels == 1:
                        origin_ = np.squeeze(origin_, axis=-1)
                        pred_ = np.squeeze(pred_, axis=-1)
                    MSE[t]  += skimage.measure.compare_mse(origin_, pred_) / test_size
                    PSNR[t] += skimage.measure.compare_psnr(origin_, pred_) / test_size
                    SSIM[t] += skimage.measure.compare_ssim(
                        origin_, pred_,
                        multichannel=(args.img_channels > 1)) / test_size

    print("MSE: {} (x1e-3); PSNR: {}, SSIM: {}, LPIPS: {}".format(
        1e3 * np.mean(MSE), np.mean(PSNR), np.mean(SSIM), np.mean(PIPS)))
    print(" MSE:", MSE)
    print("PSNR:", PSNR)
    print("SSIM:", SSIM)
    print("PIPS:", PIPS)

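# Note: skimage.measure.compare_mse / compare_psnr / compare_ssim, used above, were
# deprecated in scikit-image 0.16 and removed in 0.18; the equivalents live in
# skimage.metrics (as used in the distributed script earlier). If porting this
# script to a newer scikit-image, a small compatibility shim keeps both working:
try:
    from skimage.metrics import (mean_squared_error as compare_mse,
                                 peak_signal_noise_ratio as compare_psnr,
                                 structural_similarity as compare_ssim)
except ImportError:  # scikit-image < 0.16
    from skimage.measure import compare_mse, compare_psnr, compare_ssim
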
def eval_model(model, loader, device, vocab, use_gt_boxes=False, use_feats=False,
               filter_box=False):
    all_boxes = defaultdict(list)
    total_iou = []
    total_boxes = 0
    num_batches = 0
    num_samples = 0
    mae_per_image = []
    mae_roi_per_image = []
    roi_only_iou = []
    ssim_per_image = []
    ssim_rois = []
    rois = 0
    margin = 2

    ## Initializing the perceptual loss model
    lpips_model = models.PerceptualLoss(model='net-lin', net='alex', use_gpu=True)
    perceptual_error_image = []
    # ---------------------------------------

    img_idx = 0
    with torch.no_grad():
        for batch in tqdm.tqdm(loader):
            num_batches += 1
            # if num_batches > 10:
            #     break
            batch = [tensor.to(device) for tensor in batch]
            masks = None
            imgs, objs, boxes, triples, obj_to_img, triple_to_img, imgs_in = \
                [b.to(device) for b in batch]
            predicates = triples[:, 1]

            if not args.generative:
                imgs, imgs_in, objs, boxes, triples, obj_to_img, \
                    dropimage_indices, dropfeats_indices = \
                    [b.to(device) for b in process_batch(
                        imgs, imgs_in, objs, boxes, triples, obj_to_img,
                        triple_to_img, device, use_feats=use_feats,
                        filter_box=filter_box)]
                dropbox_indices = dropimage_indices
            else:
                dropbox_indices = torch.ones_like(objs.unsqueeze(1).float()).to(device)
                dropfeats_indices = torch.ones_like(objs.unsqueeze(1).float()).to(device)
                dropimage_indices = torch.zeros_like(objs.unsqueeze(1).float()).to(device)

            if imgs.shape[0] == 0:
                continue

            if args.visualize_graphs:
                # visualize scene graphs for debugging purposes
                visualize_scene_graphs(obj_to_img, objs, triples, vocab, device)

            if use_gt_boxes:
                model_out = model(objs, triples, obj_to_img,
                                  boxes_gt=boxes, masks_gt=masks, src_image=imgs_in,
                                  keep_box_idx=torch.ones_like(dropimage_indices),
                                  keep_feat_idx=dropfeats_indices,
                                  keep_image_idx=dropimage_indices, mode='eval')
            else:
                model_out = model(objs, triples, obj_to_img,
                                  boxes_gt=boxes, src_image=imgs_in,
                                  keep_box_idx=dropimage_indices,
                                  keep_feats_idx=dropfeats_indices,
                                  keep_image_idx=dropimage_indices, mode='eval')

            # OUTPUT
            imgs_pred, boxes_pred, masks_pred, _, _ = model_out
            # ----------------------------------------------------------------

            # save all box predictions
            all_boxes['boxes_gt'].append(boxes)
            all_boxes['objs'].append(objs)
            all_boxes['boxes_pred'].append(boxes_pred)
            all_boxes['drop_targets'].append(dropbox_indices)

            # IoU over all boxes
            total_iou.append(jaccard(boxes_pred, boxes).detach().cpu().numpy())
            total_boxes += boxes_pred.size(0)

            # IoU over target boxes only
            pred_dropbox = boxes_pred[dropbox_indices.squeeze() == 0, :]
            gt_dropbox = boxes[dropbox_indices.squeeze() == 0, :]
            roi_only_iou.append(jaccard(pred_dropbox, gt_dropbox).detach().cpu().numpy())
            rois += pred_dropbox.size(0)

            num_samples += imgs.shape[0]
            imgs = imagenet_deprocess_batch(imgs).float()
            imgs_pred = imagenet_deprocess_batch(imgs_pred).float()

            if args.visualize_imgs_boxes:
                # visualize images with drawn boxes for debugging purposes
                visualize_imgs_boxes(imgs, imgs_pred, boxes, boxes_pred)

            if args.save_images:
                # save reconstructed images for later FID and Inception computation
                if args.save_gt_images:
                    # pass imgs as argument to additionally save gt images
                    save_images(imgs_pred, img_idx, imgs)
                else:
                    save_images(imgs_pred, img_idx)

            # MAE per image
            mae_per_image.append(
                torch.mean(torch.abs(imgs - imgs_pred).view(imgs.shape[0], -1),
                           1).cpu().numpy())

            for s in range(imgs.shape[0]):
                # get coordinates of target
                left, right, top, bottom = bbox_coordinates_with_margin(
                    boxes[s, :], margin, imgs)
                if left > right or top > bottom:
                    continue

                # calculate errors only in the RoI, one sample at a time
                mae_roi_per_image.append(
                    torch.mean(torch.abs(
                        imgs[s, :, top:bottom, left:right] -
                        imgs_pred[s, :, top:bottom, left:right])).cpu().item())

                ssim_per_image.append(
                    pytorch_ssim.ssim(imgs[s:s + 1, :, :, :] / 255.0,
                                      imgs_pred[s:s + 1, :, :, :] / 255.0,
                                      window_size=3).cpu().item())
                ssim_rois.append(
                    pytorch_ssim.ssim(
                        imgs[s:s + 1, :, top:bottom, left:right] / 255.0,
                        imgs_pred[s:s + 1, :, top:bottom, left:right] / 255.0,
                        window_size=3).cpu().item())

                # normalize to [-1, 1] as expected by the LPIPS model
                imgs_pred_norm = imgs_pred[s:s + 1, :, :, :] / 127.5 - 1
                imgs_gt_norm = imgs[s:s + 1, :, :, :] / 127.5 - 1
                perceptual_error_image.append(
                    lpips_model.forward(imgs_pred_norm,
                                        imgs_gt_norm).detach().cpu().numpy())

            if num_batches % args.print_every == 0:
                calculate_scores(mae_per_image, mae_roi_per_image, total_iou,
                                 roi_only_iou, ssim_per_image, ssim_rois,
                                 perceptual_error_image)
            if num_batches % args.save_every == 0:
                save_results(mae_per_image, mae_roi_per_image, total_iou,
                             roi_only_iou, ssim_per_image, ssim_rois,
                             perceptual_error_image, all_boxes, num_batches)
            img_idx += 1

    calculate_scores(mae_per_image, mae_roi_per_image, total_iou, roi_only_iou,
                     ssim_per_image, ssim_rois, perceptual_error_image)
    save_results(mae_per_image, mae_roi_per_image, total_iou, roi_only_iou,
                 ssim_per_image, ssim_rois, perceptual_error_image, all_boxes,
                 'final')

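# LPIPS expects inputs in [-1, 1]; the `/ 127.5 - 1` above maps uint8-range
# [0, 255] tensors into that range. A tiny helper capturing the convention
# (the function name is illustrative, not part of the original code):
def to_lpips_range(img_255):
    """Map a [0, 255] image tensor to the [-1, 1] range expected by LPIPS."""
    return img_255 / 127.5 - 1.0
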
def main(args):
    ## Distributed computing

    # utility for synchronization
    def reduce_tensor(tensor, reduce_sum=False):
        rt = tensor.clone()
        torch.distributed.all_reduce(rt, op=torch.distributed.ReduceOp.SUM)
        return rt if reduce_sum else (rt / world_size)

    # enable distributed computing
    if args.distributed:
        set_affinity(args.local_rank)
        num_devices = torch.cuda.device_count()
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        node_rank = args.node_rank
        global_rank = node_rank * num_devices + args.local_rank
        world_size = torch.distributed.get_world_size()  # os.environ['WORLD_SIZE']
    else:
        global_rank, num_devices, world_size = 0, 1, 1

    ## Data format: batch(0) x steps(1) x height(2) x width(3) x channels(4)

    # batch size (0)
    total_batch_size = args.batch_size
    assert total_batch_size % world_size == 0, \
        'The batch_size is not divisible by world_size.'
    batch_size = total_batch_size // world_size

    # steps (1)
    total_frames = args.future_frames + args.input_frames

    # frame format (2, 3)
    img_resize = (args.img_height != args.img_height_u) or \
                 (args.img_width != args.img_width_u)

    ## Model preparation (Conv-LSTM or Conv-TT-LSTM)

    # size of the neural network model (depth and width)
    layers_per_block = (3, 3, 3, 3)
    hidden_channels = (32, 48, 48, 32)
    skip_stride = 2

    # construct the model with the specified hyper-parameters
    model = ConvLSTMNet(
        # architecture of the model
        layers_per_block=layers_per_block,
        hidden_channels=hidden_channels,
        input_channels=1, skip_stride=skip_stride,
        cell_params={"steps": 3, "order": 3, "ranks": 8},
        # parameters of convolutional operation
        kernel_size=5, bias=True).cuda()

    if args.distributed:
        model = DDP(model, device_ids=[args.local_rank])

    PSmodel = PSmodels.PerceptualLoss(model='net-lin', net='alex',
                                      use_gpu=True, gpu_ids=[args.local_rank])

    ## Dataset preparation (MNIST, KTH)
    assert args.dataset in ["MNIST", "KTH"], \
        "The dataset is not currently supported."
    Dataset = {"KTH": KTH_Dataset, "MNIST": MNIST_Dataset}[args.dataset]

    # path to the dataset folder
    DATA_DIR = args.data_path
    assert os.path.exists(DATA_DIR), \
        "The dataset folder does not exist. " + DATA_DIR

    test_dataset = Dataset({"path": DATA_DIR, "unique_mode": True,
                            "num_frames": total_frames,
                            "num_samples": args.test_samples,
                            "height": args.img_height, "width": args.img_width,
                            "channels": 1, 'training': False})

    test_sampler = torch.utils.data.distributed.DistributedSampler(
        test_dataset, num_replicas=world_size, rank=global_rank, shuffle=False)

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, drop_last=True,
        num_workers=num_devices * 4, pin_memory=True, sampler=test_sampler)

    test_samples = len(test_loader) * total_batch_size

    MODEL_FILE = args.model_path
    assert os.path.exists(MODEL_FILE), \
        "The specified model is not found in the folder."

    checkpoint = torch.load(MODEL_FILE)
    eval_epoch = checkpoint.get("epoch", 0)
    model.load_state_dict(checkpoint["model_state_dict"])

    ## Main script for test phase
    MSE_  = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    PSNR_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    SSIM_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    PIPS_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()

    with torch.no_grad():
        model.eval()

        samples = 0
        for it, frames in enumerate(test_loader):
            samples += total_batch_size

            frames = torch.mean(frames, dim=-1, keepdim=True)
            if img_resize:
                frames_ = frames.cpu().numpy()
                frames = np.zeros((batch_size, total_frames,
                                   args.img_height_u, args.img_width_u, 1),
                                  dtype=np.float32)
                for b in range(batch_size):
                    for t in range(total_frames):
                        frames[b, t] = skimage.transform.resize(
                            frames_[b, t], (args.img_height_u, args.img_width_u))
                frames = torch.from_numpy(frames)

            # 5-th order: batch_size x total_frames x channels x height x width
            frames = frames.permute(0, 1, 4, 2, 3).cuda()
            inputs = frames[:, :args.input_frames]
            origin = frames[:, -args.future_frames:]

            pred = model(inputs, input_frames=args.input_frames,
                         future_frames=args.future_frames,
                         output_frames=args.future_frames,
                         teacher_forcing=False)

            # clamp the output to [0, 1]
            pred = torch.clamp(pred, min=0, max=1)

            # accumulate the statistics per frame
            for t in range(-args.future_frames, 0):
                origin_, pred_ = origin[:, t], pred[:, t]
                origin_ = origin_.repeat([1, 3, 1, 1])
                pred_ = pred_.repeat([1, 3, 1, 1])
                dist = PSmodel(origin_, pred_)
                PIPS_[t] += torch.sum(dist).item()

            origin = origin.permute(0, 1, 3, 4, 2).cpu().numpy()
            pred = pred.permute(0, 1, 3, 4, 2).cpu().numpy()

            for t in range(-args.future_frames, 0):
                for i in range(batch_size):
                    origin_, pred_ = origin[i, t], pred[i, t]
                    origin_ = np.squeeze(origin_, axis=-1)
                    pred_ = np.squeeze(pred_, axis=-1)
                    MSE_[t]  += skimage.metrics.mean_squared_error(origin_, pred_)
                    PSNR_[t] += skimage.metrics.peak_signal_noise_ratio(origin_, pred_)
                    SSIM_[t] += skimage.metrics.structural_similarity(origin_, pred_)

            if args.distributed:
                MSE  = reduce_tensor(MSE_,  reduce_sum=True) / samples
                PSNR = reduce_tensor(PSNR_, reduce_sum=True) / samples
                SSIM = reduce_tensor(SSIM_, reduce_sum=True) / samples
                PIPS = reduce_tensor(PIPS_, reduce_sum=True) / samples
            else:
                MSE  = MSE_  / samples
                PSNR = PSNR_ / samples
                SSIM = SSIM_ / samples
                PIPS = PIPS_ / samples

            if ((it + 1) % 50 == 0 or it + 1 == len(test_loader)) \
                    and args.local_rank == 0:
                print((it + 1) * total_batch_size, '/', test_samples,
                      ": MSE: ",   torch.mean(MSE).cpu().item() * 1e3,
                      "; PSNR: ",  torch.mean(PSNR).cpu().item(),
                      "; SSIM: ",  torch.mean(SSIM).cpu().item(),
                      "; LPIPS: ", torch.mean(PIPS).cpu().item())

    if args.distributed:
        MSE  = reduce_tensor(MSE_,  reduce_sum=True) / test_samples
        PSNR = reduce_tensor(PSNR_, reduce_sum=True) / test_samples
        SSIM = reduce_tensor(SSIM_, reduce_sum=True) / test_samples
        PIPS = reduce_tensor(PIPS_, reduce_sum=True) / test_samples
    else:
        MSE  = MSE_  / test_samples
        PSNR = PSNR_ / test_samples
        SSIM = SSIM_ / test_samples
        PIPS = PIPS_ / test_samples

    MSE_AVG  = torch.mean(MSE).cpu().item()
    PSNR_AVG = torch.mean(PSNR).cpu().item()
    SSIM_AVG = torch.mean(SSIM).cpu().item()
    PIPS_AVG = torch.mean(PIPS).cpu().item()

    if args.local_rank == 0:
        print("Epoch \t{} \tMSE: \t{} (x1e-3) \tPSNR: \t{} \tSSIM: \t{} \tLPIPS: \t{}"
              .format(eval_epoch, 1e3 * MSE_AVG, PSNR_AVG, SSIM_AVG, PIPS_AVG))

def __init__(self):
    super(PerceptualLossLPIPS, self).__init__()
    self.loss_network = models.PerceptualLoss(use_gpu=torch.cuda.is_available())

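# Only the constructor of PerceptualLossLPIPS is shown above. A minimal sketch of
# a matching forward() — the body is an assumption, mirroring how the other
# scripts in this collection reduce LPIPS output to a scalar loss:
def forward(self, pred, target):
    # delegate to the LPIPS network and average the per-image distances
    return self.loss_network(pred, target).mean()
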
def main(ref_dir, generated_dir, version='0.0', use_gpu=True):
    """
    Compute the mean and standard deviation of the LPIPS, PSNR and SSIM
    metrics over an image directory.

    Args:
        ref_dir: reference images directory
        generated_dir: generated images directory
        version: version of LPIPS to use, default 0.0
        use_gpu: whether to use the GPU for faster computation
    """
    ## Initialize the LPIPS model
    model = models.PerceptualLoss(model='net-lin', net='alex',
                                  use_gpu=use_gpu, version=version)

    files = os.listdir(ref_dir)
    # accumulate scores in lists so that files missing from generated_dir
    # do not leave uninitialized entries in the statistics
    lpips_list, psnr_list, ssim_list = [], [], []

    for file in files:
        if os.path.exists(os.path.join(generated_dir, file)):
            # Load images
            img0_np = util.load_image(os.path.join(ref_dir, file))
            img1_np = util.load_image(os.path.join(generated_dir, file))
            img0 = util.im2tensor(img0_np)
            img1 = util.im2tensor(img1_np)
            if use_gpu:
                img0 = img0.cuda()
                img1 = img1.cuda()

            # Compute LPIPS distance
            dist01 = model.forward(img0, img1).item()
            lpips_list.append(dist01)

            # Compute PSNR value
            psnr = metrics.peak_signal_noise_ratio(img0_np, img1_np)
            psnr_list.append(psnr)

            # Compute SSIM value
            ssim = metrics.structural_similarity(img0_np, img1_np,
                                                 multichannel=True)
            ssim_list.append(ssim)

            print('%s: %.4f, %.4f, %.4f' % (file, dist01, psnr, ssim))

    lpips_list = np.array(lpips_list)
    psnr_list = np.array(psnr_list)
    ssim_list = np.array(ssim_list)

    print("LPIPS mean: {:.4f}".format(lpips_list.mean()))
    print("LPIPS std:  {:.4f}".format(lpips_list.std()))
    print("PSNR mean:  {:.4f}".format(psnr_list.mean()))
    print("PSNR std:   {:.4f}".format(psnr_list.std()))
    print("SSIM mean:  {:.4f}".format(ssim_list.mean()))
    print("SSIM std:   {:.4f}".format(ssim_list.std()))

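# Example invocation of main() above; the directory names are illustrative only
# and assume both folders contain same-named image files.
if __name__ == '__main__':
    main('results/reference', 'results/generated')
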
def main(args):
    ## Data format: batch_size(0) x time_steps(1) x
    #  img_height(2) x img_width(3) x channels(4)

    # batch size (0)
    assert args.log_samples % args.batch_size == 0, \
        "The argument log_samples should be a multiple of batch_size."

    # frame split (1)
    input_frames = args.input_frames
    future_frames = args.future_frames
    total_frames = input_frames + future_frames

    log_frames = args.log_frames
    list_input_frames = list(range(0, input_frames, log_frames))
    plot_input_frames = len(list_input_frames)
    list_future_frames = list(range(0, future_frames, log_frames))
    plot_future_frames = len(list_future_frames)

    assert args.img_channels in [1, 3], \
        "The number of channels is either 1 or 3."
    img_colored = (args.img_channels == 3)

    ## Model preparation (Conv-LSTM)

    # whether to use GPU (or CPU)
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # whether to use multi-GPU (or single-GPU)
    multi_gpu = (use_cuda and args.multi_gpu and torch.cuda.device_count() > 1)

    # number of GPUs used for training
    num_gpus = (torch.cuda.device_count() if multi_gpu else 1) if use_cuda else 0
    print("Device: %s (# of GPUs: %d)" % (device, num_gpus))

    # size of the Conv-LSTM network
    if args.model_size == "origin":  # 12 layers
        layers_per_block = (3, ) * 4
        hidden_channels = (32, 48, 48, 32)
        skip_stride = 2
    elif args.model_size == "small":
        layers_per_block = (3, ) * 4
        hidden_channels = (32, ) * 4
        skip_stride = 2
    elif args.model_size == "shallow":  # 4 layers
        layers_per_block = (4, )
        hidden_channels = (128, )
        skip_stride = None
    else:
        raise NotImplementedError

    # construct the model with the specified hyper-parameters
    model = ConvLSTMNet(
        # model architecture
        layers_per_block, hidden_channels, skip_stride=skip_stride,
        # input/output interfaces
        input_channels=args.img_channels, output_sigmoid=args.use_sigmoid,
        input_height=args.img_height, input_width=args.img_width,
        # non-local blocks
        non_local=args.use_non_local,
        pairwise_function=args.pairwise_function,
        use_norm=args.use_norm, sub_sampling=args.use_sub_sample,
        # convolutional layers
        arma=args.use_arma, w_dilation=args.w_dilation,
        w_kernel_size=args.w_kernel_size, w_bias=args.use_bias,
        a_kernel_size=args.a_kernel_size, a_padding_mode=args.a_padding_mode)

    # count the total number of model parameters
    num_params = sum(param.numel() for param in model.parameters()
                     if param.requires_grad)
    print("# of params. = ", num_params)

    # move the model to the device (CPU, GPU, multi-GPU)
    model.to(device)
    if multi_gpu:
        model = nn.DataParallel(model)

    # create the name and timestamp of the model
    model_name = args.model_name + '_' + args.model_stamp
    print("Model name:", model_name)
    print("# of future frames:", future_frames)

    PSmodel = PSmodels.PerceptualLoss(model='net-lin', net='alex',
                                      use_gpu=use_cuda, gpu_ids=[0])

    ## Dataset preparation (Moving-MNIST)
    dataset = args.dataset
    Dataset = {"MNIST": MNIST_Dataset}[dataset]

    # path to the dataset folder
    if args.data_path == "default":
        DATA_DIR = {"MNIST": "moving-mnist"}[dataset]
        DATA_DIR = os.path.join("../datasets", DATA_DIR)
    else:  # if args.data_path != "default":
        DATA_DIR = args.data_path
    assert os.path.exists(DATA_DIR), \
        "The dataset folder does not exist."

    # number of workers for the dataloaders
    num_workers = 5 * max(num_gpus, 1)

    # dataloader for the test set
    test_data_path = os.path.join(DATA_DIR, args.test_data_file)
    assert os.path.exists(test_data_path), \
        "The test set does not exist."
    test_data = Dataset({"path": test_data_path, "unique_mode": True,
                         "num_frames": total_frames,
                         "num_samples": args.test_samples,
                         "height": args.img_height, "width": args.img_width,
                         "channels": args.img_channels})

    test_data_loader = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=False,
        num_workers=num_workers, drop_last=True)

    test_size = len(test_data_loader) * args.batch_size
    print("# of test samples:", test_size)

    ## Outputs (models and results)
    if args.output_path == "default":
        OUTPUT_DIR = {"MNIST": "./moving-mnist"}[dataset]
    else:  # if args.output_path != "default":
        OUTPUT_DIR = args.output_path
    OUTPUT_DIR = os.path.join(OUTPUT_DIR, model_name)
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    # path to the models
    MODEL_DIR = os.path.join(OUTPUT_DIR, "models")
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)

    # load the best / last / specified model
    if args.eval_auto:
        if args.eval_best:
            MODEL_FILE = os.path.join(MODEL_DIR, 'training_best.pt')
        else:  # if args.eval_last:
            MODEL_FILE = os.path.join(MODEL_DIR, 'training_last.pt')
    else:  # if args.eval_spec:
        MODEL_FILE = os.path.join(MODEL_DIR, 'training_%d.pt' % args.eval_epoch)
    assert os.path.exists(MODEL_FILE), \
        "The specified model is not found in the folder."

    checkpoint = torch.load(MODEL_FILE)
    eval_epoch = checkpoint.get("epoch", args.eval_epoch)
    model.load_state_dict(checkpoint["model_state_dict"])

    # path to the results (images and statistics)
    RESULT_DIR = os.path.join(OUTPUT_DIR, "results")
    if not os.path.exists(RESULT_DIR):
        os.makedirs(RESULT_DIR)

    RESULT_IMG = os.path.join(
        RESULT_DIR, "test_images_" + str(eval_epoch) + "_" + str(future_frames))
    if not os.path.exists(RESULT_IMG):
        os.makedirs(RESULT_IMG)

    RESULT_STAT = os.path.join(RESULT_DIR, "test_stats")
    if not os.path.exists(RESULT_STAT):
        os.makedirs(RESULT_STAT)
    RESULT_STAT = os.path.join(RESULT_STAT, 'epoch_%d' % eval_epoch)

    ## Main script for test phase
    MSE  = [0.] * future_frames
    PSNR = [0.] * future_frames
    SSIM = [0.] * future_frames
    PIPS = [0.] * future_frames

    with torch.no_grad():
        model.eval()

        samples = 0
        for frames in test_data_loader:
            samples += args.batch_size

            # 5-th order: batch_size x total_frames x channels x height x width
            frames = frames.permute(0, 1, 4, 2, 3).to(device)
            inputs = frames[:, :input_frames]
            origin = frames[:, -future_frames:]

            pred = model(inputs, input_frames=input_frames,
                         future_frames=future_frames,
                         output_frames=future_frames,
                         teacher_forcing=False)

            # clamp the output to [0, 1]
            pred = torch.clamp(pred, min=0, max=1)

            # save the first sample of each logged batch to the folder
            if samples % args.log_samples == 0:
                print("samples: ", samples)
                input_0 = inputs[0, list_input_frames]
                origin_0 = origin[0, list_future_frames]
                pred_0 = pred[0, list_future_frames]

                # pad the input with zeros (if needed)
                if plot_input_frames < plot_future_frames:
                    input_0 = torch.cat([
                        torch.zeros(plot_future_frames - plot_input_frames,
                                    args.img_channels, args.img_height,
                                    args.img_width, device=device),
                        input_0], dim=0)

                img = torchvision.utils.make_grid(
                    torch.cat([input_0, origin_0, pred_0], dim=0),
                    nrow=plot_future_frames)
                RESULT_FILE = os.path.join(
                    RESULT_IMG, "cmp_%d_%d.jpg" % (eval_epoch, samples))
                torchvision.utils.save_image(img, RESULT_FILE)

            # accumulate the statistics per frame
            for t in range(-future_frames, 0):
                origin_, pred_ = origin[:, t], pred[:, t]
                if not img_colored:
                    origin_ = origin_.repeat([1, 3, 1, 1])
                    pred_ = pred_.repeat([1, 3, 1, 1])
                dist = PSmodel(origin_, pred_)
                PIPS[t] += torch.sum(dist).item()

            origin = origin.permute(0, 1, 3, 4, 2).cpu().numpy()
            pred = pred.permute(0, 1, 3, 4, 2).cpu().numpy()

            for t in range(-future_frames, 0):
                for i in range(args.batch_size):
                    origin_, pred_ = origin[i, t], pred[i, t]
                    if not img_colored:
                        origin_ = np.squeeze(origin_, axis=-1)
                        pred_ = np.squeeze(pred_, axis=-1)
                    MSE[t]  += skimage.measure.compare_mse(origin_, pred_)
                    PSNR[t] += skimage.measure.compare_psnr(origin_, pred_)
                    SSIM[t] += skimage.measure.compare_ssim(
                        origin_, pred_, multichannel=img_colored)

    for t in range(future_frames):
        MSE[t]  /= test_size
        PSNR[t] /= test_size
        SSIM[t] /= test_size
        PIPS[t] /= test_size

    # compute the average statistics
    MSE_AVG  = sum(MSE)  / future_frames
    PSNR_AVG = sum(PSNR) / future_frames
    SSIM_AVG = sum(SSIM) / future_frames
    PIPS_AVG = sum(PIPS) / future_frames

    print("Epoch {}, MSE: {} (x1e-3); PSNR: {}, SSIM: {}, PIPS: {}".format(
        eval_epoch, 1e3 * MSE_AVG, PSNR_AVG, SSIM_AVG, PIPS_AVG))
    print("PSNR:", PSNR)
    print("SSIM:", SSIM)
    print("PIPS:", PIPS)

    np.savez(RESULT_STAT, MSE=MSE, PSNR=PSNR, SSIM=SSIM, PIPS=PIPS)
    print('--------------------------------------------------------------')

def eval_model(args, model, loader, device, use_gt=False, use_feats=False,
               filter_box=False):
    all_losses = defaultdict(list)
    all_boxes = defaultdict(list)
    total_iou = []
    total_boxes = 0
    num_batches = 0
    num_samples = 0
    mae_per_image = []
    mae_roi_per_image = []
    roi_only_iou = []
    ssim_per_image = []
    ssim_rois = []
    rois = 0
    margin = 2

    ## Initializing the perceptual loss model
    lpips_model = models.PerceptualLoss(model='net-lin', net='alex', use_gpu=True)
    perceptual_error_image = []
    perceptual_error_roi = []
    # ---------------------------------------

    with torch.no_grad():
        for batch in tqdm.tqdm(loader):
            num_batches += 1
            # if num_batches > 10:
            #     break
            batch = [tensor.to(device) for tensor in batch]
            masks = None
            if len(batch) == 6:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 7:
                imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 12:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img, \
                    objs_r, boxes_r, triples_r, obj_to_img_r, triple_to_img_r, \
                    imgs_in = batch
            elif len(batch) == 13:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img, attributes, \
                    objs_r, boxes_r, triples_r, obj_to_img_r, triple_to_img_r, \
                    imgs_in = batch
            else:
                assert False
            predicates = triples[:, 1]

            if EVAL_ALL:
                imgs, imgs_in, objs, boxes, triples, obj_to_img, \
                    dropbox_indices, dropfeats_indices = process_batch(
                        imgs, imgs_in, objs, boxes, triples, obj_to_img,
                        triple_to_img, device, use_feats=use_feats,
                        filter_box=filter_box)
            else:
                dropbox_indices = None
                dropfeats_indices = None

            masks_gt = None
            gt_train = False
            # note: this assumes the 13-element batch format, where `attributes`
            # is defined
            attributes = torch.zeros_like(attributes)
            all_features = None

            # run the model with predicted masks
            model_out = model(imgs, objs, triples, obj_to_img,
                              boxes_gt=boxes, masks_gt=masks_gt,
                              attributes=attributes, gt_train=gt_train,
                              test_mode=False, use_gt_box=True,
                              features=all_features,
                              drop_box_idx=dropbox_indices,
                              drop_feat_idx=dropfeats_indices,
                              src_image=imgs_in)

            # OUTPUT
            imgs_pred, boxes_pred, masks_pred, predicate_scores, layout, _ = model_out
            # ----------------------------------------------------------------

            # save all box predictions
            all_boxes['boxes_gt'].append(boxes)
            all_boxes['objs'].append(objs)
            all_boxes['boxes_pred'].append(boxes_pred)
            all_boxes['drop_targets'].append(dropbox_indices)

            # IoU over all boxes
            total_iou.append(jaccard(boxes_pred, boxes).cpu().numpy())
            total_boxes += boxes_pred.size(0)

            # IoU over target boxes only
            pred_dropbox = boxes_pred[dropbox_indices.squeeze() == 0, :]
            gt_dropbox = boxes[dropbox_indices.squeeze() == 0, :]
            roi_only_iou.append(
                jaccard(pred_dropbox, gt_dropbox).detach().cpu().numpy())
            rois += pred_dropbox.size(0)

            num_samples += imgs.shape[0]
            imgs = imagenet_deprocess_batch(imgs).float()
            imgs_pred = imagenet_deprocess_batch(imgs_pred).float()

            # uncomment to plot images (for debugging purposes)
            # visualize_imgs_boxes(imgs, imgs_pred, boxes, boxes)

            # MAE per image
            mae_per_image.append(
                torch.mean(torch.abs(imgs - imgs_pred).view(imgs.shape[0], -1),
                           1).cpu().numpy())

            for s in range(imgs.shape[0]):
                # get coordinates of target
                left, right, top, bottom = bbox_coordinates_with_margin(
                    boxes[s, :], margin, imgs)

                # calculate errors only in the RoI, one sample at a time
                mae_roi_per_image.append(
                    torch.mean(torch.abs(
                        imgs[s, :, top:bottom, left:right] -
                        imgs_pred[s, :, top:bottom, left:right])).cpu().item())

                ssim_per_image.append(
                    pytorch_ssim.ssim(imgs[s:s + 1, :, :, :] / 255.0,
                                      imgs_pred[s:s + 1, :, :, :] / 255.0,
                                      window_size=3).cpu().item())
                ssim_rois.append(
                    pytorch_ssim.ssim(
                        imgs[s:s + 1, :, top:bottom, left:right] / 255.0,
                        imgs_pred[s:s + 1, :, top:bottom, left:right] / 255.0,
                        window_size=3).cpu().item())

                # normalize to [-1, 1] as expected by the LPIPS model
                imgs_pred_norm = imgs_pred[s:s + 1, :, :, :] / 127.5 - 1
                imgs_gt_norm = imgs[s:s + 1, :, :, :] / 127.5 - 1
                # perceptual_error_roi.append(lpips_model.forward(
                #     imgs_pred_norm[:, :, top:bottom, left:right],
                #     imgs_gt_norm[:, :, top:bottom, left:right]))
                perceptual_error_image.append(
                    lpips_model.forward(imgs_pred_norm,
                                        imgs_gt_norm).detach().cpu().numpy())

            if num_batches % PRINT_EVERY == 0:
                calculate_scores(mae_per_image, mae_roi_per_image, total_iou,
                                 roi_only_iou, ssim_per_image, ssim_rois,
                                 perceptual_error_image, perceptual_error_roi)
            if num_batches % SAVE_EVERY == 0:
                save_results(mae_per_image, mae_roi_per_image, total_iou,
                             roi_only_iou, ssim_per_image, ssim_rois,
                             perceptual_error_image, perceptual_error_roi,
                             all_boxes, num_batches)

    save_results(mae_per_image, mae_roi_per_image, total_iou, roi_only_iou,
                 ssim_per_image, ssim_rois, perceptual_error_image,
                 perceptual_error_roi, all_boxes, 'final')
    return

def run_model(args, checkpoint, loader=None):
    output_dir = args.exp_dir
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_eval_loader(args, checkpoint, vocab_t)

    img_dir = makedir(output_dir, 'images_' + SPLIT)
    graph_json_dir = makedir(output_dir, 'graphs_json')
    f = open(output_dir + "/result_ids.txt", "w")

    img_idx = 0

    total_iou_all = []
    total_iou = get_def_dict()
    total_boxes = 0
    mae_per_image_all = []
    mae_per_image = get_def_dict()
    mae_roi_per_image_all = []
    mae_roi_per_image = get_def_dict()
    roi_only_iou_all = []
    roi_only_iou = get_def_dict()
    ssim_per_image_all = []
    ssim_per_image = get_def_dict()
    ssim_rois_all = []
    ssim_rois = get_def_dict()
    rois = 0
    margin = 2

    ## Initializing the perceptual loss model
    lpips_model = models.PerceptualLoss(model='net-lin', net='alex', use_gpu=True)
    perceptual_error_image_all = []
    perceptual_error_image = get_def_dict()
    perceptual_error_roi_all = []
    perceptual_error_roi = get_def_dict()

    for batch in loader:
        imgs, imgs_src, objs, objs_src, boxes, boxes_src, triples, triples_src, \
            obj_to_img, triple_to_img, imgs_in = [x.cuda() for x in batch]

        imgs_gt = imagenet_deprocess_batch(imgs_src)
        imgs_target_gt = imagenet_deprocess_batch(imgs)

        # infer the edit mode from the difference between the target and source
        # scenes, using multisets of triples and object ids
        graph_set_bef = Counter(tuple(row)
                                for row in tripleToObjID(triples_src, objs_src))
        obj_set_bef = Counter([int(obj.cpu()) for obj in objs_src])
        graph_set_aft = Counter(tuple(row)
                                for row in tripleToObjID(triples, objs))
        obj_set_aft = Counter([int(obj.cpu()) for obj in objs])

        if len(objs) > len(objs_src):
            mode = "addition"
            changes = graph_set_aft - graph_set_bef
            obj_ids = list(obj_set_aft - obj_set_bef)
            new_ids = (objs == obj_ids[0]).nonzero()
        elif len(objs) < len(objs_src):
            mode = "remove"
            changes = graph_set_bef - graph_set_aft
            obj_ids = list(obj_set_bef - obj_set_aft)
            new_ids_src = (objs_src == obj_ids[0]).nonzero()
            new_objs = [obj for obj in objs]
            new_objs.append(objs_src[new_ids_src[0]])
            objs = torch.tensor(new_objs).cuda()
            num_objs = len(objs)
            new_ids = [torch.tensor(num_objs - 1)]
            new_boxes = [bbox for bbox in boxes]
            new_boxes.append(boxes_src[new_ids_src[0]][0])
            boxes = torch.stack(new_boxes)
            obj_to_img = torch.zeros(num_objs, dtype=objs.dtype,
                                     device=objs.device)
        elif torch.all(torch.eq(objs, objs_src)):
            mode = "reposition"
            changes = (graph_set_bef - graph_set_aft) + \
                      (graph_set_aft - graph_set_bef)
            idx_cnt = np.zeros((25, 1))
            for [s, p, o] in list(changes):
                idx_cnt[s] += 1
                idx_cnt[o] += 1
            obj_ids = idx_cnt.argmax(0)
            id_src = (objs_src == obj_ids[0]).nonzero()
            box_src = boxes_src[id_src[0]]
            new_ids = (objs == obj_ids[0]).nonzero()
            boxes[new_ids[0]] = box_src
        elif len(objs) == len(objs_src):
            mode = "replace"
            changes = (graph_set_bef - graph_set_aft) + \
                      (graph_set_aft - graph_set_bef)
            obj_ids = [list(obj_set_bef - obj_set_aft)[0],
                       list(obj_set_aft - obj_set_bef)[0]]
            new_ids = (objs == obj_ids[1]).nonzero()
        else:
            assert False

        new_ids = [int(new_id.cpu()) for new_id in new_ids]

        show_im = False
        if show_im:
            img_gt = imgs_gt[0].numpy().transpose(1, 2, 0)
            img_gt_target = imgs_target_gt[0].numpy().transpose(1, 2, 0)
            fig = plt.figure()
            fig.add_subplot(1, 2, 1)
            plt.imshow(img_gt)
            fig.add_subplot(1, 2, 2)
            plt.imshow(img_gt_target)
            plt.show(block=True)

        query_feats = None
        if args.with_query_image:
            img, box = query_image_by_semantic_id(new_ids, img_idx, loader)
            query_feats = model.forward_visual_feats(img, box)

            img_filename_query = '%04d_query.png' % (img_idx)
            img = imagenet_deprocess_batch(img)
            img_np = img[0].numpy().transpose(1, 2, 0).astype(np.uint8)
            img_path = os.path.join(img_dir, img_filename_query)
            imsave(img_path, img_np)

        img_gt_filename = '%04d_gt_src.png' % (img_idx)
        img_target_gt_filename = '%04d_gt_target.png' % (img_idx)
        img_pred_filename = '%04d_changed.png' % (img_idx)
        img_filename_noised = '%04d_noised.png' % (img_idx)

        triples_ = triples
        boxes_gt = boxes

        keep_box_idx = torch.ones_like(objs.unsqueeze(1), dtype=torch.float)
        keep_feat_idx = torch.ones_like(objs.unsqueeze(1), dtype=torch.float)
        keep_image_idx = torch.ones_like(objs.unsqueeze(1), dtype=torch.float)

        subject_node = new_ids[0]
        keep_image_idx[subject_node] = 0

        if mode == 'reposition':
            keep_box_idx[subject_node] = 0
        elif mode == "remove":
            keep_feat_idx[subject_node] = 0
        else:
            if mode == "replace":
                keep_feat_idx[subject_node] = 0
            if mode == 'auto_withfeats':
                keep_image_idx[subject_node] = 0
            if mode == 'auto_nofeats':
                if not args.with_query_image:
                    keep_feat_idx[subject_node] = 0

        model_out = model(objs, triples_, obj_to_img,
                          boxes_gt=boxes_gt, masks_gt=None, src_image=imgs_in,
                          mode=mode, query_feats=query_feats,
                          keep_box_idx=keep_box_idx, keep_feat_idx=keep_feat_idx,
                          keep_image_idx=keep_image_idx)

        imgs_pred, boxes_pred_o, masks_pred, noised_srcs, _ = model_out

        imgs = imagenet_deprocess_batch(imgs).float()
        imgs_pred = imagenet_deprocess_batch(imgs_pred).float()

        ## Metrics

        # IoU over all boxes
        curr_iou = jaccard(boxes_pred_o, boxes).detach().cpu().numpy()
        total_iou_all.append(curr_iou)
        total_iou[mode].append(curr_iou)
        total_boxes += boxes_pred_o.size(0)

        # IoU over target boxes only
        pred_dropbox = boxes_pred_o[keep_box_idx.squeeze() == 0, :]
        gt_dropbox = boxes[keep_box_idx.squeeze() == 0, :]
        curr_iou_roi = jaccard(pred_dropbox, gt_dropbox).detach().cpu().numpy()
        roi_only_iou_all.append(curr_iou_roi)
        roi_only_iou[mode].append(curr_iou_roi)
        rois += pred_dropbox.size(0)

        # MAE per image
        curr_mae = torch.mean(
            torch.abs(imgs - imgs_pred).view(imgs.shape[0], -1), 1).cpu().numpy()
        mae_per_image[mode].append(curr_mae)
        mae_per_image_all.append(curr_mae)

        for s in range(imgs.shape[0]):
            # get coordinates of target
            left, right, top, bottom = bbox_coordinates_with_margin(
                boxes[s, :], margin, imgs)
            if left > right or top > bottom:
                continue

            # calculate errors only in the RoI, one sample at a time
            curr_mae_roi = torch.mean(
                torch.abs(imgs[s, :, top:bottom, left:right] -
                          imgs_pred[s, :, top:bottom, left:right])).cpu().item()
            mae_roi_per_image[mode].append(curr_mae_roi)
            mae_roi_per_image_all.append(curr_mae_roi)

            curr_ssim = pytorch_ssim.ssim(
                imgs[s:s + 1, :, :, :] / 255.0,
                imgs_pred[s:s + 1, :, :, :] / 255.0,
                window_size=3).cpu().item()
            ssim_per_image_all.append(curr_ssim)
            ssim_per_image[mode].append(curr_ssim)

            curr_ssim_roi = pytorch_ssim.ssim(
                imgs[s:s + 1, :, top:bottom, left:right] / 255.0,
                imgs_pred[s:s + 1, :, top:bottom, left:right] / 255.0,
                window_size=3).cpu().item()
            ssim_rois_all.append(curr_ssim_roi)
            ssim_rois[mode].append(curr_ssim_roi)

            # normalize to [-1, 1] as expected by the LPIPS model
            imgs_pred_norm = imgs_pred[s:s + 1, :, :, :] / 127.5 - 1
            imgs_gt_norm = imgs[s:s + 1, :, :, :] / 127.5 - 1
            curr_lpips = lpips_model.forward(
                imgs_pred_norm, imgs_gt_norm).detach().cpu().numpy()
            perceptual_error_image_all.append(curr_lpips)
            perceptual_error_image[mode].append(curr_lpips)

        for i in range(imgs_pred.size(0)):
            if args.save_imgs:
                img_gt = imgs_gt[i].numpy().transpose(1, 2, 0).astype(np.uint8)
                img_gt = cv2.resize(img_gt, (128, 128))
                img_gt_path = os.path.join(img_dir, img_gt_filename)
                imsave(img_gt_path, img_gt)

                img_gt_target = imgs_target_gt[i].numpy().transpose(
                    1, 2, 0).astype(np.uint8)
                img_gt_target = cv2.resize(img_gt_target, (128, 128))
                img_gt_target_path = os.path.join(img_dir, img_target_gt_filename)
                imsave(img_gt_target_path, img_gt_target)

                noised_src_np = imagenet_deprocess_batch(noised_srcs[:, :3, :, :])
                noised_src_np = noised_src_np[i].numpy().transpose(
                    1, 2, 0).astype(np.uint8)
                noised_src_np = cv2.resize(noised_src_np, (128, 128))
                img_path_noised = os.path.join(img_dir, img_filename_noised)
                imsave(img_path_noised, noised_src_np)

                img_pred_np = imgs_pred[i].numpy().transpose(
                    1, 2, 0).astype(np.uint8)
                img_pred_np = cv2.resize(img_pred_np, (128, 128))
                img_path = os.path.join(img_dir, img_pred_filename)
                imsave(img_path, img_pred_np)

                save_graph_json(objs, triples, boxes, "after",
                                graph_json_dir, img_idx)

        img_idx += 1

        if img_idx % print_every == 0:
            calculate_scores(mae_per_image_all, mae_roi_per_image_all,
                             total_iou_all, roi_only_iou_all,
                             ssim_per_image_all, ssim_rois_all,
                             perceptual_error_image_all,
                             perceptual_error_roi_all)
            calculate_scores_modes(mae_per_image, mae_roi_per_image,
                                   total_iou, roi_only_iou,
                                   ssim_per_image, ssim_rois,
                                   perceptual_error_image,
                                   perceptual_error_roi)

        print('Saved %d images' % img_idx)

    f.close()

def __init__(self, weight=1.0, net='alex', use_gpu=True):
    """Wrapper for PerceptualSimilarity.models.PerceptualLoss."""
    self.model = models.PerceptualLoss(net=net, use_gpu=use_gpu)
    self.weight = weight

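# A minimal usage sketch for the wrapper above. The class name is not shown in
# the snippet, so `PerceptualLossWrapper` here is an assumed placeholder; inputs
# are scaled to the [-1, 1] range LPIPS expects.
#
#   loss_fn = PerceptualLossWrapper(weight=0.5, net='alex', use_gpu=True)
#   pred   = torch.rand(4, 3, 64, 64) * 2 - 1
#   target = torch.rand(4, 3, 64, 64) * 2 - 1
#   loss   = loss_fn.weight * loss_fn.model(pred, target).mean()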