def __getitem__(self, idx):
    scale = self.opt.get('scale', 4)
    HR_size = self.opt.get('HR_size', 128)
    LR_size = HR_size // scale
    idx_center = (self.num_frames - 1) // 2
    ds_kernel = None

    # Default case: tensor will result in the [0,1] range
    # Alternative: tensor will be z-normalized to the [-1,1] range
    znorm = self.opt.get('znorm', False)

    if self.opt['phase'] == 'train':
        if self.opt.get('lr_downscale', None) and self.opt.get('dataroot_kernels', None) and 999 in self.opt['lr_downscale_types']:
            ds_kernel = self.ds_kernels  # KernelDownscale(scale, self.kernel_paths, self.num_kernel)
        # get a random video directory
        idx_video = random.randint(0, len(self.video_list) - 1)
        video_dir = self.video_list[idx_video]
        # print(video_dir)
    else:
        # only one video and paths_LR/paths_HR is already the video dir
        video_dir = ""

    # list the frames in the directory
    # hr_dir = self.trainset_dir + '/' + video_dir + '/hr'
    paths_HR = util.get_image_paths(self.opt['data_type'], os.path.join(self.paths_HR, video_dir))
    # print(paths_HR)

    if self.opt['phase'] == 'train':
        # random reverse augmentation
        random_reverse = self.opt.get('random_reverse', False)

        # skip intermediate frames to learn from low-FPS videos (augmentation):
        # use a random frameskip of up to 'max_frameskip' frames
        max_frameskip = self.opt.get('max_frameskip', 0)
        if max_frameskip > 0:
            max_frameskip = min(max_frameskip, len(paths_HR) // (self.num_frames - 1))
            frameskip = random.randint(1, max_frameskip)
        else:
            frameskip = 1
        # print("max_frameskip: ", max_frameskip)

        assert ((self.num_frames - 1) * frameskip) <= (len(paths_HR) - 1), (
            f'num_frames * frameskip must be smaller than the number of frames per video, check {video_dir}')

        # e.g. with 31 frames, num_frames=3 and frameskip=1, the max start index is (31-1)-(3-1) = 28
        idx_frame = random.randint(0, (len(paths_HR) - 1) - ((self.num_frames - 1) * frameskip))
        # print('frameskip:', frameskip)
    else:
        frameskip = 1
        idx_frame = idx

    '''
    List based frames loading
    '''
    ds_algo = 777  # default to matlab-like bicubic downscale
    if self.opt.get('lr_downscale', None):
        # if manually set and scale algorithms are provided, use them
        ds_algo = self.opt.get('lr_downscale_types', 777)

    if self.paths_LR:
        paths_LR = util.get_image_paths(self.opt['data_type'], os.path.join(self.paths_LR, video_dir))
    else:
        paths_LR = paths_HR

    # get the video directory
    HR_dir, _ = os.path.split(paths_HR[idx_frame])
    LR_dir, _ = os.path.split(paths_LR[idx_frame])

    # read HR & LR frames
    HR_list = []
    LR_list = []
    resize_type = None
    LR_bicubic = None
    HR_center = None
    # print('len(paths_HR)', len(paths_HR))

    for i_frame in range(self.num_frames):
        # print('frame path:', paths_HR[int(idx_frame)+(frameskip*i_frame)])
        HR_img = util.read_img(None, paths_HR[int(idx_frame) + (frameskip * i_frame)], out_nc=self.image_channels)
        HR_img = util.modcrop(HR_img, scale)

        if self.opt['phase'] == 'train':
            '''
            If using individual image augmentations, get cropping parameters for reuse
            '''
            if self.otf_noise and i_frame == 0:  # only need to calculate once, from the first frame
                # reuse the cropping parameters for all LR and HR frames
                hr_crop_params, lr_crop_params = get_crop_params(HR_img, LR_size, scale)
                if self.opt.get('lr_noise', None):
                    # reuse the same noise type for all the frames
                    noise_option = get_noise(self.opt.get('lr_noise_types', None), self.noise_patches)
                if self.opt.get('lr_blur', None):
                    # reuse the same blur type for all the frames
                    blur_option = get_blur(self.opt.get('lr_blur_types', None))

        if self.paths_LR:
            # LR images are provided at the correct scale
            LR_img = util.read_img(None, paths_LR[int(idx_frame) + (frameskip * i_frame)], out_nc=self.image_channels)
            if LR_img.shape == HR_img.shape:
                # the provided "LR" frame was not downscaled yet, generate it from the HR frame
                LR_img, resize_type = Scale(img=HR_img, scale=scale, algo=ds_algo, ds_kernel=ds_kernel, resize_type=resize_type)
        else:
            # generate LR images on the fly
            LR_img, resize_type = Scale(img=HR_img, scale=scale, algo=ds_algo, ds_kernel=ds_kernel, resize_type=resize_type)

        # get the bicubic upscale of the center frame to concatenate for SR
        if self.y_only and self.srcolors and i_frame == idx_center:
            LR_bicubic, _ = Scale(img=LR_img, scale=1 / scale, algo=777)  # bicubic upscale
            HR_center = HR_img
            # tmp_vis(LR_bicubic, False)
            # tmp_vis(HR_center, False)

        if self.y_only:
            # extract Y channel from frames
            # normal path, only Y for both
            HR_img = util.bgr2ycbcr(HR_img, only_y=True)
            LR_img = util.bgr2ycbcr(LR_img, only_y=True)

        # crop patches randomly if using otf noise
        #TODO: make a BasicSR composable random_crop
        #TODO: the original crop should go here, cropping after loading each image, but it
        # could also be much simpler to crop after concatenating. Check the speed difference.
        if self.otf_noise and self.opt['phase'] == 'train':
            HR_img, LR_img = apply_crop_params(HR_img, LR_img, hr_crop_params, lr_crop_params)
            if self.y_only and self.srcolors and i_frame == idx_center:
                LR_bicubic, _ = apply_crop_params(LR_bicubic, None, hr_crop_params, None)
                HR_center, _ = apply_crop_params(HR_center, None, hr_crop_params, None)

        # expand Y images to add the channel dimension
        # normal path, only Y for both
        if self.y_only:
            HR_img = util.fix_img_channels(HR_img, 1)
            LR_img = util.fix_img_channels(LR_img, 1)

        if self.opt['phase'] == 'train':
            # single frame augmentation (noise, blur, etc).
            # Would only be efficient if patches are cropped in this loop
            if self.opt.get('lr_blur', None):
                if blur_option:
                    LR_img = blur_option(LR_img)
            if self.opt.get('lr_noise', None):
                if noise_option:
                    LR_img = noise_option(LR_img)

            # expand LR images to add the channel dimension again if needed
            # (blur removes the grayscale channel)
            #TODO: add an if condition, can compare to the ndim before the augs, maybe move inside the aug condition
            # if not fullimgchannels:
            #TODO: TMP, this should be when using srcolors for HR or when training with 3 channels tests, separately
            if self.y_only:
                LR_img = util.fix_img_channels(LR_img, 1)

        # print("HR_img.shape: ", HR_img.shape)
        # print("LR_img.shape", LR_img.shape)

        HR_list.append(HR_img)  # h, w, c
        LR_list.append(LR_img)  # h, w, c

    # print(len(HR_list))
    # print(len(LR_list))

    if self.opt['phase'] == 'train':
        # random reverse sequence augmentation
        if random_reverse and random.random() < 0.5:
            HR_list.reverse()
            LR_list.reverse()

    if not self.y_only:
        t = self.num_frames
        HR = [np.asarray(GT) for GT in HR_list]  # list -> numpy, input: list of numpy arrays [H,W,C]
        HR = np.asarray(HR)  # numpy, [T,H,W,C]
        h_HR, w_HR, c = HR_img.shape  # HR_center.shape #TODO: check, may be risky
        HR = HR.transpose(1, 2, 3, 0).reshape(h_HR, w_HR, -1)  # numpy, [H',W',CT]
        LR = [np.asarray(LT) for LT in LR_list]  # list -> numpy, input: list of numpy arrays [H,W,C]
        LR = np.asarray(LR)  # numpy, [T,H,W,C]
        LR = LR.transpose(1, 2, 3, 0).reshape(h_HR // scale, w_HR // scale, -1)  # numpy, [Hl',Wl',CT]
    else:
        HR = np.concatenate((HR_list), axis=2)  # h, w, t
        LR = np.concatenate((LR_list), axis=2)  # h, w, t

    if self.opt['phase'] == 'train':
        '''
        If not using individual image augmentations, this cropping should be faster, only once
        '''
        # crop patches randomly. If not using otf noise, crop all concatenated images
        if not self.otf_noise:
            HR, LR, hr_crop_params, _ = random_crop_mod(HR, LR, LR_size, scale)
            if self.y_only and self.srcolors:
                LR_bicubic, _, _, _ = random_crop_mod(LR_bicubic, _, LR_size, scale, hr_crop_params)
                HR_center, _, _, _ = random_crop_mod(HR_center, _, LR_size, scale, hr_crop_params)
                # tmp_vis(LR_bicubic, False)
                # tmp_vis(HR_center, False)

        # data augmentation
        #TODO: use BasicSR augmentations
        #TODO: use variables from config
        LR, HR, LR_bicubic, HR_center = augmentation()([LR, HR, LR_bicubic, HR_center])

    # tmp_vis(HR, False)
    # tmp_vis(LR, False)
    # tmp_vis(LR_bicubic, False)
    # tmp_vis(HR_center, False)

    if self.y_only:
        HR = util.np2tensor(HR, normalize=znorm, bgr2rgb=False, add_batch=False)  # Tensor, [CT',H',W'] or [T, H, W]
        LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=False, add_batch=False)  # Tensor, [CT',H',W'] or [T, H, W]
    else:
        HR = util.np2tensor(HR, normalize=znorm, bgr2rgb=True, add_batch=False)  # Tensor, [CT',H',W'] or [T, H, W]
        LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=True, add_batch=False)  # Tensor, [CT',H',W'] or [T, H, W]

    #TODO: TMP to test generating 3 channel images for SR loss
    # HR = util.np2tensor(HR, normalize=znorm, bgr2rgb=False, add_batch=True)
    # LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=False, add_batch=True)

    # if self.srcolors:
    #     HR = HR.view(c, t, HR_size, HR_size)  # Tensor, [C,T,H,W]
    if not self.y_only:
        HR = HR.view(c, t, HR_size, HR_size)  # Tensor, [C,T,H,W]
        LR = LR.view(c, t, LR_size, LR_size)  # Tensor, [C,T,H,W]
        if self.shape == 'TCHW':
            HR = HR.transpose(0, 1)  # Tensor, [T,C,H,W]
            LR = LR.transpose(0, 1)  # Tensor, [T,C,H,W]

    # generate Cr, Cb channels using bicubic interpolation
    #TODO: check, it might be easier to return the whole image and separate it later when needed
    if self.y_only and self.srcolors:
        LR_bicubic = util.bgr2ycbcr(LR_bicubic, only_y=False)
        # HR_center = util.bgr2ycbcr(HR_center, only_y=False)  # not needed, can directly use the rgb image
        ## LR_bicubic = util.ycbcr2rgb(LR_bicubic, only_y=False)  # test, looks ok
        ## HR_center = util.ycbcr2rgb(HR_center, only_y=False)  # test, looks ok
        ## _, SR_cb, SR_cr = util.bgr2ycbcr(LR_bicubic, only_y=False, separate=True)
        LR_bicubic = util.np2tensor(LR_bicubic, normalize=znorm, bgr2rgb=False, add_batch=False)
        # HR_center = util.np2tensor(HR_center, normalize=znorm, bgr2rgb=False, add_batch=False)  # will test using the rgb image instead
        HR_center = util.np2tensor(HR_center, normalize=znorm, bgr2rgb=True, add_batch=False)
        #TODO: TMP to test generating 3 channel images for SR loss
        # LR_bicubic = util.np2tensor(LR_bicubic, normalize=znorm, bgr2rgb=False, add_batch=True)
        # HR_center = util.np2tensor(HR_center, normalize=znorm, bgr2rgb=False, add_batch=True)
    elif self.y_only and not self.srcolors:
        LR_bicubic = []
        HR_center = []
    else:
        HR_center = HR[:, idx_center, :, :] if self.shape == 'CTHW' else HR[idx_center, :, :, :]
        LR_bicubic = []

    # return toTensor(LR), toTensor(HR)
    return {'LR': LR, 'HR': HR, 'LR_path': LR_dir, 'HR_path': HR_dir,
            'LR_bicubic': LR_bicubic, 'HR_center': HR_center}
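
# --- Usage sketch (illustration only, not part of the original dataset code) ---
# A self-contained toy version of the frameskip window sampling used in the
# training branch above: a window of `num_frames` frames spaced `frameskip`
# apart must fit inside the clip, so the start index is drawn from the range
# that remains. `sample_frame_window` is a hypothetical helper name.
def sample_frame_window(clip_len, num_frames=3, max_frameskip=0):
    import random  # local import to keep the sketch self-contained
    if max_frameskip > 0:
        # cap the skip so the window can never exceed the clip length
        max_frameskip = min(max_frameskip, clip_len // (num_frames - 1))
        frameskip = random.randint(1, max_frameskip)
    else:
        frameskip = 1
    assert (num_frames - 1) * frameskip <= clip_len - 1
    start = random.randint(0, (clip_len - 1) - (num_frames - 1) * frameskip)
    return [start + i * frameskip for i in range(num_frames)]

# e.g. sample_frame_window(31, num_frames=3, max_frameskip=4)
# could return [7, 11, 15] (start=7, frameskip=4)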
def __getitem__(self, idx):
    scale = self.opt.get('scale', 4)
    idx_center = (self.num_frames - 1) // 2
    h_LR = None
    w_LR = None

    # Default case: tensor will result in the [0,1] range
    # Alternative: tensor will be z-normalized to the [-1,1] range
    znorm = self.opt.get('znorm', False)

    # only one video and paths_LR/paths_HR is already the video dir
    video_dir = ""

    # list the frames in the directory
    # hr_dir = self.trainset_dir + '/' + video_dir + '/hr'

    '''
    List based frames loading
    '''
    paths_LR = util.get_image_paths(self.opt['data_type'], os.path.join(self.paths_LR, video_dir))
    assert self.num_frames <= len(paths_LR), (
        f'num_frames must be smaller than the number of frames per video, check {video_dir}')

    idx_frame = idx
    LR_name = paths_LR[idx_frame + 1]  # center frame
    # print(LR_name)
    # print(len(self.video_list))

    # read LR frames
    # HR_list = []
    LR_list = []
    resize_type = None
    LR_bicubic = None
    for i_frame in range(self.num_frames):
        if idx_frame == len(self.video_list) - 2 and self.num_frames == 3:
            # second to last frame: repeat the last frame once
            # print("second to last frame:", i_frame)
            if i_frame == 0:
                LR_img = util.read_img(None, paths_LR[int(idx_frame)], out_nc=self.image_channels)
            else:
                LR_img = util.read_img(None, paths_LR[int(idx_frame) + 1], out_nc=self.image_channels)
        elif idx_frame == len(self.video_list) - 1 and self.num_frames == 3:
            # last frame: repeat it for the whole window
            # print("last frame:", i_frame)
            LR_img = util.read_img(None, paths_LR[int(idx_frame)], out_nc=self.image_channels)
        else:
            # every other internal frame
            # print("normal frame:", idx_frame)
            LR_img = util.read_img(None, paths_LR[int(idx_frame) + i_frame], out_nc=self.image_channels)

        #TODO: check if this is necessary
        LR_img = util.modcrop(LR_img, scale)

        # get the bicubic upscale of the center frame to concatenate for SR
        if self.y_only and self.srcolors and i_frame == idx_center:
            if self.opt.get('denoise_LRbic', False):
                LR_bicubic = transforms.RandomAverageBlur(p=1, kernel_size=3)(LR_img)
                # LR_bicubic = transforms.RandomBoxBlur(p=1, kernel_size=3)(LR_img)
            else:
                LR_bicubic = LR_img
            LR_bicubic, _ = Scale(img=LR_bicubic, scale=1 / scale, algo=777)  # bicubic upscale
            # HR_center = HR_img
            # tmp_vis(LR_bicubic, False)
            # tmp_vis(HR_center, False)

        if self.y_only:
            # extract Y channel from frames
            # normal path, only Y for both
            LR_img = util.bgr2ycbcr(LR_img, only_y=True)
            # expand Y images to add the channel dimension
            # normal path, only Y for both
            LR_img = util.fix_img_channels(LR_img, 1)

        # print("HR_img.shape: ", HR_img.shape)
        # print("LR_img.shape", LR_img.shape)

        LR_list.append(LR_img)  # h, w, c

        if not self.y_only and (not h_LR or not w_LR):
            h_LR, w_LR, c = LR_img.shape

    if not self.y_only:
        t = self.num_frames
        LR = [np.asarray(LT) for LT in LR_list]  # list -> numpy, input: list of numpy arrays [H,W,C]
        LR = np.asarray(LR)  # numpy, [T,H,W,C]
        LR = LR.transpose(1, 2, 3, 0).reshape(h_LR, w_LR, -1)  # numpy, [Hl',Wl',CT]
    else:
        LR = np.concatenate((LR_list), axis=2)  # h, w, t

    if self.y_only:
        LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=False, add_batch=False)  # Tensor, [CT',H',W'] or [T, H, W]
    else:
        LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=True, add_batch=False)  # Tensor, [CT',H',W'] or [T, H, W]
        LR = LR.view(c, t, h_LR, w_LR)  # Tensor, [C,T,H,W]
        if self.shape == 'TCHW':
            LR = LR.transpose(0, 1)  # Tensor, [T,C,H,W]

    if self.y_only and self.srcolors:
        # generate Cr, Cb channels using bicubic interpolation
        LR_bicubic = util.bgr2ycbcr(LR_bicubic, only_y=False)
        LR_bicubic = util.np2tensor(LR_bicubic, normalize=znorm, bgr2rgb=False, add_batch=False)
        HR_center = []
    else:
        LR_bicubic = []
        HR_center = []

    # return toTensor(LR), toTensor(HR)
    return {'LR': LR, 'LR_path': LR_name, 'LR_bicubic': LR_bicubic, 'HR_center': HR_center}
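
# --- Usage sketch (illustration only, not part of the original dataset code) ---
# A self-contained toy version of the test-time edge handling above for
# num_frames == 3: near the end of the sequence the window is clamped by
# repeating the last frame instead of reading out of range. `test_frame_window`
# is a hypothetical helper name; the real method compares against
# len(self.video_list) and reads the images directly instead of returning indices.
def test_frame_window(idx_frame, seq_len, num_frames=3):
    indices = []
    for i_frame in range(num_frames):
        if idx_frame == seq_len - 2 and num_frames == 3:
            # second to last frame: window becomes [i, i+1, i+1]
            indices.append(idx_frame if i_frame == 0 else idx_frame + 1)
        elif idx_frame == seq_len - 1 and num_frames == 3:
            # last frame: window becomes [i, i, i]
            indices.append(idx_frame)
        else:
            # internal frames: window is [i, i+1, i+2]
            indices.append(idx_frame + i_frame)
    return indices

# e.g. test_frame_window(0, 10) -> [0, 1, 2]
#      test_frame_window(8, 10) -> [8, 9, 9]
#      test_frame_window(9, 10) -> [9, 9, 9]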