def get_data_loader(opt, batch_size, num_workers, logger):
    """Create UCF101 train/val DataLoaders and a device batch-split helper.

    Parameters
    ----------
    opt : options namespace; reads data_dir, input_size, num_segments,
        new_width, new_height, train_list, val_list.
    batch_size : int, total batch size fed to each DataLoader.
    num_workers : int, DataLoader worker processes.
    logger : logger used to report dataset sizes.

    Returns
    -------
    (train_data, val_data, batch_fn) where ``batch_fn(batch, ctx)`` splits
    a (data, label) batch across the context list.
    """
    data_dir = opt.data_dir
    input_size = opt.input_size
    scale_ratios = [1.0, 0.875, 0.75, 0.66]
    normalize = video.VideoNormalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    def batch_fn(batch, ctx):
        # Spread data and labels over the available devices. With temporal
        # segments (TSN-style sampling) the data batch axis carries
        # num_segments clips per sample, hence the multiplier.
        if opt.num_segments > 1:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0,
                                  even_split=False,
                                  multiplier=opt.num_segments)
        else:
            data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0,
                                  even_split=False)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0,
                               even_split=False)
        return data, label

    # Training: multi-scale crop + random flip; validation: center crop.
    transform_train = transforms.Compose([
        video.VideoMultiScaleCrop(size=(input_size, input_size),
                                  scale_ratios=scale_ratios),
        video.VideoRandomHorizontalFlip(),
        video.VideoToTensor(),
        normalize,
    ])
    transform_test = transforms.Compose([
        video.VideoCenterCrop(size=input_size),
        video.VideoToTensor(),
        normalize,
    ])

    train_dataset = ucf101.classification.UCF101(
        setting=opt.train_list, root=data_dir, train=True,
        new_width=opt.new_width, new_height=opt.new_height,
        target_width=input_size, target_height=input_size,
        num_segments=opt.num_segments, transform=transform_train)
    val_dataset = ucf101.classification.UCF101(
        setting=opt.val_list, root=data_dir, train=False,
        new_width=opt.new_width, new_height=opt.new_height,
        target_width=input_size, target_height=input_size,
        num_segments=opt.num_segments, transform=transform_test)
    logger.info('Load %d training samples and %d validation samples.'
                % (len(train_dataset), len(val_dataset)))

    # Multi-segment batches need the custom TSN batchify function so the
    # per-sample segments are stacked correctly.
    if opt.num_segments > 1:
        train_data = gluon.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True,
            num_workers=num_workers, batchify_fn=tsn_mp_batchify_fn)
        val_data = gluon.data.DataLoader(
            val_dataset, batch_size=batch_size, shuffle=False,
            num_workers=num_workers, batchify_fn=tsn_mp_batchify_fn)
    else:
        train_data = gluon.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True,
            num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            val_dataset, batch_size=batch_size, shuffle=False,
            num_workers=num_workers)

    return train_data, val_data, batch_fn
################################################################
# Data Augmentation and Data Loader
# ---------------------------------
#
# Augmenting video is not the same as augmenting a single image: every
# frame in a sampled clip has to undergo the *same* random transformation
# (e.g. an identical crop window), so we use transform functions that
# operate on lists of frames at once.
# See `video.py <../../../gluoncv/data/transforms/video.py>`_ for details.
# Most of the augmentation strategies used here come from [Wang15]_.

transform_train = transforms.Compose([
    # Frames arrive at a fixed 256x340; a crop size is sampled from
    # {256, 224, 192, 168} (scale ratios 1.0 / 0.875 / 0.75 / 0.66) and
    # the cropped region is resized to 224 x 224.
    video.VideoMultiScaleCrop(size=(224, 224),
                              scale_ratios=[1.0, 0.875, 0.75, 0.66]),
    # One flip decision, applied consistently to every frame of the clip.
    video.VideoRandomHorizontalFlip(),
    # height*width*channels uint8 in [0, 255] -> channels*height*width
    # float in [0, 1], per frame.
    video.VideoToTensor(),
    # Channel-wise mean/std normalization (ImageNet statistics).
    video.VideoNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

##################################################################
# With the transform functions in place we can define the data loaders
# for the training datasets.

# Batch size on each individual GPU.
per_device_batch_size = 25