def test_composevideo(self):
    """ComposeVideo should apply chained image/video transforms to a clip,
    and a no-argument ComposeVideo should pass a clip through without error."""
    # Combined image + video transform pipeline.
    pipeline = ComposeVideo([CenterCrop(30)], [RandomCropVideo(30)])
    clip = self._video(10)
    clip = pipeline(clip)
    # Default (empty) pipeline must also accept a clip.
    identity = ComposeVideo()
    clip = self._video(10)
    clip = identity(clip)
def __init__(self, train_dir, valid_dir, batch_size=200, n_frames=20):
    """Build train/validation GulpVideoDataset loaders sharing one transform.

    Args:
        train_dir: directory of gulped training videos.
        valid_dir: directory of gulped validation videos.
        batch_size: batch size for BOTH loaders. (Bug fix: the train loader
            previously hard-coded batch_size=10 and ignored this parameter,
            while the validation loader honored it.)
        n_frames: number of frames sampled per clip.
    """
    transforms = ComposeVideo([CenterCrop(128), Scale((224, 224))])
    self.n_frames = n_frames
    # NOTE(review): the positional args (1, False) are undocumented here —
    # presumably step_size and is_val; confirm against GulpVideoDataset.
    self.train_dataset = GulpVideoDataset(train_dir, n_frames, 1, False,
                                          transform=transforms)
    self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size,
                                   shuffle=False, num_workers=8, drop_last=True)
    self.val_dataset = GulpVideoDataset(valid_dir, n_frames, 1, False,
                                        transform=transforms)
    self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size,
                                 shuffle=False, num_workers=8, drop_last=True)
def get_spot_check_transform():
    """Return a ComposeVideo pipeline for visual spot checks: center-crop
    each frame to 512, then randomly flip the clip horizontally/vertically."""
    return ComposeVideo(
        img_transforms=[CenterCrop(512)],
        video_transforms=[RandHorFlipVideo(), RandVerFlipVideo()],
    )
def test_compose(self):
    """Compose should apply image transforms to a single image; an empty
    ComposeVideo should also accept a single image without error."""
    pipeline = Compose([CenterCrop(30)])
    image = self._img()
    image = pipeline(image)
    # NOTE(review): ComposeVideo() on a single image looks like a copy-paste
    # from test_composevideo — confirm it isn't meant to be Compose().
    identity = ComposeVideo()
    image = self._img()
    image = identity(image)
# I've only pushed it as an example.
dataset_path = "./data"
repo = "epic-kitchens/action-models"
class_counts = (125, 352)
base_model = "resnet50"
batch_size = 1
segment_count = 8  # fix: was assigned twice with the same value; one binding suffices
snippet_length = 1   # Number of frames composing the snippet, 1 for RGB, 5 for optical flow
snippet_channels = 3  # Number of channels in a frame, 3 for RGB, 2 for optical flow
height, width = 224, 224

scale = ComposeVideo([Scale((height, width))])

# Eight segments each composed of one frame.
# Each segment is ten seconds apart.
# NOTE(review): `video_dataset` alias kept in case later (unseen) code uses it.
dataset = video_dataset = VideoDataset(dataset_path,
                                       num_frames=snippet_length * segment_count,
                                       step_size=10, transform=scale, is_val=False)
loader = DataLoader(dataset, batch_size=batch_size, num_workers=0, shuffle=False)
def main():
    """Run TSM inference over a video dataset, collecting per-video
    (verb, noun) argmax predictions and optionally per-video logit arrays.

    Returns:
        0 on success, 1 if the optical-flow checkpoint cannot be loaded.
    """
    if args.optical_flow:
        dataset = OpticalFlowDataset(args.dataset_path, num_frames=args.max_frames,
                                     step_size=1)
        loader = DataLoaderPytorch(dataset, batch_size=args.video_batch_size,
                                   num_workers=0, shuffle=False)
    else:
        scale = ComposeVideo([Scale((HEIGHT, WIDTH))])
        dataset = VideoDataset(args.dataset_path, num_frames=args.max_frames,
                               step_size=1, is_val=True, transform=scale,
                               stack=True, random_offset=False)
        loader = DataLoader(dataset, batch_size=args.video_batch_size,
                            num_workers=0, shuffle=False)

    repo = "epic-kitchens/action-models"
    class_counts = (125, 352)
    base_model = "resnet50"
    t = "RGB" if not args.optical_flow else "Flow"
    model = torch.hub.load(repo, "TSM", class_counts, args.segment_count, t,
                           base_model=base_model, pretrained="epic-kitchens").cuda()
    try:
        if args.optical_flow:
            checkpoint = torch.load(flow_checkpoint_path)
            model.load_state_dict(checkpoint['state_dict'])
    except Exception:  # fix: was a bare except, which also swallowed KeyboardInterrupt/SystemExit
        print("Unable to load TSM Optical Flow checkpoint.")
        print("Please download it from: https://data.bris.ac.uk/data/dataset/2tw6gdvmfj3f12papdy24flvmo")
        return 1

    logits_dir = os.path.join("./output/", os.path.basename(os.path.abspath(args.dataset_path)))
    last_id, last_logits = None, []
    if args.store_logits and not os.path.isdir(logits_dir):
        os.mkdir(logits_dir)

    results = defaultdict(list)
    for chunk, youtube_id in batch_it(loader):
        features = model.features(chunk)
        verb_logits, noun_logits = model.logits(features)
        verb_logits_cpu, noun_logits_cpu = verb_logits.cpu(), noun_logits.cpu()
        verbs = verb_logits_cpu.argmax(dim=1).numpy().tolist()
        nouns = noun_logits_cpu.argmax(dim=1).numpy().tolist()
        results[youtube_id].extend(list(zip(verbs, nouns)))

        if args.store_logits:
            if last_id is None:
                # First batch seen: start accumulating for this video.
                last_id = youtube_id
                last_logits = [verb_logits_cpu.numpy(), noun_logits_cpu.numpy()]
                continue
            if last_id != youtube_id:
                # New video: flush the previous video's accumulated logits.
                np.save(os.path.join(logits_dir, "{}_verb.npy".format(last_id)), last_logits[0])
                np.save(os.path.join(logits_dir, "{}_noun.npy".format(last_id)), last_logits[1])
                last_id = youtube_id
                # fix: previously stored raw tensors here, inconsistent with the
                # ndarray accumulator initialized above.
                last_logits = [verb_logits_cpu.numpy(), noun_logits_cpu.numpy()]
            else:
                # fix: concatenate ndarrays, not tensors, keeping the accumulator
                # a consistent numpy type for np.save.
                last_logits[0] = np.concatenate((last_logits[0], verb_logits_cpu.numpy()))
                last_logits[1] = np.concatenate((last_logits[1], noun_logits_cpu.numpy()))

    # fix: close the pickle file deterministically (was an unclosed open()).
    with open(os.path.join("./output/", args.save_fname), "wb") as f:
        pickle.dump(results, f)
    # fix: guard against an empty loader — last_id would be None and the
    # filename format/save below would be meaningless.
    if args.store_logits and last_id is not None:
        np.save(os.path.join(logits_dir, "{}_verb.npy".format(last_id)), last_logits[0])
        np.save(os.path.join(logits_dir, "{}_noun.npy".format(last_id)), last_logits[1])
    return 0
def get_test_transform():
    """Return the evaluation-time pipeline: deterministic 512 center-crop on
    each frame, then conversion of the clip to a tensor."""
    return ComposeVideo(
        img_transforms=[CenterCrop(512)],
        video_transforms=[VideoToTensor()],
    )
def get_train_transform():
    """Return the training-time augmentation pipeline: random 512 crop,
    random horizontal/vertical flips, then tensor conversion. No per-frame
    image transforms are applied."""
    augmentations = [
        RandomCropVideo(512),
        RandHorFlipVideo(),
        RandVerFlipVideo(),
        VideoToTensor(),
    ]
    return ComposeVideo(img_transforms=[], video_transforms=augmentations)