Example #1
def test_composevideo(self):
    # Per-frame and clip-level transforms composed together.
    img_transforms = [CenterCrop(30)]
    video_transforms = [RandomCropVideo(30)]
    compose = ComposeVideo(img_transforms, video_transforms)
    vid = self._video(10)
    vid = compose(vid)
    # ComposeVideo with no transforms should still accept a clip.
    compose = ComposeVideo()
    vid = self._video(10)
    vid = compose(vid)
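
The test relies on a _video helper from the test case, which is not shown here. A minimal sketch of one plausible implementation, assuming a clip is represented as a list of PIL frames (the frame size and mode are arbitrary choices, not taken from the original suite):

from PIL import Image

def _video(self, n_frames):
    # Hypothetical helper: build a dummy clip as a list of blank RGB frames.
    return [Image.new("RGB", (64, 64)) for _ in range(n_frames)]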
Example #2
def __init__(self, train_dir, valid_dir, batch_size=200, n_frames=20):

    transforms = ComposeVideo([CenterCrop(128), Scale((224, 224))])
    self.n_frames = n_frames

    self.train_dataset = GulpVideoDataset(train_dir,
                                          n_frames,
                                          1,      # step size between frames
                                          False,  # validation flag
                                          transform=transforms)
    self.train_loader = DataLoader(self.train_dataset,
                                   batch_size=10,  # fixed here; the batch_size argument applies to validation below
                                   shuffle=False,
                                   num_workers=8,
                                   drop_last=True)

    self.val_dataset = GulpVideoDataset(valid_dir,
                                        n_frames,
                                        1,      # step size between frames
                                        False,  # validation flag
                                        transform=transforms)
    self.val_loader = DataLoader(self.val_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=8,
                                 drop_last=True)
Example #3
def get_spot_check_transform():
    # Center crop each frame, then apply random clip-level flips (no tensor conversion).
    img_transforms = [CenterCrop(512),]
    #video_transforms = [RandomCropVideo(512), RandHorFlipVideo(), RandVerFlipVideo(),]
    video_transforms = [RandHorFlipVideo(), RandVerFlipVideo(),]

    return ComposeVideo(img_transforms=img_transforms, video_transforms=video_transforms)
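
For context, a quick usage sketch: the returned ComposeVideo is assumed to be callable directly on a clip given as a list of frames (the dummy frames below are illustrative only).

from PIL import Image

spot_check = get_spot_check_transform()
clip = [Image.new("RGB", (640, 640)) for _ in range(8)]
clip = spot_check(clip)  # frames center-cropped to 512, clip randomly flipped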
Example #4
def test_compose(self):
    # Plain per-frame Compose applied to a single image.
    transforms = [CenterCrop(30)]
    compose = Compose(transforms)
    img = self._img()
    img = compose(img)
    # An empty ComposeVideo is also exercised on a single image.
    compose = ComposeVideo()
    img = self._img()
    img = compose(img)
Example #5
# I've only pushed it as an example.
dataset_path = "./data"

repo = "epic-kitchens/action-models"

class_counts = (125, 352)
segment_count = 8
base_model = "resnet50"

batch_size = 1
snippet_length = 1  # Number of frames composing the snippet, 1 for RGB, 5 for optical flow
snippet_channels = 3  # Number of channels in a frame, 3 for RGB, 2 for optical flow
height, width = 224, 224

scale = ComposeVideo([Scale((height, width))])

# Eight segments each composed of one frame.
# Each segment is ten seconds apart.
dataset = VideoDataset(dataset_path,
                       num_frames=snippet_length * segment_count,
                       step_size=10,
                       transform=scale,
                       is_val=False)

loader = DataLoader(dataset,
                    batch_size=batch_size,
                    num_workers=0,
                    shuffle=False)
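
The loader batches still need flattening into the stacked-channel layout the epic-kitchens hub models take as input, i.e. (batch, segment_count * snippet_length * snippet_channels, height, width). A minimal sketch of that step, assuming the loader yields (clip, label) pairs with clips shaped (batch, frames, channels, height, width); the actual output of VideoDataset may differ.

import torch

model = torch.hub.load(repo, "TSN", class_counts, segment_count, "RGB",
                       base_model=base_model, pretrained="epic-kitchens")
model.eval()

with torch.no_grad():
    for clip, _ in loader:
        # Flatten the frame dimension into the channel dimension.
        inputs = clip.reshape(batch_size,
                              segment_count * snippet_length * snippet_channels,
                              height, width)
        features = model.features(inputs)
        verb_logits, noun_logits = model.logits(features)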
Example #6
def main():

    if args.optical_flow:
        dataset = OpticalFlowDataset(args.dataset_path,
                                     num_frames=args.max_frames,
                                     step_size=1)

        loader = DataLoaderPytorch(dataset,
                                   batch_size=args.video_batch_size,
                                   num_workers=0,
                                   shuffle=False)
    else:
        scale = ComposeVideo([Scale((HEIGHT, WIDTH))])

        dataset = VideoDataset(args.dataset_path,
                               num_frames=args.max_frames,
                               step_size=1,
                               is_val=True,
                               transform=scale,
                               stack=True,
                               random_offset=False)

        loader = DataLoader(dataset,
                            batch_size=args.video_batch_size,
                            num_workers=0,
                            shuffle=False)

    repo = "epic-kitchens/action-models"
    class_counts = (125, 352)
    base_model = "resnet50"

    t = "RGB" if not args.optical_flow else "Flow"
    model = torch.hub.load(repo, "TSM",
                           class_counts, args.segment_count, t,
                           base_model=base_model,
                           pretrained="epic-kitchens").cuda()


    try:
        if args.optical_flow:
            # flow_checkpoint_path is expected to be defined elsewhere in this script.
            checkpoint = torch.load(flow_checkpoint_path)
            model.load_state_dict(checkpoint['state_dict'])
    except Exception:
        print("Unable to load TSM Optical Flow checkpoint.")
        print("Please download it from: https://data.bris.ac.uk/data/dataset/2tw6gdvmfj3f12papdy24flvmo")
        return 1


    logits_dir = os.path.join("./output/", 
                              os.path.basename(os.path.abspath(args.dataset_path)))
    last_id, last_logits = None, []

    if args.store_logits and not os.path.isdir(logits_dir):
        os.mkdir(logits_dir)

    results = defaultdict(list)

    # batch_it is not defined in this snippet; a hypothetical sketch follows this example.
    for chunk, youtube_id in batch_it(loader):

        features = model.features(chunk)
        verb_logits, noun_logits = model.logits(features)

        # Detach so .numpy() works outside a no_grad context.
        verb_logits_cpu, noun_logits_cpu = verb_logits.detach().cpu(), noun_logits.detach().cpu()

        verbs = verb_logits_cpu.argmax(dim=1).numpy().tolist()
        nouns = noun_logits_cpu.argmax(dim=1).numpy().tolist()

        results[youtube_id].extend(list(zip(verbs, nouns)))

        if args.store_logits:
            if last_id is None:
                last_id = youtube_id
                last_logits = [verb_logits_cpu.numpy(), noun_logits_cpu.numpy()]
                continue

            if last_id != youtube_id:
                np.save(os.path.join(logits_dir, "{}_verb.npy".format(last_id)), last_logits[0])
                np.save(os.path.join(logits_dir, "{}_noun.npy".format(last_id)), last_logits[1])

                last_id = youtube_id
                last_logits = [verb_logits_cpu.numpy(), noun_logits_cpu.numpy()]
            else:
                last_logits[0] = np.concatenate((last_logits[0], verb_logits_cpu.numpy()))
                last_logits[1] = np.concatenate((last_logits[1], noun_logits_cpu.numpy()))

    with open(os.path.join("./output/", args.save_fname), "wb") as f:
        pickle.dump(results, f)

    if args.store_logits:
        np.save(os.path.join(logits_dir, "{}_verb.npy".format(last_id)), last_logits[0])
        np.save(os.path.join(logits_dir, "{}_noun.npy".format(last_id)), last_logits[1])

    return 0
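
batch_it is not shown in this snippet. A hypothetical sketch of what such a helper could look like, assuming the loader yields (clip, youtube_id) pairs and that chunks must land on the same GPU as the model (both are assumptions; the original helper may group or batch clips differently):

def batch_it(loader):
    # Hypothetical helper: pair each GPU-resident chunk with its video id.
    for clip, youtube_id in loader:
        yield clip.cuda(), youtube_id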
Example #7
def get_test_transform():
    # Deterministic center crop per frame, then convert the clip to a tensor.
    img_transforms = [CenterCrop(512),]
    video_transforms = [VideoToTensor(),]

    return ComposeVideo(img_transforms=img_transforms, video_transforms=video_transforms)
Example #8
def get_train_transform():
    # Random crop and random flips for augmentation, then convert the clip to a tensor.
    img_transforms = []
    video_transforms = [RandomCropVideo(512), RandHorFlipVideo(), RandVerFlipVideo(), VideoToTensor(),]

    return ComposeVideo(img_transforms=img_transforms, video_transforms=video_transforms)
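
For context, a minimal sketch of wiring these factories into the GulpVideoDataset/DataLoader setup from Example #2; the paths, clip length, and loader settings below are placeholders, not values from the original project.

from torch.utils.data import DataLoader

train_dataset = GulpVideoDataset("./gulp/train",  # placeholder path
                                 20,              # frames per clip
                                 1,               # step size
                                 False,           # validation flag
                                 transform=get_train_transform())
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True,
                          num_workers=8, drop_last=True)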