Пример #1
0
def test_video_classifier_finetune_fiftyone(tmpdir):

    with mock_encoded_video_dataset_folder(tmpdir) as (
            dir_name,
            total_duration,
    ):

        half_duration = total_duration / 2 - 1e-9

        train_dataset = fo.Dataset.from_dir(
            dir_name,
            dataset_type=fo.types.VideoClassificationDirectoryTree,
        )
        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            batch_size=1,
        )

        for sample in datamodule.train_dataset.data:
            expected_t_shape = 5
            assert sample["video"].shape[1] == expected_t_shape

        model = VideoClassifier(num_classes=datamodule.num_classes,
                                pretrained=False,
                                backbone="slow_r50")
        trainer = flash.Trainer(fast_dev_run=True,
                                gpus=torch.cuda.device_count())
        trainer.finetune(model, datamodule=datamodule)
Пример #2
0
def test_video_classifier_finetune_from_csv(tmpdir):
    with mock_video_csv_file(tmpdir) as (mock_csv, total_duration):

        half_duration = total_duration / 2 - 1e-9

        datamodule = VideoClassificationData.from_csv(
            "file",
            "target",
            train_file=mock_csv,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            batch_size=1,
        )

        for sample in datamodule.train_dataset.data:
            expected_t_shape = 5
            assert sample["video"].shape[1] == expected_t_shape

        model = VideoClassifier(num_classes=datamodule.num_classes,
                                pretrained=False,
                                backbone="slow_r50")
        trainer = flash.Trainer(default_root_dir=tmpdir,
                                fast_dev_run=True,
                                gpus=torch.cuda.device_count())
        trainer.finetune(model, datamodule=datamodule)
Пример #3
0
def test_jit(tmpdir):
    sample_input = torch.rand(1, 3, 32, 256, 256)
    path = os.path.join(tmpdir, "test.pt")

    model = VideoClassifier(2, pretrained=False)
    model.eval()

    # pytorchvideo only works with `torch.jit.trace`
    model = torch.jit.trace(model, sample_input)

    torch.jit.save(model, path)
    model = torch.jit.load(path)

    out = model(sample_input)
    assert isinstance(out, torch.Tensor)
    assert out.shape == torch.Size([1, 2])
Пример #4
0
def test_video_classifier_finetune_fiftyone(tmpdir):

    with mock_encoded_video_dataset_folder(tmpdir) as (
        dir_name,
        total_duration,
    ):

        half_duration = total_duration / 2 - 1e-9

        train_dataset = fo.Dataset.from_dir(
            dir_name,
            dataset_type=fo.types.VideoClassificationDirectoryTree,
        )
        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
        )

        for sample in datamodule.train_dataset.data:
            expected_t_shape = 5
            assert sample["video"].shape[1] == expected_t_shape

        assert len(VideoClassifier.available_backbones()) > 5

        train_transform = {
            "post_tensor_transform": Compose([
                ApplyTransformToKey(
                    key="video",
                    transform=Compose([
                        UniformTemporalSubsample(8),
                        RandomShortSideScale(min_size=256, max_size=320),
                        RandomCrop(244),
                        RandomHorizontalFlip(p=0.5),
                    ]),
                ),
            ]),
            "per_batch_transform_on_device": Compose([
                ApplyTransformToKey(
                    key="video",
                    transform=K.VideoSequential(
                        K.Normalize(torch.tensor([0.45, 0.45, 0.45]), torch.tensor([0.225, 0.225, 0.225])),
                        K.augmentation.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                        data_format="BCTHW",
                        same_on_frame=False
                    )
                ),
            ]),
        }

        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            train_transform=train_transform
        )

        model = VideoClassifier(num_classes=datamodule.num_classes, pretrained=False)

        trainer = flash.Trainer(fast_dev_run=True)

        trainer.finetune(model, datamodule=datamodule)
        predict_transform=make_transform(val_post_tensor_transform),
        batch_size=8,
        clip_sampler="uniform",
        clip_duration=1,
        video_sampler=RandomSampler,
        decode_audio=False,
        num_workers=8)

    # 4. List the available models
    print(VideoClassifier.available_backbones())
    # out: ['efficient_x3d_s', 'efficient_x3d_xs', ... ,slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs']
    print(VideoClassifier.get_backbone_details("x3d_xs"))

    # 5. Build the VideoClassifier with a PyTorchVideo backbone.
    model = VideoClassifier(backbone="x3d_xs",
                            num_classes=datamodule.num_classes,
                            serializer=Labels(),
                            pretrained=False)

    # 6. Finetune the model
    trainer = flash.Trainer(fast_dev_run=True)
    trainer.finetune(model, datamodule=datamodule, strategy=NoFreeze())

    trainer.save_checkpoint("video_classification.pt")

    # 7. Make a prediction
    predictions = model.predict(
        os.path.join(flash.PROJECT_ROOT, "data/kinetics/predict"))
    print(predictions)
    # ['marching', 'flying_kite', 'archery', 'high_jump', 'bowling']
Пример #6
0
        predict_folder=os.path.join(_PATH_ROOT, "data/kinetics/predict"),
        train_transform=make_transform(train_post_tensor_transform),
        val_transform=make_transform(val_post_tensor_transform),
        predict_transform=make_transform(val_post_tensor_transform),
        batch_size=8,
        clip_sampler="uniform",
        clip_duration=2,
        video_sampler=RandomSampler,
        decode_audio=False,
    )

    # 4. List the available models
    print(VideoClassifier.available_models())
    # out: ['efficient_x3d_s', 'efficient_x3d_xs', ... ,slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs']
    print(VideoClassifier.get_model_details("x3d_xs"))

    # 5. Build the model - `x3d_xs` comes with `nn.Softmax` by default for their `head_activation`.
    model = VideoClassifier(model="x3d_xs", num_classes=datamodule.num_classes)
    model.serializer = Labels()

    # 6. Finetune the model
    trainer = flash.Trainer(max_epochs=3)
    trainer.finetune(model, datamodule=datamodule, strategy=NoFreeze())

    trainer.save_checkpoint("video_classification.pt")

    # 7. Make a prediction
    predictions = model.predict(
        os.path.join(_PATH_ROOT, "data/kinetics/predict"))
    print(predictions)
from flash.core.data.utils import download_data
from flash.video import VideoClassificationData, VideoClassifier

# 1. Create the DataModule
# Find more datasets at https://pytorchvideo.readthedocs.io/en/latest/data.html
download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip", "./data")

datamodule = VideoClassificationData.from_folders(
    train_folder=os.path.join(os.getcwd(), "data/kinetics/train"),
    val_folder=os.path.join(os.getcwd(), "data/kinetics/val"),
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
)

# 2. Build the task
model = VideoClassifier(backbone="x3d_xs",
                        num_classes=datamodule.num_classes,
                        pretrained=False)

# 3. Create the trainer and finetune the model
trainer = flash.Trainer(max_epochs=3, gpus=torch.cuda.device_count())
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Make a prediction
predictions = model.predict(os.path.join(os.getcwd(), "data/kinetics/predict"))
print(predictions)

# 5. Save the model!
trainer.save_checkpoint("video_classification.pt")
    "https://label-studio-testdata.s3.us-east-2.amazonaws.com/lightning-flash/video_data.zip"
)

# 2. Load export data
datamodule = VideoClassificationData.from_labelstudio(
    export_json="data/project.json",
    data_folder="data/upload/",
    val_split=0.2,
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
)

# 3. Build the task
model = VideoClassifier(
    backbone="slow_r50",
    num_classes=datamodule.num_classes,
)

# 4. Create the trainer and finetune the model
trainer = flash.Trainer(max_epochs=3)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 5. Make a prediction
datamodule = VideoClassificationData.from_folders(
    predict_folder=os.path.join(os.getcwd(), "data/test"))
predictions = trainer.predict(model, datamodule=datamodule)

# 6. Save the model!
trainer.save_checkpoint("video_classification.pt")

# 7. Visualize predictions
# 1. Create the DataModule
# Find more datasets at https://pytorchvideo.readthedocs.io/en/latest/data.html
download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip", "./data")

datamodule = VideoClassificationData.from_folders(
    train_folder="data/kinetics/train",
    val_folder="data/kinetics/val",
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
    batch_size=1,
)

# 2. Build the task
model = VideoClassifier(backbone="x3d_xs", labels=datamodule.labels, pretrained=False)

# 3. Create the trainer and finetune the model
trainer = flash.Trainer(
    max_epochs=1, gpus=torch.cuda.device_count(), strategy="ddp" if torch.cuda.device_count() > 1 else None
)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Make a prediction
datamodule = VideoClassificationData.from_folders(predict_folder="data/kinetics/predict", batch_size=1)
predictions = trainer.predict(model, datamodule=datamodule, output="labels")
print(predictions)

# 5. Save the model!
trainer.save_checkpoint("video_classification.pt")