def test_video_classifier_finetune_from_csv(tmpdir):
    """Finetune a ``VideoClassifier`` on data described by a mocked CSV file."""
    with mock_video_csv_file(tmpdir) as (mock_csv, total_duration):
        # Just under half the total duration, so the uniform clip sampler
        # yields clips with the temporal extent asserted below.
        half_duration = total_duration / 2 - 1e-9

        datamodule = VideoClassificationData.from_csv(
            "file",
            "target",
            train_file=mock_csv,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            batch_size=1,
        )

        # Every decoded clip is expected to carry 5 frames on the temporal axis.
        for sample in datamodule.train_dataset.data:
            assert sample["video"].shape[1] == 5

        model = VideoClassifier(num_classes=datamodule.num_classes, pretrained=False, backbone="slow_r50")
        trainer = flash.Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=torch.cuda.device_count())
        trainer.finetune(model, datamodule=datamodule)
def test_jit(tmpdir):
    """Check that a ``VideoClassifier`` can be traced, saved, reloaded and run."""
    sample_input = torch.rand(1, 3, 32, 256, 256)
    checkpoint_path = os.path.join(tmpdir, "test.pt")

    model = VideoClassifier(2, pretrained=False)
    model.eval()

    # pytorchvideo only works with `torch.jit.trace`
    traced = torch.jit.trace(model, sample_input)
    torch.jit.save(traced, checkpoint_path)
    loaded = torch.jit.load(checkpoint_path)

    out = loaded(sample_input)
    assert isinstance(out, torch.Tensor)
    assert out.shape == torch.Size([1, 2])
def test_video_classifier_finetune_fiftyone(tmpdir):
    """Finetune a ``VideoClassifier`` on a FiftyOne dataset built from a video folder."""
    with mock_encoded_video_dataset_folder(tmpdir) as (
        dir_name,
        total_duration,
    ):
        # Just under half the total duration, so the uniform clip sampler
        # yields clips with the temporal extent asserted below.
        half_duration = total_duration / 2 - 1e-9

        train_dataset = fo.Dataset.from_dir(
            dir_name,
            dataset_type=fo.types.VideoClassificationDirectoryTree,
        )

        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            batch_size=1,
        )

        # Every decoded clip is expected to carry 5 frames on the temporal axis.
        for sample in datamodule.train_dataset.data:
            assert sample["video"].shape[1] == 5

        model = VideoClassifier(num_classes=datamodule.num_classes, pretrained=False, backbone="slow_r50")
        trainer = flash.Trainer(fast_dev_run=True, gpus=torch.cuda.device_count())
        trainer.finetune(model, datamodule=datamodule)
# Video-classification inference example: download the Kinetics sample data,
# load pretrained weights from a checkpoint URL and predict on a folder of clips.
#
# Fix: `os` and `sys` were used below (`sys.exit`, `os.path.join`, `os.listdir`)
# but never imported; also the install hint named the wrong PyPI package
# (`torchvideo` instead of `pytorchvideo`).
import os
import sys

import torch
from torch.utils.data.sampler import RandomSampler

import flash
from flash.core.classification import Labels
from flash.core.finetuning import NoFreeze
from flash.data.utils import download_data
from flash.utils.imports import _KORNIA_AVAILABLE, _PYTORCHVIDEO_AVAILABLE
from flash.video import VideoClassificationData, VideoClassifier

if _PYTORCHVIDEO_AVAILABLE and _KORNIA_AVAILABLE:
    import kornia.augmentation as K
    from pytorchvideo.transforms import ApplyTransformToKey, RandomShortSideScale, UniformTemporalSubsample
    from torchvision.transforms import CenterCrop, Compose, RandomCrop, RandomHorizontalFlip
else:
    # Exit gracefully when the optional video dependencies are missing.
    print("Please, run `pip install pytorchvideo kornia`")
    sys.exit(0)

# 1. Download a video clip dataset. Find more dataset at https://pytorchvideo.readthedocs.io/en/latest/data.html
download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip")

# Load serving weights from a remote checkpoint; `pretrained=False` skips
# downloading the backbone's ImageNet/Kinetics weights before they get overwritten.
model = VideoClassifier.load_from_checkpoint(
    "https://flash-weights.s3.amazonaws.com/video_classification.pt", pretrained=False)

# 2. Make a prediction over every file in the predict folder.
predict_folder = "data/kinetics/predict/"
predictions = model.predict(
    [os.path.join(predict_folder, f) for f in os.listdir(predict_folder)])
print(predictions)
def test_video_classifier_finetune_fiftyone(tmpdir):
    # End-to-end finetune of a VideoClassifier on a FiftyOne dataset, first with
    # default transforms and then with a custom per-sample / on-device pipeline.
    with mock_encoded_video_dataset_folder(tmpdir) as (
        dir_name,
        total_duration,
    ):
        # Just under half the total duration, so the uniform clip sampler
        # yields clips with the temporal size asserted below.
        half_duration = total_duration / 2 - 1e-9

        train_dataset = fo.Dataset.from_dir(
            dir_name,
            dataset_type=fo.types.VideoClassificationDirectoryTree,
        )

        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
        )

        # Each decoded clip should have 5 frames along the temporal axis.
        for sample in datamodule.train_dataset.data:
            expected_t_shape = 5
            assert sample["video"].shape[1] == expected_t_shape

        # Sanity check that several backbones are registered.
        assert len(VideoClassifier.available_backbones()) > 5

        # Custom transforms: CPU per-sample augmentation plus GPU/batch-level
        # normalization and color jitter applied on device.
        train_transform = {
            "post_tensor_transform": Compose([
                ApplyTransformToKey(
                    key="video",
                    transform=Compose([
                        UniformTemporalSubsample(8),
                        RandomShortSideScale(min_size=256, max_size=320),
                        RandomCrop(244),
                        RandomHorizontalFlip(p=0.5),
                    ]),
                ),
            ]),
            "per_batch_transform_on_device": Compose([
                ApplyTransformToKey(
                    key="video",
                    # NOTE(review): data_format="BCTHW" matches the batched video
                    # layout; same_on_frame=False augments each frame independently.
                    transform=K.VideoSequential(
                        K.Normalize(torch.tensor([0.45, 0.45, 0.45]), torch.tensor([0.225, 0.225, 0.225])),
                        K.augmentation.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                        data_format="BCTHW",
                        same_on_frame=False
                    )
                ),
            ]),
        }

        # Rebuild the datamodule with the custom training transforms.
        datamodule = VideoClassificationData.from_fiftyone(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
            train_transform=train_transform
        )

        model = VideoClassifier(num_classes=datamodule.num_classes, pretrained=False)
        trainer = flash.Trainer(fast_dev_run=True)
        trainer.finetune(model, datamodule=datamodule)
def test_load_from_checkpoint_dependency_error():
    """Loading a checkpoint without the video extras must raise a helpful error."""
    expected = re.escape("'lightning-flash[video]'")
    with pytest.raises(ModuleNotFoundError, match=expected):
        VideoClassifier.load_from_checkpoint("not_a_real_checkpoint.pt")
    # NOTE(review): these keyword arguments continue a call (presumably
    # `VideoClassificationData.from_folders(`) opened before this chunk.
    train_folder=os.path.join(flash.PROJECT_ROOT, "data/kinetics/train"),
    val_folder=os.path.join(flash.PROJECT_ROOT, "data/kinetics/val"),
    predict_folder=os.path.join(flash.PROJECT_ROOT, "data/kinetics/predict"),
    train_transform=make_transform(train_post_tensor_transform),
    val_transform=make_transform(val_post_tensor_transform),
    predict_transform=make_transform(val_post_tensor_transform),
    batch_size=8,
    clip_sampler="uniform",
    clip_duration=1,
    video_sampler=RandomSampler,
    decode_audio=False,  # video-only pipeline; skip audio decoding
    num_workers=8)

# 4. List the available models
print(VideoClassifier.available_backbones())
# out: ['efficient_x3d_s', 'efficient_x3d_xs', ... ,slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs']
print(VideoClassifier.get_backbone_details("x3d_xs"))

# 5. Build the VideoClassifier with a PyTorchVideo backbone.
model = VideoClassifier(backbone="x3d_xs", num_classes=datamodule.num_classes, serializer=Labels(), pretrained=False)

# 6. Finetune the model (NoFreeze keeps the whole backbone trainable).
trainer = flash.Trainer(fast_dev_run=True)
trainer.finetune(model, datamodule=datamodule, strategy=NoFreeze())

trainer.save_checkpoint("video_classification.pt")
datamodule = VideoClassificationData.from_folders( train_folder=os.path.join(_PATH_ROOT, "data/kinetics/train"), val_folder=os.path.join(_PATH_ROOT, "data/kinetics/val"), predict_folder=os.path.join(_PATH_ROOT, "data/kinetics/predict"), train_transform=make_transform(train_post_tensor_transform), val_transform=make_transform(val_post_tensor_transform), predict_transform=make_transform(val_post_tensor_transform), batch_size=8, clip_sampler="uniform", clip_duration=2, video_sampler=RandomSampler, decode_audio=False, ) # 4. List the available models print(VideoClassifier.available_models()) # out: ['efficient_x3d_s', 'efficient_x3d_xs', ... ,slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs'] print(VideoClassifier.get_model_details("x3d_xs")) # 5. Build the model - `x3d_xs` comes with `nn.Softmax` by default for their `head_activation`. model = VideoClassifier(model="x3d_xs", num_classes=datamodule.num_classes) model.serializer = Labels() # 6. Finetune the model trainer = flash.Trainer(max_epochs=3) trainer.finetune(model, datamodule=datamodule, strategy=NoFreeze()) trainer.save_checkpoint("video_classification.pt") # 7. Make a prediction predictions = model.predict(
from flash.core.data.utils import download_data
from flash.video import VideoClassificationData, VideoClassifier

# 1. Create the DataModule
# Find more datasets at https://pytorchvideo.readthedocs.io/en/latest/data.html
download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip", "./data")

cwd = os.getcwd()
datamodule = VideoClassificationData.from_folders(
    train_folder=os.path.join(cwd, "data/kinetics/train"),
    val_folder=os.path.join(cwd, "data/kinetics/val"),
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
)

# 2. Build the task
model = VideoClassifier(backbone="x3d_xs", num_classes=datamodule.num_classes, pretrained=False)

# 3. Create the trainer and finetune the model
trainer = flash.Trainer(max_epochs=3, gpus=torch.cuda.device_count())
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Make a prediction
predictions = model.predict(os.path.join(cwd, "data/kinetics/predict"))
print(predictions)

# 5. Save the model!
trainer.save_checkpoint("video_classification.pt")
"https://label-studio-testdata.s3.us-east-2.amazonaws.com/lightning-flash/video_data.zip" ) # 2. Load export data datamodule = VideoClassificationData.from_labelstudio( export_json="data/project.json", data_folder="data/upload/", val_split=0.2, clip_sampler="uniform", clip_duration=1, decode_audio=False, ) # 3. Build the task model = VideoClassifier( backbone="slow_r50", num_classes=datamodule.num_classes, ) # 4. Create the trainer and finetune the model trainer = flash.Trainer(max_epochs=3) trainer.finetune(model, datamodule=datamodule, strategy="freeze") # 5. Make a prediction datamodule = VideoClassificationData.from_folders( predict_folder=os.path.join(os.getcwd(), "data/test")) predictions = trainer.predict(model, datamodule=datamodule) # 6. Save the model! trainer.save_checkpoint("video_classification.pt") # 7. Visualize predictions
# 1. Create the DataModule
# Find more datasets at https://pytorchvideo.readthedocs.io/en/latest/data.html
download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip", "./data")

datamodule = VideoClassificationData.from_folders(
    train_folder="data/kinetics/train",
    val_folder="data/kinetics/val",
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
    batch_size=1,
)

# 2. Build the task
model = VideoClassifier(backbone="x3d_xs", labels=datamodule.labels, pretrained=False)

# 3. Create the trainer and finetune the model
n_gpus = torch.cuda.device_count()
trainer = flash.Trainer(
    max_epochs=1,
    gpus=n_gpus,
    strategy="ddp" if n_gpus > 1 else None,
)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Make a prediction
datamodule = VideoClassificationData.from_folders(predict_folder="data/kinetics/predict", batch_size=1)
predictions = trainer.predict(model, datamodule=datamodule, output="labels")
print(predictions)

# 5. Save the model!
trainer.save_checkpoint("video_classification.pt")
    # NOTE(review): these keyword arguments continue a call (presumably a
    # `VideoClassificationData.from_*` constructor) opened before this chunk.
    train_data_path=os.path.join(_PATH_ROOT, "data/kinetics/train"),
    val_data_path=os.path.join(_PATH_ROOT, "data/kinetics/val"),
    predict_data_path=os.path.join(_PATH_ROOT, "data/kinetics/predict"),
    clip_sampler="uniform",
    clip_duration=2,
    video_sampler=RandomSampler,
    decode_audio=False,  # video-only pipeline; skip audio decoding
    train_transform=make_transform(train_post_tensor_transform),
    val_transform=make_transform(val_post_tensor_transform),
    predict_transform=make_transform(val_post_tensor_transform),
    num_workers=8,
    batch_size=8,
)

# 4. List the available models
print(VideoClassifier.available_models())
# out: ['efficient_x3d_s', 'efficient_x3d_xs', ... ,slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs']
print(VideoClassifier.get_model_details("x3d_xs"))

# 5. Build the model - `x3d_xs` comes with `nn.Softmax` by default for their `head_activation`.
model = VideoClassifier(model="x3d_xs", num_classes=datamodule.num_classes)
model.serializer = Labels()

# 6. Finetune the model (NoFreeze keeps the whole backbone trainable).
trainer = flash.Trainer(max_epochs=3, gpus=1)
trainer.finetune(model, datamodule=datamodule, strategy=NoFreeze())

trainer.save_checkpoint("video_classification.pt")

# 7. Make a prediction
# NOTE(review): this assignment continues past the end of this chunk.
val_folder = os.path.join(