predict_transform=make_transform(val_post_tensor_transform), batch_size=8, clip_sampler="uniform", clip_duration=1, video_sampler=RandomSampler, decode_audio=False, num_workers=8) # 4. List the available models print(VideoClassifier.available_backbones()) # out: ['efficient_x3d_s', 'efficient_x3d_xs', ... ,slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs'] print(VideoClassifier.get_backbone_details("x3d_xs")) # 5. Build the VideoClassifier with a PyTorchVideo backbone. model = VideoClassifier(backbone="x3d_xs", num_classes=datamodule.num_classes, serializer=Labels(), pretrained=False) # 6. Finetune the model trainer = flash.Trainer(fast_dev_run=True) trainer.finetune(model, datamodule=datamodule, strategy=NoFreeze()) trainer.save_checkpoint("video_classification.pt") # 7. Make a prediction predictions = model.predict( os.path.join(flash.PROJECT_ROOT, "data/kinetics/predict")) print(predictions) # ['marching', 'flying_kite', 'archery', 'high_jump', 'bowling']
from flash.core.data.utils import download_data
from flash.video import VideoClassificationData, VideoClassifier

# NOTE(review): `os`, `torch` and `flash` are used below but are not imported
# in this snippet - presumably they are imported earlier in the file; confirm.

# 1. Create the DataModule
# Find more datasets at https://pytorchvideo.readthedocs.io/en/latest/data.html
download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip", "./data")

# Train/val folders are resolved against the current working directory.
train_dir = os.path.join(os.getcwd(), "data/kinetics/train")
val_dir = os.path.join(os.getcwd(), "data/kinetics/val")
datamodule = VideoClassificationData.from_folders(
    train_folder=train_dir,
    val_folder=val_dir,
    clip_sampler="uniform",
    clip_duration=1,
    decode_audio=False,
)

# 2. Build the task; the class count comes from the datamodule.
model = VideoClassifier(
    backbone="x3d_xs",
    num_classes=datamodule.num_classes,
    pretrained=False,
)

# 3. Create the trainer (using every CUDA device torch reports) and finetune
#    the model with the "freeze" strategy.
gpu_count = torch.cuda.device_count()
trainer = flash.Trainer(max_epochs=3, gpus=gpu_count)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Make a prediction on the predict folder under the working directory.
predict_dir = os.path.join(os.getcwd(), "data/kinetics/predict")
predictions = model.predict(predict_dir)
print(predictions)

# 5. Save the model!
trainer.save_checkpoint("video_classification.pt")
predict_folder=os.path.join(_PATH_ROOT, "data/kinetics/predict"), train_transform=make_transform(train_post_tensor_transform), val_transform=make_transform(val_post_tensor_transform), predict_transform=make_transform(val_post_tensor_transform), batch_size=8, clip_sampler="uniform", clip_duration=2, video_sampler=RandomSampler, decode_audio=False, ) # 4. List the available models print(VideoClassifier.available_models()) # out: ['efficient_x3d_s', 'efficient_x3d_xs', ... ,slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs'] print(VideoClassifier.get_model_details("x3d_xs")) # 5. Build the model - `x3d_xs` comes with `nn.Softmax` by default for their `head_activation`. model = VideoClassifier(model="x3d_xs", num_classes=datamodule.num_classes) model.serializer = Labels() # 6. Finetune the model trainer = flash.Trainer(max_epochs=3) trainer.finetune(model, datamodule=datamodule, strategy=NoFreeze()) trainer.save_checkpoint("video_classification.pt") # 7. Make a prediction predictions = model.predict( os.path.join(_PATH_ROOT, "data/kinetics/predict")) print(predictions)
video_sampler=RandomSampler, decode_audio=False, train_transform=make_transform(train_post_tensor_transform), val_transform=make_transform(val_post_tensor_transform), predict_transform=make_transform(val_post_tensor_transform), num_workers=8, batch_size=8, ) # 4. List the available models print(VideoClassifier.available_models()) # out: ['efficient_x3d_s', 'efficient_x3d_xs', ... ,slowfast_r50', 'x3d_m', 'x3d_s', 'x3d_xs'] print(VideoClassifier.get_model_details("x3d_xs")) # 5. Build the model - `x3d_xs` comes with `nn.Softmax` by default for their `head_activation`. model = VideoClassifier(model="x3d_xs", num_classes=datamodule.num_classes) model.serializer = Labels() # 6. Finetune the model trainer = flash.Trainer(max_epochs=3, gpus=1) trainer.finetune(model, datamodule=datamodule, strategy=NoFreeze()) trainer.save_checkpoint("video_classification.pt") # 7. Make a prediction val_folder = os.path.join( _PATH_ROOT, os.path.join(_PATH_ROOT, "data/kinetics/predict")) predictions = model.predict( [os.path.join(val_folder, f) for f in os.listdir(val_folder)]) print(predictions)