def test_resize_after_frame_selection():
    """Compare resizing via the frame-selection size with resizing via the model-input size.

    Both configs ask for a 10x12 result with identical MegadetectorLite settings.
    The loaded arrays must have the same shape but must not be identical.
    """
    test_vid = TEST_VIDEOS_DIR / "data" / "raw" / "benjamin" / "04250002.MP4"

    def _mdlite_settings():
        # a fresh dict per config so the two configs never share a mutable object
        return {
            "confidence": 0.25,
            "fill_mode": "score_sorted",
            "n_frames": 16,
        }

    # resize through the frame-selection dimensions
    resize_before_vlc = VideoLoaderConfig(
        frame_selection_height=10,
        frame_selection_width=12,
        ensure_total_frames=True,
        megadetector_lite_config=_mdlite_settings(),
    )
    frames_resized_before = load_video_frames(filepath=test_vid, config=resize_before_vlc)

    # resize through the model-input dimensions instead
    # (use full size image for MDLite)
    resize_after_vlc = VideoLoaderConfig(
        model_input_height=10,
        model_input_width=12,
        ensure_total_frames=True,
        megadetector_lite_config=_mdlite_settings(),
    )
    frames_resized_after = load_video_frames(filepath=test_vid, config=resize_after_vlc)

    # shapes should be the same
    assert frames_resized_before.shape == frames_resized_after.shape

    # but we expect some frame differences
    assert (frames_resized_before != frames_resized_after).any()
def test_denseposeconfig(model, tmp_path):
    """Exercise DensePoseConfig validation and check its outputs land in ``save_dir``.

    Args:
        model: Name of the densepose model under test; ``"animals"`` selects the
            ``"segmentation"`` output type, anything else ``"chimp_anatomy"``.
        tmp_path: Temporary directory used as ``save_dir``.
    """
    # keyword arguments common to the invalid and the valid configuration
    shared_kwargs = dict(
        render_output=True,
        embeddings_in_json=False,
        data_dir=ASSETS_DIR / "densepose_tests",
        save_dir=tmp_path,
    )

    # validation failures: "bananas" is the only difference from the valid config below
    with pytest.raises(ValidationError):
        DensePoseConfig(
            video_loader_config=VideoLoaderConfig(fps=0.2),
            output_type="bananas",
            **shared_kwargs,
        )

    if model == "animals":
        output_type = "segmentation"
    else:
        output_type = "chimp_anatomy"

    dpc = DensePoseConfig(
        video_loader_config=VideoLoaderConfig(fps=0.2),
        output_type=output_type,
        **shared_kwargs,
    )
    dpc.run_model()

    # ensure all outputs are saved in save_dir
    for expected_name in ("chimp_denspose_video.mp4", "chimp_denspose_labels.json"):
        assert (tmp_path / expected_name).exists()

    # NOTE(review): this compares the model name, not the output type. If the
    # fixture only yields names such as "animals"/"chimps", the anatomy check
    # never runs - confirm whether `output_type` was intended here.
    if model == "chimp_anatomy":
        assert (tmp_path / "chimp_denspose_anatomy.csv").exists()
def test_validate_total_frames():
    """Check that ``total_frames`` and MegadetectorLite ``n_frames`` are filled in from each other."""
    # n_frames is left unset, so it is expected to take the value of total_frames
    mdlite_without_n_frames = MegadetectorLiteYoloXConfig(confidence=0.01, n_frames=None)
    config = VideoLoaderConfig(
        megadetector_lite_config=mdlite_without_n_frames,
        total_frames=10,
    )
    assert config.megadetector_lite_config.n_frames == 10

    # total_frames is left unset, so it is expected to take the value of n_frames
    mdlite_with_n_frames = MegadetectorLiteYoloXConfig(confidence=0.01, n_frames=8)
    config = VideoLoaderConfig(megadetector_lite_config=mdlite_with_n_frames)
    assert config.total_frames == 8
def test_same_filename_new_kwargs(tmp_path, train_metadata):
    """Test that load_video_frames does not load the npz file if the params change."""
    cache = tmp_path / "test_cache"

    # prep labels for one video
    train_rows = train_metadata[train_metadata.split == "train"]
    labels = train_rows.set_index("filepath").filter(regex="species").head(1)

    def _generate_dataset(config):
        """Return loaded video from FfmpegZambaVideoDataset."""
        dataset = FfmpegZambaVideoDataset(annotations=labels, video_loader_config=config)
        first_item = dataset[0]
        return first_item[0]

    with mock.patch.dict(os.environ, {"VIDEO_CACHE_DIR": str(cache)}):
        # confirm cache is set in environment variable
        assert os.environ["VIDEO_CACHE_DIR"] == str(cache)

        # same video, different fps: the result must differ in shape
        first_load = _generate_dataset(config=VideoLoaderConfig(fps=1))
        new_params_same_name = _generate_dataset(config=VideoLoaderConfig(fps=2))
        assert first_load.shape != new_params_same_name.shape

        # check no params
        no_params_same_name = _generate_dataset(config=None)
        # NOTE(review): only adjacent pairs are compared (first vs. new, new vs. none);
        # first_load is never compared with no_params_same_name - confirm that is intended.
        assert (
            first_load.shape != new_params_same_name.shape
            and new_params_same_name.shape != no_params_same_name.shape
        )

        # multiple params in config
        first_load = _generate_dataset(config=VideoLoaderConfig(scene_threshold=0.2))
        new_params_same_name = _generate_dataset(
            config=VideoLoaderConfig(scene_threshold=0.2, crop_bottom_pixels=2)
        )
        assert first_load.shape != new_params_same_name.shape
def get_default_video_loader_config(cls, values):
    """Fill in ``values["video_loader_config"]`` from the model's bundled config when unset.

    Args:
        cls: The owning class (unused).
        values: Mapping with the keys ``"video_loader_config"``, ``"train_config"``
            and ``"predict_config"``.

    Returns:
        The same ``values`` mapping; ``"video_loader_config"`` is replaced only
        when it was ``None``.
    """
    # an explicit video loader config always wins
    if values["video_loader_config"] is not None:
        return values

    # the model name comes from the train config when present, else the predict config
    train_config = values["train_config"]
    if train_config is not None:
        model_name = train_config.model_name
    else:
        model_name = values["predict_config"].model_name

    logger.info(f"No video loader config specified. Using default for {model_name}.")

    config_file = MODELS_DIRECTORY / f"{model_name}/config.yaml"
    with config_file.open() as f:
        config_dict = yaml.safe_load(f)

    default_settings = config_dict["video_loader_config"]
    values["video_loader_config"] = VideoLoaderConfig(**default_settings)
    return values
def __init__(
    self,
    annotations: pd.DataFrame,
    transform: Optional[torchvision.transforms.transforms.Compose] = None,
    video_loader_config: Optional[VideoLoaderConfig] = None,
):
    """Set up the dataset from a labels dataframe.

    Args:
        annotations: Dataframe whose index holds the video paths and whose
            column names each contain the ``species_`` prefix.
        transform: Optional transform, stored and forwarded to the parent class.
        video_loader_config: Optional video loading settings; a default
            ``VideoLoaderConfig()`` is created when omitted.
    """
    index = annotations.index
    self.original_indices = index
    self.video_paths = index.tolist()

    # keep only the part of each column name after the first "species_"
    self.species = [column.split("species_", 1)[1] for column in annotations.columns]
    self.targets = annotations
    self.transform = transform

    # fall back to an all-defaults config when the caller passes none
    self.video_loader_config = (
        VideoLoaderConfig() if video_loader_config is None else video_loader_config
    )

    super().__init__(root=None, transform=transform)
def test_video(model, chimp_video_path, tmp_path):
    """Run densepose prediction, serialization, visualization and anatomy on one video.

    Args:
        model: Key into ``MODELS`` selecting the densepose model.
        chimp_video_path: Path to the video to process.
        tmp_path: Temporary directory that receives every output file.
    """
    json_path = tmp_path / f"output_{model}.json"
    viz_path = tmp_path / f"viz_vid_{model}.mp4"

    dpm = DensePoseManager(model=MODELS[model])

    # segmentation
    vid, preds = dpm.predict_video(
        chimp_video_path, video_loader_config=VideoLoaderConfig(fps=0.2)
    )
    assert vid.shape == (3, 180, 320, 3)
    assert len(preds) > 0

    # serialize results, then read them back
    serialized = dpm.serialize_video_output(preds, filename=json_path, write_embeddings=False)
    deserialized = dpm.deserialize_output(filename=json_path)
    assert serialized is not None
    assert json_path.stat().st_size > 0
    assert len(deserialized) == len(preds)

    # visualize image
    visualized_vid_arr = dpm.visualize_video(vid, preds, output_path=viz_path)
    assert viz_path.stat().st_size > 0
    assert visualized_vid_arr.shape == vid.shape
    assert (visualized_vid_arr != vid).any()

    # anatomy is only checked for the chimps model
    if model != "chimps":
        return

    csv_path = tmp_path / f"anatomized_{model}.csv"
    anatomy_info = dpm.anatomize_video(visualized_vid_arr, preds, output_path=csv_path)

    # output to disk
    assert anatomy_info.shape == (8, 46)
    assert (anatomy_info > 0).any().any()
    assert csv_path.stat().st_size > 0
def test_caching(tmp_path, caplog, train_metadata):
    """Check when loaded videos are cached and when an existing cache entry is reused.

    Args:
        tmp_path: Temporary directory under which the cache directory lives.
        caplog: Log capture used to look for the "Loading from cache" message.
        train_metadata: Dataframe with ``split`` and ``filepath`` columns plus
            species label columns.
    """
    cache = tmp_path / "video_cache"

    # prep labels for one video
    train_rows = train_metadata[train_metadata.split == "train"]
    labels = train_rows.set_index("filepath").filter(regex="species").head(1)

    def _load_first(video_loader_config=None):
        """Build a dataset over ``labels`` and return its first item."""
        dataset = FfmpegZambaVideoDataset(
            annotations=labels, video_loader_config=video_loader_config
        )
        return dataset[0]

    def _n_cached_files():
        """Count regular files anywhere below the cache directory."""
        return sum(1 for path in cache.rglob("*") if path.is_file())

    # no caching by default
    _load_first()
    assert not cache.exists()

    # caching can be specified in config
    _load_first(VideoLoaderConfig(fps=1, cache_dir=cache))
    # one file in cache
    assert _n_cached_files() == 1

    shutil.rmtree(cache)

    # or caching can be specified in environment variable
    with mock.patch.dict(os.environ, {"VIDEO_CACHE_DIR": str(cache)}):
        _load_first()
        assert _n_cached_files() == 1

        # changing cleanup in config does not prompt new hashing of videos
        with mock.patch.dict(os.environ, {"LOG_LEVEL": "DEBUG"}):
            _load_first(VideoLoaderConfig(cleanup_cache=True))
            assert "Loading from cache" in caplog.text

    # if no config is passed, this is equivalent to specifying None/False in all
    # non-cache related VLC params
    no_config = _load_first(None)[0]

    explicit_defaults = VideoLoaderConfig(
        crop_bottom_pixels=None,
        i_frames=False,
        scene_threshold=None,
        megadetector_lite_config=None,
        frame_selection_height=None,
        frame_selection_width=None,
        total_frames=None,
        ensure_total_frames=False,
        fps=None,
        early_bias=False,
        frame_indices=None,
        evenly_sample_total_frames=False,
        pix_fmt="rgb24",
        model_input_height=None,
        model_input_width=None,
    )
    config_with_nones = _load_first(explicit_defaults)[0]

    assert np.array_equal(no_config, config_with_nones)
def dummy_video_loader_config():
    """Return a small VideoLoaderConfig: 4 total frames with a 19x19 frame-selection size."""
    frame_selection_size = 19
    return VideoLoaderConfig(
        total_frames=4,
        frame_selection_height=frame_selection_size,
        frame_selection_width=frame_selection_size,
    )
def train(
    data_dir: Path = typer.Option(None, exists=True, help="Path to folder containing videos."),
    labels: Path = typer.Option(None, exists=True, help="Path to csv containing video labels."),
    model: ModelEnum = typer.Option(
        "time_distributed",
        help="Model to train. Model will be superseded by checkpoint if provided.",
    ),
    checkpoint: Path = typer.Option(
        None,
        exists=True,
        help="Model checkpoint path to use for training. If provided, model is not required.",
    ),
    config: Path = typer.Option(
        None,
        exists=True,
        help="Specify options using yaml configuration file instead of through command line options.",
    ),
    batch_size: int = typer.Option(None, help="Batch size to use for training."),
    gpus: int = typer.Option(
        None,
        help="Number of GPUs to use for training. If not specified, will use all GPUs found on machine.",
    ),
    dry_run: bool = typer.Option(
        None,
        help="Runs one batch of train and validation to check for bugs.",
    ),
    save_dir: Path = typer.Option(
        None,
        help="An optional directory in which to save the model checkpoint and configuration file. If not specified, will save to a `version_n` folder in your working directory.",
    ),
    num_workers: int = typer.Option(
        None,
        help="Number of subprocesses to use for data loading.",
    ),
    weight_download_region: RegionEnum = typer.Option(
        None, help="Server region for downloading weights."
    ),
    skip_load_validation: bool = typer.Option(
        None,
        help="Skip check that verifies all videos can be loaded prior to training. Only use if you're very confident all your videos can be loaded.",
    ),
    yes: bool = typer.Option(
        False,
        "--yes",
        "-y",
        help="Skip confirmation of configuration and proceed right to training.",
    ),
):
    """Train a model on your labeled data.

    If an argument is specified in both the command line and in a yaml file, the command line input will take precedence.
    """
    # load the user's yaml if given, otherwise the config bundled with the chosen model
    if config is not None:
        with config.open() as f:
            config_dict = yaml.safe_load(f)
        config_file = config
    else:
        with (MODELS_DIRECTORY / f"{model.value}/config.yaml").open() as f:
            config_dict = yaml.safe_load(f)
        config_file = None

    if "video_loader_config" in config_dict:
        video_loader_config = VideoLoaderConfig(**config_dict["video_loader_config"])
    else:
        video_loader_config = None

    train_dict = config_dict["train_config"]

    # override if any command line arguments are passed; None means "not given"
    cli_overrides = {
        "data_dir": data_dir,
        "labels": labels,
        # `model` always carries a value, so only a non-default choice counts as an override
        "model_name": model if model != "time_distributed" else None,
        "checkpoint": checkpoint,
        "batch_size": batch_size,
        "gpus": gpus,
        "dry_run": dry_run,
        "save_dir": save_dir,
        "num_workers": num_workers,
        "weight_download_region": weight_download_region,
        "skip_load_validation": skip_load_validation,
    }
    train_dict.update(
        {key: value for key, value in cli_overrides.items() if value is not None}
    )

    try:
        manager = ModelManager(
            ModelConfig(
                video_loader_config=video_loader_config,
                train_config=TrainConfig(**train_dict),
            )
        )
    except ValidationError as ex:
        logger.error("Invalid configuration.")
        # NOTE(review): the exception object is passed where an exit code is
        # expected; presumably relied on to surface the validation details on
        # exit - confirm before changing.
        raise typer.Exit(ex)

    # kept under its own name so the `config` path argument is not rebound to another type
    model_config = manager.config
    train_config = model_config.train_config

    # get species to confirm
    spacer = "\n\t- "
    species_columns = train_config.labels.filter(regex="species").columns
    species = spacer + spacer.join(
        sorted(column.split("species_", 1)[1] for column in species_columns)
    )

    # train_dict already reflects the command line overrides applied above
    msg = "\n".join(
        [
            "The following configuration will be used for training:",
            f"Config file: {config_file}",
            f"Data directory: {train_dict.get('data_dir')}",
            f"Labels csv: {train_dict.get('labels')}",
            f"Species: {species}",
            f"Model name: {train_config.model_name}",
            f"Checkpoint: {train_dict.get('checkpoint')}",
            f"Batch size: {train_config.batch_size}",
            f"Number of workers: {train_config.num_workers}",
            f"GPUs: {train_config.gpus}",
            f"Dry run: {train_config.dry_run}",
            f"Save directory: {train_config.save_dir}",
            f"Weight download region: {train_config.weight_download_region}",
            "",
        ]
    )

    if yes:
        typer.echo(f"{msg}\n\nSkipping confirmation and proceeding to train.")
    else:
        yes = typer.confirm(
            f"{msg}\n\nIs this correct?",
            abort=False,
            default=True,
        )

    if yes:
        # kick off training
        manager.train()
def predict(
    data_dir: Path = typer.Option(None, exists=True, help="Path to folder containing videos."),
    filepaths: Path = typer.Option(
        None, exists=True, help="Path to csv containing `filepath` column with videos."
    ),
    model: ModelEnum = typer.Option(
        "time_distributed",
        help="Model to use for inference. Model will be superseded by checkpoint if provided.",
    ),
    checkpoint: Path = typer.Option(
        None,
        exists=True,
        help="Model checkpoint path to use for inference. If provided, model is not required.",
    ),
    gpus: int = typer.Option(
        None,
        help="Number of GPUs to use for inference. If not specified, will use all GPUs found on machine.",
    ),
    batch_size: int = typer.Option(None, help="Batch size to use for inference."),
    save: bool = typer.Option(
        None,
        help="Whether to save out predictions. If you want to specify the output directory, use save_dir instead.",
    ),
    save_dir: Path = typer.Option(
        None,
        help="An optional directory in which to save the model predictions and configuration yaml. "
        "Defaults to the current working directory if save is True.",
    ),
    dry_run: bool = typer.Option(None, help="Runs one batch of inference to check for bugs."),
    config: Path = typer.Option(
        None,
        exists=True,
        help="Specify options using yaml configuration file instead of through command line options.",
    ),
    proba_threshold: float = typer.Option(
        None,
        help="Probability threshold for classification between 0 and 1. If specified binary predictions "
        "are returned with 1 being greater than the threshold, 0 being less than or equal to. If not "
        "specified, probabilities between 0 and 1 are returned.",
    ),
    output_class_names: bool = typer.Option(
        None,
        help="If True, we just return a video and the name of the most likely class. If False, "
        "we return a probability or indicator (depending on --proba_threshold) for every "
        "possible class.",
    ),
    num_workers: int = typer.Option(
        None,
        help="Number of subprocesses to use for data loading.",
    ),
    weight_download_region: RegionEnum = typer.Option(
        None, help="Server region for downloading weights."
    ),
    skip_load_validation: bool = typer.Option(
        None,
        help="Skip check that verifies all videos can be loaded prior to inference. Only use if you're very confident all your videos can be loaded.",
    ),
    overwrite: bool = typer.Option(
        None, "--overwrite", "-o", help="Overwrite outputs in the save directory if they exist."
    ),
    yes: bool = typer.Option(
        False,
        "--yes",
        "-y",
        help="Skip confirmation of configuration and proceed right to prediction.",
    ),
):
    """Identify species in a video.

    This is a command line interface for prediction on camera trap footage. Given a path to camera trap footage, the predict function uses a deep learning model to predict the presence or absence of a variety of species of common interest to wildlife researchers working with camera trap data.

    If an argument is specified in both the command line and in a yaml file, the command line input will take precedence.
    """
    # load the user's yaml if given, otherwise the config bundled with the chosen model
    if config is not None:
        with config.open() as f:
            config_dict = yaml.safe_load(f)
        config_file = config
    else:
        with (MODELS_DIRECTORY / f"{model.value}/config.yaml").open() as f:
            config_dict = yaml.safe_load(f)
        config_file = None

    if "video_loader_config" in config_dict:
        video_loader_config = VideoLoaderConfig(**config_dict["video_loader_config"])
    else:
        video_loader_config = None

    predict_dict = config_dict["predict_config"]

    # override if any command line arguments are passed; None means "not given"
    cli_overrides = {
        "data_dir": data_dir,
        "filepaths": filepaths,
        # `model` always carries a value, so only a non-default choice counts as an override
        "model_name": model if model != "time_distributed" else None,
        "checkpoint": checkpoint,
        "batch_size": batch_size,
        "gpus": gpus,
        "dry_run": dry_run,
        "save": save,
        # save_dir takes precedence over save
        "save_dir": save_dir,
        "proba_threshold": proba_threshold,
        "output_class_names": output_class_names,
        "num_workers": num_workers,
        "weight_download_region": weight_download_region,
        "skip_load_validation": skip_load_validation,
        "overwrite": overwrite,
    }
    predict_dict.update(
        {key: value for key, value in cli_overrides.items() if value is not None}
    )

    try:
        manager = ModelManager(
            ModelConfig(
                video_loader_config=video_loader_config,
                predict_config=PredictConfig(**predict_dict),
            )
        )
    except ValidationError as ex:
        logger.error("Invalid configuration.")
        # NOTE(review): the exception object is passed where an exit code is
        # expected; presumably relied on to surface the validation details on
        # exit - confirm before changing.
        raise typer.Exit(ex)

    # kept under its own name so the `config` path argument is not rebound to another type
    model_config = manager.config
    predict_config = model_config.predict_config

    # predict_dict already reflects the command line overrides applied above
    msg = "\n".join(
        [
            "The following configuration will be used for inference:",
            f"Config file: {config_file}",
            f"Data directory: {predict_dict.get('data_dir')}",
            f"Filepath csv: {predict_dict.get('filepaths')}",
            f"Model: {predict_config.model_name}",
            f"Checkpoint: {predict_dict.get('checkpoint')}",
            f"Batch size: {predict_config.batch_size}",
            f"Number of workers: {predict_config.num_workers}",
            f"GPUs: {predict_config.gpus}",
            f"Dry run: {predict_config.dry_run}",
            f"Save directory: {predict_config.save_dir}",
            f"Proba threshold: {predict_config.proba_threshold}",
            f"Output class names: {predict_config.output_class_names}",
            f"Weight download region: {predict_config.weight_download_region}",
            "",
        ]
    )

    if yes:
        typer.echo(f"{msg}\n\nSkipping confirmation and proceeding to prediction.")
    else:
        yes = typer.confirm(
            f"{msg}\n\nIs this correct?",
            abort=False,
            default=True,
        )

    if yes:
        # kick off prediction
        manager.predict()
def predict_model(
    predict_config: PredictConfig,
    video_loader_config: VideoLoaderConfig = None,
):
    """Run inference with a model and optionally write the predictions to a csv.

    Args:
        predict_config (PredictConfig): Pydantic config for performing inference.
        video_loader_config (VideoLoaderConfig, optional): Pydantic config for
            preprocessing videos. If None, the one produced by building a
            ModelConfig around ``predict_config`` is used.

    Returns:
        The predictions indexed by the dataset's original indices: 0/1 indicators
        when ``proba_threshold`` is set, the top column name per row when
        ``output_class_names`` is truthy, otherwise values rounded to 5 places.
    """
    # get default VLC for model if not specified
    if video_loader_config is None:
        default_model_config = ModelConfig(
            predict_config=predict_config, video_loader_config=None
        )
        video_loader_config = default_model_config.video_loader_config

    # set up model
    model = instantiate_model(
        checkpoint=predict_config.checkpoint,
        weight_download_region=predict_config.weight_download_region,
        model_cache_dir=predict_config.model_cache_dir,
        scheduler_config=None,
        labels=None,
    )

    model_transform = MODEL_MAPPING[model.__class__.__name__]["transform"]
    data_module = ZambaDataModule(
        video_loader_config=video_loader_config,
        transform=model_transform,
        predict_metadata=predict_config.filepaths,
        batch_size=predict_config.batch_size,
        num_workers=predict_config.num_workers,
    )

    validate_species(model, data_module)

    if video_loader_config.cache_dir is not None:
        logger.info(f"Videos will be cached to {video_loader_config.cache_dir}.")
    else:
        logger.info("No cache dir is specified. Videos will not be cached.")

    trainer = pl.Trainer(
        gpus=predict_config.gpus, logger=False, fast_dev_run=predict_config.dry_run
    )

    configuration = {
        "model_class": model.model_class,
        "species": model.species,
        "predict_config": json.loads(predict_config.json(exclude={"filepaths"})),
        "inference_start_time": datetime.utcnow().isoformat(),
        "video_loader_config": json.loads(video_loader_config.json()),
    }

    # outputs are written unless save is explicitly False
    write_outputs = predict_config.save is not False

    if write_outputs:
        config_path = predict_config.save_dir / "predict_configuration.yaml"
        logger.info(f"Writing out full configuration to {config_path}.")
        with config_path.open("w") as fp:
            yaml.dump(configuration, fp)

    dataloader = data_module.predict_dataloader()
    logger.info("Starting prediction...")
    probas = trainer.predict(model=model, dataloaders=dataloader)

    stacked_probas = np.vstack(probas)
    df = pd.DataFrame(
        stacked_probas,
        columns=model.species,
        index=dataloader.dataset.original_indices,
    )

    # change output format if specified
    threshold = predict_config.proba_threshold
    if threshold is not None:
        df = (df > threshold).astype(int)
    elif predict_config.output_class_names:
        df = df.idxmax(axis=1)
    else:
        # round to a useful number of places
        df = df.round(5)

    if write_outputs:
        preds_path = predict_config.save_dir / "zamba_predictions.csv"
        logger.info(f"Saving out predictions to {preds_path}.")
        with preds_path.open("w") as fp:
            df.to_csv(fp, index=True)

    return df