def get_local_path(input_file, dest_dir):
    """
    If user specified copying data to a local directory, get the local path
    where the data files were copied.

    - If input_file is just a file, we return the dest_dir/filename.
    - If the input_file is a directory, then we check if the environment is
      SLURM and use slurm_dir or otherwise dest_dir to look up whether a
      "copy_complete" marker file is available. If available, we return the
      directory.
    - If both above fail, we return the input_file as is.
    """
    out = ""
    if g_pathmgr.isfile(input_file):
        out = os.path.join(dest_dir, os.path.basename(input_file))
    elif g_pathmgr.isdir(input_file):
        # Last path component of the source directory is the dataset name.
        data_name = input_file.strip("/").split("/")[-1]
        if "SLURM_JOBID" in os.environ:
            # On SLURM, data is copied under a job-specific directory.
            dest_dir = get_slurm_dir(dest_dir)
        dest_dir = os.path.join(dest_dir, data_name)
        # "copy_complete" marks that a previous copy finished successfully.
        complete_flag = os.path.join(dest_dir, "copy_complete")
        if g_pathmgr.isfile(complete_flag):
            out = dest_dir
    if g_pathmgr.exists(out):
        return out
    else:
        # Fall back to the original path when no valid local copy was found.
        return input_file
def from_directory(
    cls,
    path: str,
    fps: float = 30.0,
    multithreaded_io=False,
    path_order_cache: Optional[Dict[str, List[str]]] = None,
):
    """
    Create a frame video from a directory of frame image files, sorting the
    frames in natural (numeric-aware) order.

    Args:
        path (str): path to frame video directory.
        fps (float): the target fps for the video. This is needed to link the
            frames to a second timestamp in the video.
        multithreaded_io (bool): controls whether parallelizable io operations
            are performed across multiple threads.
        path_order_cache (dict): An optional mapping from directory-path to list
            of frames in the directory in numerical order. Used for speedup by
            caching the frame paths.
    """
    if path_order_cache is not None and path in path_order_cache:
        # Cache hit: skip listing and sorting the directory entirely.
        return cls.from_frame_paths(path_order_cache[path], fps, multithreaded_io)

    assert g_pathmgr.isdir(path), f"{path} is not a directory"
    rel_frame_paths = g_pathmgr.ls(path)

    def natural_keys(text):
        # Split on digit runs so "frame2" sorts before "frame10".
        # Fix: use a raw string for the regex — "(\d+)" contains an invalid
        # escape sequence (SyntaxWarning on Python 3.12+, error in the future).
        return [int(c) if c.isdigit() else c for c in re.split(r"(\d+)", text)]

    rel_frame_paths.sort(key=natural_keys)
    frame_paths = [os.path.join(path, f) for f in rel_frame_paths]
    if path_order_cache is not None:
        path_order_cache[path] = frame_paths
    return cls.from_frame_paths(frame_paths, fps, multithreaded_io)
def copy_data(input_file, destination_dir, num_threads, tmp_destination_dir):
    """
    Copy data from one source to the other using num_threads. The data to copy
    can be a single file or a directory. We check what type of data and call
    the relevant functions.

    Returns:
        output_file (str): the new path of the data (could be file or dir)
        destination_dir (str): the destination dir that was actually used
    """
    if destination_dir is None or destination_dir == "":
        # Normalize "" to None: no local destination was requested.
        destination_dir = None
    else:
        # Fix: only log the directory creation when a directory is actually
        # created (previously the log line fired even for an empty/None
        # destination, which was misleading).
        logging.info(f"Creating directory: {destination_dir}")
        makedir(destination_dir)
    if g_pathmgr.isfile(input_file):
        output_file, output_dir = copy_file(
            input_file, destination_dir, tmp_destination_dir
        )
    elif g_pathmgr.isdir(input_file):
        output_file, output_dir = copy_dir(input_file, destination_dir, num_threads)
    else:
        raise RuntimeError("The input_file is neither a file nor a directory")
    return output_file, output_dir
def video_from_path(self, filepath, decode_audio=False, decoder="pyav", fps=30):
    """
    Construct a video object for ``filepath``: an EncodedVideo when the path
    is a regular file, or a FrameVideo when it is a directory of frames.
    """
    # Some PathManager handlers do not implement isfile/isdir; when that is
    # the case, default to treating the path as a single (encoded) video file.
    try:
        is_file = g_pathmgr.isfile(filepath)
        is_dir = g_pathmgr.isdir(filepath)
    except NotImplementedError:
        is_file, is_dir = True, False

    if is_file:
        from pytorchvideo.data.encoded_video import EncodedVideo

        return EncodedVideo.from_path(filepath, decode_audio, decoder)

    if is_dir:
        from pytorchvideo.data.frame_video import FrameVideo

        assert not decode_audio, "decode_audio must be False when using FrameVideo"
        return FrameVideo.from_directory(
            filepath, fps, path_order_cache=self.path_order_cache
        )

    raise FileNotFoundError(f"{filepath} not found.")
def __init__(self, cfg, data_source, path, split, dataset_name):
    """
    Disk-backed image dataset.

    Args:
        cfg: configuration object; split-level settings are read from
            cfg["DATA"][split].
        data_source (str): one of "disk_filelist", "disk_folder",
            "disk_roi_annotations".
        path (str): file (filelist / json annotations) or folder to load from.
        split (str): dataset split name used to index into cfg["DATA"].
        dataset_name (str): name of this dataset.
    """
    super(DiskImageDataset, self).__init__(
        queue_size=cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"]
    )
    assert data_source in [
        "disk_filelist",
        "disk_folder",
        "disk_roi_annotations",
    ], "data_source must be either disk_filelist or disk_folder"
    # Validate that the path matches the kind of source we were given.
    if data_source == "disk_folder":
        assert g_pathmgr.isdir(path), f"Directory {path} does not exist"
    elif data_source == "disk_filelist":
        assert g_pathmgr.isfile(path), f"File {path} does not exist"
    elif data_source == "disk_roi_annotations":
        assert g_pathmgr.isfile(path), f"File {path} does not exist"
        assert path.endswith("json"), "Annotations must be in json format"
    self.cfg = cfg
    self.split = split
    self.dataset_name = dataset_name
    self.data_source = data_source
    self._path = path
    self.image_dataset = []
    self.image_roi_bbox = []
    self.is_initialized = False
    self._load_data(path)
    self._num_samples = len(self.image_dataset)
    split_cfg = cfg["DATA"][self.split]
    self._remove_prefix = split_cfg["REMOVE_IMG_PATH_PREFIX"]
    self._new_prefix = split_cfg["NEW_IMG_PATH_PREFIX"]
    if self.data_source in ["disk_filelist", "disk_roi_annotations"]:
        # Set dataset to null so that workers dont need to pickle this file.
        # This saves memory when disk_filelist is large, especially when
        # memory mapping.
        self.image_dataset = []
        self.image_roi_bbox = []
    # whether to use QueueDataset class to handle invalid images or not
    self.enable_queue_dataset = split_cfg["ENABLE_QUEUE_DATASET"]
def from_directory(
    cls,
    path: str,
    fps: float = 30.0,
    multithreaded_io=False,
):
    """
    Build a video from every file found directly inside ``path``.

    Args:
        path (str): directory containing the frame image files.
        fps (float): target frames-per-second used to link frames to
            timestamps in the video.
        multithreaded_io (bool): whether parallelizable io operations may be
            performed across multiple threads.
    """
    assert g_pathmgr.isdir(path), f"{path} is not a directory"
    pattern = os.path.join(path, "*")
    frame_paths = list(glob.glob(pattern))
    return cls.from_frame_paths(frame_paths, fps, multithreaded_io)
def __init__(
    self, cfg: AttrDict, data_source: str, path: str, split: str, dataset_name: str
):
    """
    Dataset rooted at an existing directory on disk.

    Args:
        cfg (AttrDict): configuration object (kept by signature; unused here).
        data_source (str): identifier for the data source (unused here).
        path (str): directory the dataset is loaded from; must exist.
        split (str): split name; stored lower-cased for consistent lookups.
        dataset_name (str): name of this dataset.
    """
    super().__init__()
    assert g_pathmgr.isdir(path), f"Directory {path} does not exist"
    self.path = path
    self.dataset_name = dataset_name
    self.split = split.lower()
    self.dataset = self._load_dataset()
def from_path(cls, data_path: str) -> LabeledVideoPaths:
    """
    Factory function that creates a LabeledVideoPaths object by dispatching
    on the path type:

    - a directory path is handled by LabeledVideoPaths.from_directory,
    - a regular file is handled by LabeledVideoPaths.from_csv.

    Args:
        data_path (str): The path to the file or directory to be read.

    Raises:
        FileNotFoundError: if ``data_path`` is neither a file nor a directory.
    """
    if g_pathmgr.isdir(data_path):
        return LabeledVideoPaths.from_directory(data_path)
    if g_pathmgr.isfile(data_path):
        return LabeledVideoPaths.from_csv(data_path)
    raise FileNotFoundError(f"{data_path} not found.")
def __init__(self, cfg):
    """
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    self.cfg = cfg
    self.source = g_pathmgr.get_local_path(path=cfg.DEMO.INPUT_VIDEO)
    self.fps = None
    if g_pathmgr.isdir(self.source):
        # The input is a directory of frames named "<video_name>_%06d.jpg";
        # frame files carry no rate information, so take FPS from the config.
        self.fps = cfg.DEMO.FPS
        self.video_name = self.source.split("/")[-1]
        self.source = os.path.join(
            self.source, "{}_%06d.jpg".format(self.video_name)
        )
    else:
        # The input is a video file; the name is its basename w/o extension.
        basename = self.source.split("/")[-1]
        self.video_name = basename.split(".")[0]

    self.cap = cv2.VideoCapture(self.source)
    if self.fps is None:
        # Video files report their own frame rate.
        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
    self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
    self.display_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    self.display_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    if not self.cap.isOpened():
        raise IOError("Video {} cannot be opened".format(self.source))

    self.output_file = None
    if cfg.DEMO.OUTPUT_FILE != "":
        self.output_file = self.get_output_file(cfg.DEMO.OUTPUT_FILE)

    self.pred_boxes, self.gt_boxes = load_boxes_labels(
        cfg,
        self.video_name,
        self.fps,
        self.display_width,
        self.display_height,
    )
    self.seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    self.no_frames_repeat = cfg.DEMO.SLOWMO