def local(team: Optional[str] = None):
    """Lists synced datasets, stored in the specified path."""
    table = Table(["name", "images", "sync_date", "size"], [Table.L, Table.R, Table.R, Table.R])
    client = _load_client(offline=True)
    for dataset_path in client.list_local_datasets(team=team):
        files_in_dataset_path = list(find_files([dataset_path]))
        table.add_row(
            {
                "name": f"{dataset_path.parent.name}/{dataset_path.name}",
                "images": len(files_in_dataset_path),
                "sync_date": humanize.naturaldate(datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
                "size": humanize.naturalsize(sum(p.stat().st_size for p in files_in_dataset_path)),
            }
        )

    # List datasets stored in the deprecated on-disk format
    deprecated_local_datasets = list(client.list_deprecated_local_datasets())
    for dataset_path in deprecated_local_datasets:
        files_in_dataset_path = list(find_files([dataset_path]))
        table.add_row(
            {
                "name": dataset_path.name + " (deprecated format)",
                "images": len(files_in_dataset_path),
                "sync_date": humanize.naturaldate(datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
                "size": humanize.naturalsize(sum(p.stat().st_size for p in files_in_dataset_path)),
            }
        )

    print(table)
    if deprecated_local_datasets:
        print(
            "\nWARNING: found some local datasets that use a deprecated format "
            "not supported by recent versions of darwin-py. "
            "Run `darwin dataset migrate team_slug/dataset_slug` "
            "if you want to be able to use them in darwin-py."
        )
def local(team: Optional[str] = None) -> None:
    """
    Lists synced datasets, stored in the specified path.

    Parameters
    ----------
    team : Optional[str]
        The name of the team whose datasets to list; the default team is used if none is given.
        Defaults to None.
    """
    table: Table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Name")
    table.add_column("Image Count", justify="right")
    table.add_column("Sync Date", justify="right")
    table.add_column("Size", justify="right")

    client: Client = _load_client(offline=True)
    for dataset_path in client.list_local_datasets(team_slug=team):
        files_in_dataset_path = find_files([dataset_path])
        table.add_row(
            f"{dataset_path.parent.name}/{dataset_path.name}",
            str(len(files_in_dataset_path)),
            humanize.naturaldate(datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
            humanize.naturalsize(sum(p.stat().st_size for p in files_in_dataset_path)),
        )

    Console().print(table)
def local():
    """Lists synced datasets, stored in the specified path."""
    table = Table(["name", "images", "sync_date", "size"], [Table.L, Table.R, Table.R, Table.R])
    client = _load_client(offline=True)
    for dataset_path in client.list_local_datasets():
        table.add_row(
            {
                "name": dataset_path.name,
                "images": sum(1 for _ in find_files([dataset_path])),
                "sync_date": humanize.naturaldate(datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
                "size": humanize.naturalsize(sum(p.stat().st_size for p in find_files([dataset_path]))),
            }
        )
    print(table)
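# All three local() variants above compute the same per-dataset statistics
# (file count, last sync time, total size) with find_files and humanize.
# Minimal sketch (not darwin-py code) reproducing one table row for an
# arbitrary directory: the ~/.darwin/datasets path below and the unfiltered
# rglob scan are assumptions; darwin's find_files also filters by supported
# file extensions.
import datetime
from pathlib import Path

import humanize


def dataset_row(dataset_path: Path) -> dict:
    # Recursively collect all regular files under the dataset directory.
    files = [p for p in dataset_path.rglob("*") if p.is_file()]
    return {
        "name": f"{dataset_path.parent.name}/{dataset_path.name}",
        "images": len(files),
        "sync_date": humanize.naturaldate(datetime.datetime.fromtimestamp(dataset_path.stat().st_mtime)),
        "size": humanize.naturalsize(sum(p.stat().st_size for p in files)),
    }


if __name__ == "__main__":
    # Assumed default sync location; adjust to your darwin datasets directory.
    print(dataset_row(Path.home() / ".darwin" / "datasets" / "my-team" / "my-dataset"))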
def push(
    self,
    files_to_upload: List[str],
    blocking: bool = True,
    multi_threaded: bool = True,
    fps: int = 1,
    as_frames: bool = False,
    files_to_exclude: Optional[List[str]] = None,
    resume: bool = False,
    path: Optional[str] = None,
):
    """Uploads a local dataset (images ONLY) in the datasets directory.

    Parameters
    ----------
    files_to_upload : List[str]
        List of files to upload. It can be a folder.
    blocking : bool
        If False, the dataset is not uploaded and a generator function is returned instead.
    multi_threaded : bool
        Uses multiprocessing to upload the dataset in parallel. If blocking is False this has no effect.
    fps : int
        Frames per second to use when uploading videos.
    as_frames : bool
        Whether to upload videos as a list of frames.
    files_to_exclude : Optional[List[str]]
        List of files to exclude from the file scan.
    resume : bool
        Flag signalling that a previously started push should be resumed.
    path : Optional[str]
        Optional path to store the files in.

    Returns
    -------
    generator : function
        Generator for doing the actual uploads. This is None if blocking is True.
    count : int
        The number of files to upload.
    """
    # Remote paths need to start with /
    if path and path[0] != "/":
        path = f"/{path}"

    # This is where the responses from the upload function are saved/loaded for resume
    self.local_path.parent.mkdir(exist_ok=True)
    responses_path = self.local_path.parent / ".upload_responses.json"

    # Init optional parameters
    if files_to_exclude is None:
        files_to_exclude = []
    if files_to_upload is None:
        raise NotFound("Dataset location not found. Check your path.")

    if resume:
        if not responses_path.exists():
            raise NotFound("Dataset location not found. Check your path.")
        with responses_path.open() as f:
            logged_responses = json.load(f)
        files_to_exclude.extend(
            [
                response["file_path"]
                for response in logged_responses
                if response["s3_response_status_code"].startswith("2")
            ]
        )

    files_to_upload = find_files(files=files_to_upload, recursive=True, files_to_exclude=files_to_exclude)
    if not files_to_upload:
        raise ValueError("No files to upload, check your path, exclusion filters and resume flag")

    progress, count = add_files_to_dataset(
        client=self.client,
        dataset_id=str(self.dataset_id),
        filenames=files_to_upload,
        fps=fps,
        as_frames=as_frames,
        team=self.team,
        path=path,
    )

    # If blocking is selected, upload the dataset remotely
    if blocking:
        responses = exhaust_generator(progress=progress, count=count, multi_threaded=multi_threaded)
        # Log responses to file
        if responses:
            responses = [{k: str(v) for k, v in response.items()} for response in responses]
            if resume:
                responses.extend(logged_responses)
            with responses_path.open("w") as f:
                json.dump(responses, f)
        return None, count
    else:
        return progress, count
def push(
    self,
    files_to_upload: Optional[List[Union[PathLike, LocalFile]]],
    *,
    blocking: bool = True,
    multi_threaded: bool = True,
    fps: int = 0,
    as_frames: bool = False,
    files_to_exclude: Optional[List[PathLike]] = None,
    path: Optional[str] = None,
    preserve_folders: bool = False,
    progress_callback: Optional[ProgressCallback] = None,
    file_upload_callback: Optional[FileUploadCallback] = None,
) -> UploadHandler:
    """Uploads a local dataset (images ONLY) in the datasets directory.

    Parameters
    ----------
    files_to_upload : Optional[List[Union[PathLike, LocalFile]]]
        List of files to upload. Those can be folders.
    blocking : bool
        If False, the dataset is not uploaded and a generator function is returned instead.
    multi_threaded : bool
        Uses multiprocessing to upload the dataset in parallel. If blocking is False this has no effect.
    fps : int
        When the uploading file is a video, specify its framerate.
    as_frames : bool
        When the uploading file is a video, specify whether it's going to be uploaded as a list of frames.
    files_to_exclude : Optional[List[PathLike]]
        Optional list of files to exclude from the file scan. Those can be folders.
    path : Optional[str]
        Optional path to store the files in.
    preserve_folders : bool
        Specify whether or not to preserve folder paths when uploading.
    progress_callback : Optional[ProgressCallback]
        Optional callback, called every time the progress of an uploading file is reported.
    file_upload_callback : Optional[FileUploadCallback]
        Optional callback, called every time a file chunk is uploaded.

    Returns
    -------
    handler : UploadHandler
        Class for handling uploads, progress and error messages.
    """
    if files_to_exclude is None:
        files_to_exclude = []
    if files_to_upload is None:
        raise ValueError("No files or directory specified.")

    uploading_files = [item for item in files_to_upload if isinstance(item, LocalFile)]
    search_files = [item for item in files_to_upload if not isinstance(item, LocalFile)]

    generic_parameters_specified = path is not None or fps != 0 or as_frames is not False
    if uploading_files and generic_parameters_specified:
        raise ValueError("Cannot specify a path when uploading a LocalFile object.")

    for found_file in find_files(search_files, files_to_exclude=files_to_exclude):
        local_path = path
        if preserve_folders:
            source_files = [
                source_file for source_file in search_files if is_relative_to(found_file, source_file)
            ]
            if source_files:
                local_path = str(found_file.relative_to(source_files[0]).parent)
        uploading_files.append(LocalFile(found_file, fps=fps, as_frames=as_frames, path=local_path))

    if not uploading_files:
        raise ValueError("No files to upload, check your path, exclusion filters and resume flag")

    handler = UploadHandler(self, uploading_files)
    if blocking:
        handler.upload(
            multi_threaded=multi_threaded,
            progress_callback=progress_callback,
            file_upload_callback=file_upload_callback,
        )
    else:
        handler.prepare_upload()

    return handler
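# Hedged usage sketch for the newer push() above, not taken from darwin-py docs:
# Client.local(), get_remote_dataset() and the "my-team/my-dataset" slug are
# assumptions about the surrounding API; only push() itself is shown above.
from darwin.client import Client

client = Client.local()  # assumes local credentials/config already exist
dataset = client.get_remote_dataset("my-team/my-dataset")

# Blocking push of an image folder, mirroring the local folder structure
# remotely via preserve_folders=True.
dataset.push(["/path/to/local/images"], preserve_folders=True)

# Video upload: fps/as_frames/path apply only to plain paths; combining them
# with LocalFile objects makes push() raise ValueError (see the check above).
dataset.push(["/path/to/local/videos"], fps=1, as_frames=True)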