def _find_tables(
        db: audformat.Database,
        db_root: str,
        version: str,
        deps: Dependencies,
        verbose: bool,
) -> typing.List[str]:
    r"""Update tables.

    Entries in ``deps`` that refer to table files
    no longer backed by a table in ``db`` are dropped.
    Afterwards every table of ``db`` is checksummed
    and (re-)registered in ``deps`` under ``version``
    if it is unknown or its checksum differs.

    Args:
        db: database whose tables are inspected
        db_root: folder containing the ``db.<table>.csv`` files
        version: version string stored for new or altered tables
        deps: dependency object, modified in place
        verbose: show a progress bar

    Returns:
        IDs of the tables that were added to or updated in ``deps``

    """
    # release dependencies to tables that are gone from the database
    current_files = [f'db.{table}.csv' for table in db.tables]
    for stale_file in set(deps.tables) - set(current_files):
        deps._drop(stale_file)

    updated = []
    progress = audeer.progress_bar(
        db.tables,
        desc='Find tables',
        disable=not verbose,
    )
    for table_id in progress:
        table_file = f'db.{table_id}.csv'
        checksum = audbackend.md5(os.path.join(db_root, table_file))
        # known table with identical content, nothing to do
        if table_file in deps and checksum == deps.checksum(table_file):
            continue
        deps._add_meta(table_file, version, table_id, checksum)
        updated.append(table_id)

    return updated
def test_progress_bar():
    r"""Check the progress bar defaults and iterate a bar once."""
    # default width reserved for the description
    assert audeer.config.TQDM_DESCLEN == 60

    # the format string embeds the configured description width
    expected_format = (
        '{percentage:3.0f}%|{bar} [{elapsed}<{remaining}] '
        '{desc:' + str(audeer.config.TQDM_DESCLEN) + '}'
    )
    assert audeer.config.TQDM_FORMAT == expected_format

    # a single short step is enough to drive the bar to completion
    for delay in audeer.progress_bar([.1]):
        time.sleep(delay)
def _missing_tables(
        db_root: str,
        tables: typing.Sequence[str],
        verbose: bool,
) -> typing.Sequence[str]:
    r"""Find table files that do not exist below ``db_root``.

    Args:
        db_root: folder expected to contain the ``db.<table>.csv`` files
        tables: table IDs to look for
        verbose: show a progress bar

    Returns:
        file names (``db.<table>.csv``, not table IDs)
        of all tables without a file in ``db_root``

    """
    progress = audeer.progress_bar(
        tables,
        desc='Missing tables',
        disable=not verbose,
    )
    table_files = (f'db.{table_id}.csv' for table_id in progress)
    return [
        name for name in table_files
        if not os.path.exists(os.path.join(db_root, name))
    ]
def _missing_media(
        db_root: str,
        media: typing.Sequence[str],
        flavor: Flavor,
        verbose: bool,
) -> typing.Sequence[str]:
    r"""Find media files that do not exist below ``db_root``.

    If ``flavor.format`` is set,
    the file is looked up with its extension replaced by that format.

    Args:
        db_root: folder expected to contain the media files
        media: media file paths relative to ``db_root``
        flavor: flavor whose ``format`` decides the extension to look for
        verbose: show a progress bar

    Returns:
        entries of ``media`` (with their original extension)
        for which no file was found

    """
    absent = []
    progress = audeer.progress_bar(
        media,
        desc='Missing media',
        disable=not verbose,
    )
    for media_file in progress:
        target = os.path.join(db_root, media_file)
        if flavor.format is not None:
            target = audeer.replace_file_extension(target, flavor.format)
        if os.path.exists(target):
            continue
        absent.append(media_file)
    return absent
def _cached_files(
        files: typing.Sequence[str],
        deps: Dependencies,
        cached_versions: typing.Sequence[
            typing.Tuple[LooseVersion, str, Dependencies],
        ],
        flavor: typing.Optional[Flavor],
        verbose: bool,
) -> typing.Tuple[
    typing.Sequence[typing.Tuple[str, str]],
    typing.Sequence[str],
]:
    r"""Find cached files.

    A file counts as cached
    if one of the ``cached_versions`` has a version
    not smaller than the version of the file in ``deps``,
    lists the file with the same checksum as ``deps``,
    and the file exists in its cache folder.
    The first matching entry of ``cached_versions`` wins.

    Args:
        files: files to look for
        deps: dependencies providing version and checksum of each file
        cached_versions: tuples of
            ``(version, cache root, dependencies)``
            describing the available cache folders
        flavor: if given and its ``format`` is not ``None``,
            files are looked up and reported
            with their extension replaced by that format
        verbose: show a progress bar

    Returns:
        two sequences:
        ``(cache root, file)`` pairs for files found in a cache
        (``file`` carries the flavor extension if one applies),
        and the files not found in any cache
        (with their original extension)

    """
    # loop invariant: does the flavor change the file extension?
    convert = bool(flavor) and flavor.format is not None

    cached_files = []
    missing_files = []

    for file in audeer.progress_bar(
            files,
            desc='Cached files',
            disable=not verbose,
    ):
        file_version = LooseVersion(deps.version(file))

        # root folder of the first cache holding a matching copy
        found_root = None
        for cache_version, cache_root, cache_deps in cached_versions:
            if not cache_version >= file_version:
                continue
            if file not in cache_deps:
                continue
            if deps.checksum(file) != cache_deps.checksum(file):
                continue
            path = os.path.join(cache_root, file)
            if convert:
                path = audeer.replace_file_extension(path, flavor.format)
            if os.path.exists(path):
                found_root = cache_root
                break

        if found_root is None:
            missing_files.append(file)
            continue

        if convert:
            file = audeer.replace_file_extension(file, flavor.format)
        cached_files.append((found_root, file))

    return cached_files, missing_files
def _find_media(
        db: audformat.Database,
        db_root: str,
        version: str,
        deps: Dependencies,
        archives: typing.Mapping[str, str],
        verbose: bool,
) -> typing.Set[str]:
    r"""Update media entries in the dependencies.

    Media listed in ``deps`` but no longer part of ``db.files``
    is dropped from ``deps``.
    Media of ``db`` that is unknown to ``deps`` is added,
    and known media that is not marked as removed
    is re-added under ``version`` if its checksum changed.

    Args:
        db: database whose files are inspected
        db_root: folder containing the media files
        version: version string stored for new or altered media
        deps: dependency object, modified in place
        archives: optional mapping from file to archive name
            used for new media;
            files without an entry get an ID derived from their path
        verbose: show a progress bar

    Returns:
        archives of the media that was dropped from ``deps``

    """
    # release dependencies to removed media
    # and remember the archives they belonged to
    affected_archives = set()
    db_media = db.files
    for gone in set(deps.media) - set(db_media):
        affected_archives.add(deps.archive(gone))
        deps._drop(gone)

    # update version of altered media and insert new ones
    progress = audeer.progress_bar(
        db_media,
        desc='Find media',
        disable=not verbose,
    )
    for media_file in progress:
        path = os.path.join(db_root, media_file)
        if media_file in deps:
            # media flagged as removed is left untouched
            if deps.removed(media_file):
                continue
            checksum = audbackend.md5(path)
            if checksum == deps.checksum(media_file):
                continue
            archive = deps.archive(media_file)
        else:
            checksum = audbackend.md5(path)
            archive = (
                archives[media_file]
                if media_file in archives
                else audeer.uid(from_string=media_file.replace('\\', '/'))
            )
        deps._add_media(db_root, media_file, version, archive, checksum)

    return affected_archives
def download(
        url: str,
        destination: str = '.',
        *,
        chunk: int = 4 * 1024,
        force_download: bool = True,
        verbose: bool = False,
) -> str:
    r"""Download an artifact.

    Args:
        url: artifact URL
        destination: path to store the artifact,
            can be a folder or a file name
        chunk: amount of data read at once during the download
        force_download: forces the artifact to be downloaded
            even if it exists locally already
        verbose: show information on the download process

    Returns:
        path to local artifact

    Raises:
        RuntimeError: if artifact cannot be found,
            or you don't have access rights to the artifact,
            or the download ends
            before the announced number of bytes was received

    Example:
        >>> file = download(
        ...     (
        ...         'https://audeering.jfrog.io/artifactory/'
        ...         'data-public/emodb/db/1.1.0/db-1.1.0.yaml'
        ...     ),
        ... )
        >>> os.path.basename(file)
        'db-1.1.0.yaml'

    """
    destination = audeer.safe_path(destination)
    if os.path.isdir(destination):
        # a folder was given, keep the file name of the artifact
        destination = os.path.join(destination, os.path.basename(url))
    if os.path.exists(destination) and not force_download:
        return destination

    src_path = _path(url)
    if not src_path.exists():
        raise RuntimeError(f"Source '{url}' does not exists.")

    src_size = ArtifactoryPath.stat(src_path).size

    with audeer.progress_bar(total=src_size, disable=not verbose) as pbar:
        desc = audeer.format_display_message(
            'Download {}'.format(os.path.basename(str(src_path))),
            pbar=True,
        )
        pbar.set_description_str(desc)
        pbar.refresh()

        try:
            dst_size = 0
            with src_path.open() as src_fp, open(destination, 'wb') as dst_fp:
                while src_size > dst_size:
                    data = src_fp.read(chunk)
                    n_data = len(data)
                    if n_data == 0:
                        # An empty read means the stream is exhausted.
                        # Without this check the loop would spin forever
                        # when fewer bytes arrive than announced.
                        raise RuntimeError(
                            f"Download of '{url}' ended after "
                            f"{dst_size} of {src_size} bytes."
                        )
                    dst_fp.write(data)
                    dst_size += n_data
                    pbar.update(n_data)
        except (KeyboardInterrupt, Exception):
            # Clean up broken artifact files
            if os.path.exists(destination):
                os.remove(destination)  # pragma: no cover
            raise

    return destination