Example #1
def _put_media(
    media: typing.Set[str],
    db_root: str,
    db_name: str,
    version: str,
    deps: Dependencies,
    backend: audbackend.Backend,
    num_workers: typing.Optional[int],
    verbose: bool,
):
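    r"""Upload media archives with new or altered files."""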
    # create a mapping from archives to media and
    # select archives with new or altered files for upload
    map_media_to_files = collections.defaultdict(list)
    for file in deps.media:
        if not deps.removed(file):
            map_media_to_files[deps.archive(file)].append(file)
            if deps.version(file) == version:
                media.add(deps.archive(file))

    lock = threading.Lock()

    def job(archive):
        if archive in map_media_to_files:
            for file in map_media_to_files[archive]:
                with lock:
                    deps._add_media(db_root, file, version)
            archive_file = backend.join(
                db_name,
                define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
                archive,
            )
            backend.put_archive(
                db_root,
                map_media_to_files[archive],
                archive_file,
                version,
            )

    # upload new and altered archives if they contain at least one file
    audeer.run_tasks(
        job,
        params=[([archive], {}) for archive in media],
        num_workers=num_workers,
        progress_bar=verbose,
        task_description='Put media',
    )
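The fan-out above (and in the examples below) goes through `audeer.run_tasks`, whose `params` argument takes one `([positional_args], {keyword_args})` pair per task, exactly as built in the list comprehension passed to it. A minimal, self-contained sketch of that calling convention (the `scale` job is hypothetical and only illustrates the parameter format):

import audeer

def scale(value, factor=1):
    return value * factor

# one ([args], {kwargs}) pair per task, mirroring the params built above
params = [([i], {'factor': 10}) for i in range(5)]
results = audeer.run_tasks(
    scale,
    params=params,
    num_workers=2,
    progress_bar=False,
)
print(results)  # expected: [0, 10, 20, 30, 40]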
Example #2
def _cached_files(
    files: typing.Sequence[str],
    deps: Dependencies,
    cached_versions: typing.Sequence[
        typing.Tuple[LooseVersion, str, Dependencies]
    ],
    flavor: typing.Optional[Flavor],
    verbose: bool,
) -> typing.Tuple[
    typing.Sequence[typing.Tuple[str, str]],
    typing.Sequence[str],
]:
    r"""Find cached files."""

    cached_files = []
    missing_files = []

    for file in audeer.progress_bar(
            files,
            desc='Cached files',
            disable=not verbose,
    ):
        found = False
        file_version = LooseVersion(deps.version(file))
        for cache_version, cache_root, cache_deps in cached_versions:
            if cache_version >= file_version:
                if file in cache_deps:
                    if deps.checksum(file) == cache_deps.checksum(file):
                        path = os.path.join(cache_root, file)
                        if flavor and flavor.format is not None:
                            path = audeer.replace_file_extension(
                                path,
                                flavor.format,
                            )
                        if os.path.exists(path):
                            found = True
                            break
        if found:
            if flavor and flavor.format is not None:
                file = audeer.replace_file_extension(
                    file,
                    flavor.format,
                )
            cached_files.append((cache_root, file))
        else:
            missing_files.append(file)

    return cached_files, missing_files
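The cache lookup accepts any cached database whose version is greater than or equal to the requested file's version; the comparison relies on `distutils.version.LooseVersion`, e.g.:

from distutils.version import LooseVersion

assert LooseVersion('1.1.0') >= LooseVersion('1.0.0')
assert LooseVersion('1.0.0') >= LooseVersion('1.0.0')
assert not (LooseVersion('0.9.0') >= LooseVersion('1.0.0'))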
Example #3
def _get_media(
    media: typing.List[str],
    db_root: str,
    db_root_tmp: str,
    db_name: str,
    deps: Dependencies,
    backend: audbackend.Backend,
    num_workers: typing.Optional[int],
    verbose: bool,
):
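    r"""Download media archives from the backend and unpack them."""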

    # create folder tree to avoid race condition
    # in os.makedirs when files are unpacked
    for file in media:
        audeer.mkdir(os.path.dirname(os.path.join(db_root, file)))
        audeer.mkdir(os.path.dirname(os.path.join(db_root_tmp, file)))

    # figure out archives
    archives = set()
    for file in media:
        archives.add((deps.archive(file), deps.version(file)))

    def job(archive: str, version: str):
        archive = backend.join(
            db_name,
            define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
            archive,
        )
        files = backend.get_archive(archive, db_root_tmp, version)
        for file in files:
            _move_file(db_root_tmp, db_root, file)

    audeer.run_tasks(
        job,
        params=[([archive, version], {}) for archive, version in archives],
        num_workers=num_workers,
        progress_bar=verbose,
        task_description='Get media',
    )
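Both `_get_media` above and `_get_media_from_backend` below unpack archives into a temporary root and then hand each extracted file to a package-internal `_move_file` helper. A minimal sketch of the assumed behaviour (not necessarily the library's actual implementation):

import os

def _move_file(root_src: str, root_dst: str, file: str):
    # move one file from the temporary root into the final database root
    os.rename(
        os.path.join(root_src, file),
        os.path.join(root_dst, file),
    )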
Example #4
def _get_media_from_backend(
    name: str,
    media: typing.Sequence[str],
    db_root: str,
    db_root_tmp: str,
    flavor: typing.Optional[Flavor],
    deps: Dependencies,
    backend: audbackend.Backend,
    num_workers: typing.Optional[int],
    verbose: bool,
):
    r"""Load media from backend."""

    # figure out archives
    archives = set()
    archive_names = set()
    for file in media:
        archive_name = deps.archive(file)
        archive_version = deps.version(file)
        archives.add((archive_name, archive_version))
        archive_names.add(archive_name)
    # collect all files that will be extracted,
    # if we have more files than archives
    if len(deps.files) > len(deps.archives):
        files = list()
        for file in deps.media:
            archive = deps.archive(file)
            if archive in archive_names:
                files.append(file)
        media = files

    # create folder tree to avoid race condition
    # in os.makedirs when files are unpacked
    # using multi-processing
    for file in media:
        audeer.mkdir(os.path.dirname(os.path.join(db_root, file)))
        audeer.mkdir(os.path.dirname(os.path.join(db_root_tmp, file)))

    def job(archive: str, version: str):
        archive = backend.join(
            name,
            define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
            archive,
        )
        # extract and move all files that are stored in the archive,
        # even if only a single file from the archive was requested
        files = backend.get_archive(archive, db_root_tmp, version)
        for file in files:
            if flavor is not None:
                bit_depth = deps.bit_depth(file)
                channels = deps.channels(file)
                sampling_rate = deps.sampling_rate(file)
                src_path = os.path.join(db_root_tmp, file)
                file = flavor.destination(file)
                dst_path = os.path.join(db_root_tmp, file)
                flavor(
                    src_path,
                    dst_path,
                    src_bit_depth=bit_depth,
                    src_channels=channels,
                    src_sampling_rate=sampling_rate,
                )
                if src_path != dst_path:
                    os.remove(src_path)

            _move_file(db_root_tmp, db_root, file)

    audeer.run_tasks(
        job,
        params=[([archive, version], {}) for archive, version in archives],
        num_workers=num_workers,
        progress_bar=verbose,
        task_description='Load media',
    )