示例#1
0
async def save_file(
    db_client: DBClient,
    namespace: Namespace,
    path: StrOrPath,
    content: IO[bytes],
) -> File:
    """
    Store a file in the storage and register it in the database.

    When the requested name is already in use, the file is stored under the next
    free name instead. For example, if 'f.txt' is taken, the file is saved
    as 'f (1).txt'.

    Args:
        db_client (DBClient): Database client.
        namespace (Namespace): Namespace the file belongs to.
        path (StrOrPath): Desired location of the file within the namespace.
        content (IO): File content.

    Raises:
        NotADirectory: If one of the path parents is not a folder.

    Returns:
        File: The newly created file.
    """
    ns_path = namespace.path
    parent_folder = os.path.normpath(os.path.dirname(path))

    # Make sure the parent folder is known to the database before saving into it.
    parent_exists = await crud.file.exists(db_client, ns_path, parent_folder)
    if not parent_exists:
        await create_folder(db_client, namespace, parent_folder)

    # The requested path may already be taken, so ask for the next free one.
    target_path = await crud.file.next_path(db_client, ns_path, path)
    stored = await storage.save(ns_path, target_path, content)

    mediatype = mediatypes.guess(target_path, content)
    fingerprint = hashes.dhash(content, mediatype=mediatype)

    # The file record and its fingerprint are written within one transaction.
    async for tx in db_client.transaction():  # pragma: no branch
        async with tx:
            file = await crud.file.create(
                tx,
                ns_path,
                target_path,
                size=stored.size,
                mediatype=mediatype,
            )
            # No fingerprint record is written when dhash yields None.
            if fingerprint is not None:
                await crud.fingerprint.create(tx, file.id, fp=fingerprint)

    return file
示例#2
0
    async def __call__(
        self,
        ns_path: StrOrPath,
        path: StrOrPath | None = None,
        content: bytes | BytesIO = b"I'm Dummy File!",
    ) -> File:
        """
        Save a file to the storage and create a matching record in the database.

        The parent folder is created in the storage and, if it is not yet present
        there, in the database as well.

        Args:
            ns_path (StrOrPath): Namespace path where the file is created.
            path (StrOrPath | None): Path of the file. If omitted or empty, a unique
                fake '.txt' file name is generated.
            content (bytes | BytesIO): File content. Raw bytes are wrapped in a BytesIO.

        Returns:
            File: The created file.
        """
        path = path or fake.unique.file_name(category="text", extension="txt")
        parent = os.path.normpath(os.path.dirname(path))

        await storage.makedirs(ns_path, parent)
        if not await crud.file.exists(self._db_conn, ns_path, parent):
            await crud.file.create_folder(self._db_conn, ns_path, parent)

        # Wrap raw bytes in a file-like object; keep the parameter itself untouched.
        stream = BytesIO(content) if isinstance(content, bytes) else content

        file = await storage.save(ns_path, path, content=stream)
        return await crud.file.create(
            self._db_conn,
            ns_path,
            path,
            size=file.size,
            mediatype=mediatypes.guess(path, stream),
        )
示例#3
0
def test_guess_based_on_file_content():
    """Content starting with a JPEG header is guessed as 'image/jpeg'."""
    jpeg_magic = b'\xff\xd8\xff\xe0\x00\x10'
    guessed = mediatypes.guess("image", file=jpeg_magic)
    assert guessed == "image/jpeg"
示例#4
0
def test_guess_but_filename_does_not_have_suffix():
    """A name without a suffix and without content is guessed as OCTET_STREAM."""
    guessed = mediatypes.guess("f")
    assert guessed == mediatypes.OCTET_STREAM
示例#5
0
def test_guess_based_on_file_content_with_fallback_to_filename():
    """With content b"dummy" and the name 'f.txt', the guess is 'text/plain'."""
    guessed = mediatypes.guess("f.txt", file=b"dummy")
    assert guessed == "text/plain"
示例#6
0
def test_guess_based_on_filename():
    """Without content, the name 'f.txt' is guessed as 'text/plain'."""
    guessed = mediatypes.guess("f.txt")
    assert guessed == "text/plain"
示例#7
0
async def reconcile(db_client: DBClient, namespace: Namespace) -> None:
    """
    Create files that are missing in the database, but present in the storage and remove
    files that are present in the database, but missing in the storage.

    All file records of the namespace are deleted and re-created from what is found
    in the storage. Afterwards folder sizes are restored and a fingerprint is
    calculated for every entry with an image mediatype.

    Args:
        db_client (DBClient): Database client.
        namespace (Namespace): Namespace where file will be reconciled.

    Raises:
        errors.FileNotFound: If path to a folder does not exists.
        errors.NotADirectory: If path to a folder is not a directory.
    """
    # Upper bound on the number of items sent to the database in a single batch.
    batch_size = 500

    ns_path = str(namespace.path)
    folders = deque(["."])
    missing = []
    to_fingerprint = []

    # For now, it is faster to re-create all files from scratch
    # than iterating through large directories looking for one missing/dangling file
    await crud.file.delete_all(db_client, ns_path)
    await crud.file.create_home_folder(db_client, ns_path)

    # Walk the storage tree; folders discovered on the way are queued for a later visit.
    while folders:
        folder = folders.pop()

        for file in await storage.iterdir(ns_path, folder):
            if file.is_dir():
                folders.append(file.path)
                size = 0
                mediatype = mediatypes.FOLDER
            else:
                size = file.size
                # Only the file name is used here; file content is not inspected.
                mediatype = mediatypes.guess(file.name)

            if mediatypes.is_image(mediatype):
                to_fingerprint.append(file.path)

            missing.append(
                File(
                    id=None,  # type: ignore
                    name=file.name,
                    path=file.path,
                    size=size,
                    mtime=file.mtime,
                    mediatype=mediatype,
                ))

    mediatype_names = {file.mediatype for file in missing}
    await crud.mediatype.create_missing(db_client, names=mediatype_names)

    # Slice into batches instead of grouping with `zip_longest`, which pads the
    # last batch with `None` whenever the item count is not a multiple of the
    # batch size.
    await asyncio.gather(*(
        crud.file.create_batch(
            db_client,
            ns_path,
            files=missing[start:start + batch_size],
        )
        for start in range(0, len(missing), batch_size)
    ))

    await crud.file.restore_all_folders_size(db_client, ns_path)

    # Fingerprints are calculated in worker processes through the running loop.
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as executor:
        tasks = [
            loop.run_in_executor(executor, _reconcile_calc_fp, storage,
                                 ns_path, path) for path in to_fingerprint
        ]
        fingerprints = await asyncio.gather(*tasks)

    # Pair every path with its fingerprint; `gather` keeps results in task order.
    pairs = list(zip(to_fingerprint, fingerprints))
    await asyncio.gather(*(
        crud.fingerprint.create_batch(
            db_client,
            namespace=ns_path,
            fingerprints=pairs[start:start + batch_size],
        )
        for start in range(0, len(pairs), batch_size)
    ))