Exemplo n.º 1
0
def test_hashed_stream_reader(tmp_dir):
    """Reading through HashedStreamReader yields the raw bytes and the
    computed hash matches the md5 of the file on disk."""
    tmp_dir.gen({"foo": "foo"})

    path = tmp_dir / "foo"
    with open(path, "rb") as fileobj:
        reader = HashedStreamReader(fileobj)
        assert reader.read(3) == b"foo"

    expected = file_md5(path, LocalFileSystem(None, {}))
    assert reader.is_text_file
    assert expected == reader.hash_info.value
Exemplo n.º 2
0
def test_hashed_stream_reader_as_chunks(tmp_dir):
    """Draining the stream in small chunks still produces the md5 of the
    whole file, and the NUL-containing content is reported as non-text."""
    tmp_dir.gen({"foo": b"foo \x00" * 16})

    path = tmp_dir / "foo"
    with open(path, "rb") as fileobj:
        reader = HashedStreamReader(fileobj)
        # Consume the stream 16 bytes at a time until exhausted.
        while reader.read(16):
            pass

    expected = file_md5(path, LocalFileSystem(None, {}))
    assert not reader.is_text_file
    assert expected == reader.hash_info.value
Exemplo n.º 3
0
def test_hashed_stream_reader_compatibility(tmp_dir, contents):
    """A single read larger than DEFAULT_CHUNK_SIZE (mirroring what
    upload_fobj does) still yields the correct text flag and md5."""
    # Always read more than the DEFAULT_CHUNK_SIZE (512 bytes).
    read_size = DEFAULT_CHUNK_SIZE * 2

    tmp_dir.gen("data", contents)
    path = tmp_dir / "data"

    with open(path, "rb") as fileobj:
        reader = HashedStreamReader(fileobj)
        reader.read(read_size)

    fs = LocalFileSystem()
    expected = file_md5(path, fs)

    assert reader.is_text_file is istextfile(path, fs)
    assert reader.hash_info.value == expected
Exemplo n.º 4
0
def _upload_file(path_info, fs, odb, upload_odb):
    """Stream ``path_info`` from ``fs`` to a temporary location on
    ``upload_odb`` while hashing on the fly, then register the resulting
    hash in ``odb``.

    Returns the original ``path_info`` together with the object fetched
    back from ``odb`` by the computed hash.
    """
    from dvc.utils import tmp_fname
    from dvc.utils.stream import HashedStreamReader

    tmp_info = upload_odb.path_info / tmp_fname()
    with fs.open(path_info, mode="rb", chunk_size=fs.CHUNK_SIZE) as fobj:
        stream = HashedStreamReader(fobj)
        file_size = fs.getsize(path_info)
        upload_odb.fs.upload(
            stream, tmp_info, desc=path_info.name, total=file_size
        )

    odb.add(tmp_info, upload_odb.fs, stream.hash_info)
    return path_info, odb.get(stream.hash_info)
Exemplo n.º 5
0
def test_hashed_stream_reader_as_chunks(tmp_dir):
    """tell() tracks the cumulative byte count while the stream is read
    in 16-byte chunks, and the final hash matches the on-disk md5."""
    tmp_dir.gen({"foo": b"foo \x00" * 16})

    path = tmp_dir / "foo"

    expected_size = len(path.read_bytes())
    with open(path, "rb") as fileobj:
        reader = HashedStreamReader(fileobj)

        consumed = 0
        while True:
            chunk = reader.read(16)
            consumed += len(chunk)
            # tell() must agree with the byte count after every read,
            # including the final empty one.
            assert reader.tell() == consumed
            if not chunk:
                break

        assert reader.tell() == expected_size == consumed

    expected = file_md5(path, LocalFileSystem(None, {}))
    assert not reader.is_text_file
    assert expected == reader.hash_info.value
Exemplo n.º 6
0
def _upload_file(path_info, fs, odb):
    """Copy ``path_info`` from ``fs`` to a temporary path inside ``odb``
    while hashing the bytes as they stream through.

    Returns the original ``path_info`` and a ``HashFile`` describing the
    uploaded temporary object.
    """
    from dvc.utils import tmp_fname
    from dvc.utils.stream import HashedStreamReader

    tmp_info = odb.path_info / tmp_fname()
    with fs.open(path_info, mode="rb", chunk_size=fs.CHUNK_SIZE) as fobj:
        stream = HashedStreamReader(fobj)
        odb.fs.upload_fobj(
            stream,
            tmp_info,
            desc=path_info.name,
            size=fs.getsize(path_info),
        )

    return path_info, HashFile(tmp_info, odb.fs, stream.hash_info)
Exemplo n.º 7
0
def _upload_file(from_fs_path, fs, odb, upload_odb):
    """Stream ``from_fs_path`` from ``fs`` to a temporary path on
    ``upload_odb`` while hashing on the fly, then register the hash in
    ``odb``.

    Returns a ``(from_fs_path, Meta, object)`` triple, where the object
    is fetched back from ``odb`` by the computed hash.
    """
    from dvc.utils import tmp_fname
    from dvc.utils.stream import HashedStreamReader

    path = upload_odb.fs.path
    tmp_info = path.join(upload_odb.fs_path, tmp_fname())
    with fs.open(from_fs_path, mode="rb", chunk_size=fs.CHUNK_SIZE) as fobj:
        stream = HashedStreamReader(fobj)
        size = fs.getsize(from_fs_path)
        upload_odb.fs.upload(
            stream, tmp_info, desc=path.name(from_fs_path), total=size
        )

    odb.add(tmp_info, upload_odb.fs, stream.hash_info)
    return from_fs_path, Meta(size=size), odb.get(stream.hash_info)
Exemplo n.º 8
0
def test_hashed_stream_reader(tmp_dir):
    """The reader is readable but not seekable, tell() advances with
    each read, and the final hash equals the file's md5."""
    tmp_dir.gen({"foo": "foo"})

    path = tmp_dir / "foo"
    with open(path, "rb") as fileobj:
        reader = HashedStreamReader(fileobj)

        assert reader.readable()
        assert not reader.seekable()

        assert reader.read(2) == b"fo"
        assert reader.tell() == 2

        assert reader.read(1) == b"o"
        assert reader.tell() == 3

    expected = file_md5(path, LocalFileSystem())
    assert reader.is_text_file
    assert expected == reader.hash_info.value
Exemplo n.º 9
0
def _transfer_file(odb, from_fs, from_info):
    """Upload ``from_info`` from ``from_fs`` into ``odb``'s filesystem at
    a temporary path, hashing the bytes as they stream through.

    Returns the temporary path together with the computed hash info.
    """
    from dvc.utils import tmp_fname
    from dvc.utils.stream import HashedStreamReader

    tmp_info = odb.fs.path_info / tmp_fname()
    # The hash is unknown until the stream has been fully read, so the
    # data goes to a temporary location first and gets moved later.
    with from_fs.open(
        from_info, mode="rb", chunk_size=from_fs.CHUNK_SIZE
    ) as fobj:
        reader = HashedStreamReader(fobj)
        odb.fs.upload_fobj(
            reader,
            tmp_info,
            total=from_fs.getsize(from_info),
            desc=from_info.name,
        )

    return tmp_info, reader.hash_info
Exemplo n.º 10
0
    def _transfer_file(self, from_tree, from_info):
        """Upload ``from_info`` from ``from_tree`` into this tree while
        hashing on the fly, then move the data to its hash-derived path.

        Returns the computed hash info.
        """
        from dvc.utils import tmp_fname
        from dvc.utils.stream import HashedStreamReader

        tmp_info = self.tree.path_info / tmp_fname()
        # The hash is only known once the whole stream has been read, so
        # upload to a temporary path first and move it into place after.
        with from_tree.open(
            from_info, mode="rb", chunk_size=from_tree.CHUNK_SIZE
        ) as fobj:
            reader = HashedStreamReader(fobj)
            self.tree.upload_fobj(
                reader,
                tmp_info,
                total=from_tree.getsize(from_info),
                desc=from_info.name,
            )

        hash_info = reader.hash_info
        self.move(tmp_info, self.tree.hash_to_path_info(hash_info.value))
        return hash_info