Example #1
def test_video(streaming, ctx):
    rng = np.random.RandomState(0)
    shape = (256, 64, 64, 3)
    video_data = rng.randint(0, 256, size=np.prod(shape),
                             dtype=np.uint8).reshape(shape)

    with ctx() as path:
        with bf.BlobFile(path, mode="wb", streaming=streaming) as wf:
            with imageio.get_writer(
                    wf,
                    format="ffmpeg",
                    quality=None,
                    codec="libx264rgb",
                    pixelformat="bgr24",
                    output_params=["-f", "mp4", "-crf", "0"],
            ) as w:
                for frame in video_data:
                    w.append_data(frame)

        with bf.BlobFile(path, mode="rb", streaming=streaming) as rf:
            with imageio.get_reader(rf,
                                    format="ffmpeg",
                                    input_params=["-f", "mp4"]) as r:
                for idx, frame in enumerate(r):
                    assert np.array_equal(frame, video_data[idx])

        with bf.BlobFile(path, mode="rb", streaming=streaming) as rf:
            container = av.open(rf)
            stream = container.streams.video[0]
            for idx, frame in enumerate(container.decode(stream)):
                assert np.array_equal(frame.to_image(), video_data[idx])
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", required=True)
    parser.add_argument("--no-streaming-read-request", action="store_true")
    parser.add_argument("--buffer-size", default=8192, type=int)
    parser.add_argument("--size", default=1_000_000_000, type=int)
    args = parser.parse_args()

    bf.configure(use_streaming_read_request=not args.no_streaming_read_request)

    path = bf.join(args.path, "large.bin")
    data = (b"meow" * 249 + b"mew\n") * (args.size // 1000)
    with timer("write_large_file"):
        with bf.BlobFile(path, "wb") as f:
            f.write(data)

    start = time.time()
    with timer("read_large_file"):
        with bf.BlobFile(path, "rb", buffer_size=args.buffer_size) as f:
            f.read()
    end = time.time()
    print(f"MB/s {len(data) /1e6/(end - start)}")

    with timer("read_large_file_lines"):
        with bf.BlobFile(path, "r", buffer_size=args.buffer_size) as f:
            for _ in f:
                pass

    with timer("seek_speed"):
        with bf.BlobFile(path, "rb", buffer_size=args.buffer_size) as f:
            for i in range(min(10_000, args.size)):
                f.seek(i)
                f.read(1)
Example #3
def cache_folder(name, dirpath, options, build_fn):
    if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
        # we don't have any credentials to do the caching, always build in this case
        print(f"building without cache for {name}")
        start = time.time()
        build_fn()
        print(f"build elapsed {time.time() - start}")
        return

    options_hash = hashlib.md5("|".join(options).encode("utf8")).hexdigest()
    cache_path = bf.join(f"gs://{GCS_BUCKET}", "cache",
                         f"{name}-{options_hash}.tar")
    if os.path.exists(dirpath):
        print(f"cache for {name} found locally")
    elif bf.exists(cache_path):
        print(f"downloading cache for {name}: {cache_path}")
        start = time.time()
        with bf.BlobFile(cache_path, "rb") as f:
            with tarfile.open(fileobj=f, mode="r") as tf:
                tf.extractall()
        print(f"download elapsed {time.time() - start}")
    else:
        print(f"building cache for {name}")
        start = time.time()
        build_fn()
        print(f"cache build elapsed {time.time() - start}")
        print(f"uploading cache for {name}")
        start = time.time()
        if not bf.exists(cache_path):
            with bf.BlobFile(cache_path, "wb") as f:
                with tarfile.open(fileobj=f, mode="w") as tf:
                    tf.add(dirpath)
        print(f"upload elapsed {time.time() - start}")
Example #4
async def test_copy(any_dir, other_any_dir):
    MIN_CHUNK_SIZE = 256 * 1024

    with open("/dev/random", "rb") as f:
        contents_medium = f.read(16 * MIN_CHUNK_SIZE)
    helpers.create_file(any_dir / "original_medium", contents_medium)

    contents_known_small = b"abcdefgh"
    helpers.create_file(any_dir / "original_small", contents_known_small)

    async with bbb.BoostExecutor(100) as e:
        with bbb.globals.configure(chunk_size=MIN_CHUNK_SIZE):
            await bbb.copyfile(any_dir / "original_medium",
                               other_any_dir / "copied_medium", e)
            with blobfile.BlobFile(str(other_any_dir / "copied_medium"),
                                   "rb") as f:
                assert f.read() == contents_medium

        await bbb.copyfile(
            any_dir / "original_small",
            other_any_dir / "copied_small",
            e,
            size=len(contents_known_small),
        )
        with blobfile.BlobFile(str(other_any_dir / "copied_small"), "rb") as f:
            assert f.read() == contents_known_small
Example #5
def test_large_file(ctx):
    contents = b"0" * 2**32
    with ctx() as path:
        with bf.BlobFile(path, "wb", streaming=True) as f:
            f.write(contents)
        with bf.BlobFile(path, "rb", streaming=True) as f:
            assert contents == f.read()
Example #6
def test_read_stats(buffer_size, ctx):
    with ctx() as path:
        contents = b"meow!"

        with bf.BlobFile(path, "wb") as w:
            w.write(contents)

        with bf.BlobFile(path, "rb", buffer_size=buffer_size) as r:
            r.read(1)

        if buffer_size == 1:
            assert r.raw.bytes_read == 1  # type: ignore
        else:
            assert r.raw.bytes_read == len(contents)  # type: ignore

        with bf.BlobFile(path, "rb", buffer_size=buffer_size) as r:
            r.read(1)
            r.seek(4)
            r.read(1)
            r.seek(1000000)
            assert r.read(1) == b""

        if buffer_size == 1:
            assert r.raw.requests == 2  # type: ignore
            assert r.raw.bytes_read == 2  # type: ignore
        else:
            assert r.raw.requests == 1  # type: ignore
            assert r.raw.bytes_read == len(contents)  # type: ignore
Example #7
def test_glob(ctx, parallel):
    contents = b"meow!"
    with ctx() as path:
        dirpath = bf.dirname(path)
        a_path = bf.join(dirpath, "ab")
        with bf.BlobFile(a_path, "wb") as w:
            w.write(contents)
        b_path = bf.join(dirpath, "bb")
        with bf.BlobFile(b_path, "wb") as w:
            w.write(contents)

        def assert_listing_equal(path, desired):
            desired = sorted([bf.join(dirpath, p) for p in desired])
            actual = sorted(list(bf.glob(path, parallel=parallel)))
            assert actual == desired, f"{actual} != {desired}"

        assert_listing_equal(bf.join(dirpath, "*b"), ["ab", "bb"])
        assert_listing_equal(bf.join(dirpath, "a*"), ["ab"])
        assert_listing_equal(bf.join(dirpath, "ab*"), ["ab"])
        assert_listing_equal(bf.join(dirpath, "*"), ["ab", "bb"])
        assert_listing_equal(bf.join(dirpath, "bb"), ["bb"])

        path = bf.join(dirpath, "test.txt")
        with bf.BlobFile(path, "wb") as w:
            w.write(contents)
        path = bf.join(dirpath, "subdir", "test.txt")
        bf.makedirs(bf.dirname(path))
        with bf.BlobFile(path, "wb") as f:
            f.write(contents)
        path = bf.join(dirpath, "subdir", "subsubdir", "test.txt")
        if "://" not in path:
            # implicit directory
            bf.makedirs(bf.dirname(path))
        with bf.BlobFile(path, "wb") as f:
            f.write(contents)

        assert_listing_equal(bf.join(dirpath, "*/test.txt"), ["subdir/test.txt"])
        assert_listing_equal(bf.join(dirpath, "*/*.txt"), ["subdir/test.txt"])
        if "://" in path:
            # local glob doesn't handle ** the same way as remote glob
            assert_listing_equal(
                bf.join(dirpath, "**.txt"),
                ["test.txt", "subdir/test.txt", "subdir/subsubdir/test.txt"],
            )
        else:
            assert_listing_equal(bf.join(dirpath, "**.txt"), ["test.txt"])
        assert_listing_equal(bf.join(dirpath, "*/test"), [])
        assert_listing_equal(bf.join(dirpath, "subdir/test.txt"), ["subdir/test.txt"])

        # directories
        assert_listing_equal(bf.join(dirpath, "*"), ["ab", "bb", "subdir", "test.txt"])
        assert_listing_equal(bf.join(dirpath, "subdir"), ["subdir"])
        assert_listing_equal(bf.join(dirpath, "subdir/"), ["subdir"])
        assert_listing_equal(bf.join(dirpath, "*/"), ["subdir"])
        assert_listing_equal(bf.join(dirpath, "*dir"), ["subdir"])
        assert_listing_equal(bf.join(dirpath, "subdir/*dir"), ["subdir/subsubdir"])
        assert_listing_equal(bf.join(dirpath, "subdir/*dir/"), ["subdir/subsubdir"])
        assert_listing_equal(bf.join(dirpath, "su*ir/*dir/"), ["subdir/subsubdir"])
Example #8
def test_append(ctx):
    contents = b"meow!\n"
    additional_contents = b"purr\n"
    with ctx() as path:
        with bf.BlobFile(path, "ab", streaming=False) as w:
            w.write(contents)
        with bf.BlobFile(path, "ab", streaming=False) as w:
            w.write(additional_contents)
        with bf.BlobFile(path, "rb") as r:
            assert r.read() == contents + additional_contents
Example #9
def test_read_write(ctx, streaming):
    contents = b"meow!\npurr\n"
    with ctx() as path:
        path = bf.join(path, "a folder", "a.file")
        bf.makedirs(bf.dirname(path))
        with bf.BlobFile(path, "wb", streaming=streaming) as w:
            w.write(contents)
        with bf.BlobFile(path, "rb", streaming=streaming) as r:
            assert r.read() == contents
        with bf.BlobFile(path, "rb", streaming=streaming) as r:
            lines = list(r)
            assert b"".join(lines) == contents
Example #10
def test_listdir(ctx):
    contents = b"meow!"
    with ctx() as path:
        dirpath = bf.dirname(path)
        a_path = bf.join(dirpath, "a")
        with bf.BlobFile(a_path, "wb") as w:
            w.write(contents)
        b_path = bf.join(dirpath, "b")
        with bf.BlobFile(b_path, "wb") as w:
            w.write(contents)
        bf.makedirs(bf.join(dirpath, "c"))
        assert sorted(list(bf.listdir(dirpath))) == ["a", "b", "c"]
Example #11
def test_md5(ctx):
    contents = b"meow!"
    meow_hash = hashlib.md5(contents).hexdigest()

    with ctx() as path:
        _write_contents(path, contents)
        assert bf.md5(path) == meow_hash
        with bf.BlobFile(path, "wb") as f:
            f.write(contents)
        assert bf.md5(path) == meow_hash
        with bf.BlobFile(path, "wb") as f:
            f.write(contents)
        assert bf.md5(path) == meow_hash
Example #12
def test_concurrent_write_gcs():
    with _get_temp_gcs_path() as path:
        outer_contents = b"miso" * (2**20 + 1)
        inner_contents = b"momo" * (2**20 + 1)
        with bf.BlobFile(path, "wb", streaming=True) as f:
            f.write(outer_contents)
            with bf.BlobFile(path, "wb", streaming=True) as f:
                f.write(inner_contents)

        # with GCS the last writer to finish wins, so the outer write finishes
        # last and overwrites the inner one
        with bf.BlobFile(path, "rb") as f:
            assert f.read() == outer_contents
Example #13
def test_rmtree(ctx):
    contents = b"meow!"
    with ctx() as path:
        root = bf.dirname(path)
        destroy_path = bf.join(root, "destroy")
        bf.makedirs(destroy_path)
        save_path = bf.join(root, "save")
        bf.makedirs(save_path)

        # implicit dir
        if not "://" in path:
            bf.makedirs(bf.join(destroy_path, "adir"))
        with bf.BlobFile(bf.join(destroy_path, "adir/b"), "wb") as w:
            w.write(contents)

        # explicit dir
        bf.makedirs(bf.join(destroy_path, "bdir"))
        with bf.BlobFile(bf.join(destroy_path, "bdir/b"), "wb") as w:
            w.write(contents)

        bf.makedirs(bf.join(save_path, "somedir"))
        with bf.BlobFile(bf.join(save_path, "somefile"), "wb") as w:
            w.write(contents)

        def assert_listing_equal(path, desired):
            actual = list(bf.walk(path))
            # ordering of os walk is weird, only compare sorted order
            assert sorted(actual) == sorted(desired), f"{actual} != {desired}"

        assert_listing_equal(
            root,
            [
                (root, ["destroy", "save"], []),
                (destroy_path, ["adir", "bdir"], []),
                (bf.join(destroy_path, "adir"), [], ["b"]),
                (bf.join(destroy_path, "bdir"), [], ["b"]),
                (save_path, ["somedir"], ["somefile"]),
                (bf.join(save_path, "somedir"), [], []),
            ],
        )

        bf.rmtree(destroy_path)

        assert_listing_equal(
            root,
            [
                (root, ["save"], []),
                (save_path, ["somedir"], ["somefile"]),
                (bf.join(save_path, "somedir"), [], []),
            ],
        )
Example #14
def test_az_path():
    contents = b"meow!\npurr\n"
    with _get_temp_as_path() as path:
        path = _convert_https_to_az(path)
        path = bf.join(path, "a folder", "a.file")
        path = _convert_https_to_az(path)
        bf.makedirs(_convert_https_to_az(bf.dirname(path)))
        with bf.BlobFile(path, "wb") as w:
            w.write(contents)
        with bf.BlobFile(path, "rb") as r:
            assert r.read() == contents
        with bf.BlobFile(path, "rb") as r:
            lines = list(r)
            assert b"".join(lines) == contents
Example #15
def test_concurrent_write_as():
    with _get_temp_as_path() as path:
        outer_contents = b"miso" * (2**20 + 1)
        inner_contents = b"momo" * (2**20 + 1)
        # the inner write will invalidate the outer one, the last writer
        # to start wins with this setup
        with pytest.raises(bf.ConcurrentWriteFailure):
            with bf.BlobFile(path, "wb", streaming=True) as f:
                f.write(outer_contents)
                with bf.BlobFile(path, "wb", streaming=True) as f:
                    f.write(inner_contents)

        # since the outer write failed, the inner write's contents remain
        with bf.BlobFile(path, "rb") as f:
            assert f.read() == inner_contents
Example #16
    def save(self):
        def save_checkpoint(rate, params):
            state_dict = self.mp_trainer.master_params_to_state_dict(params)
            if dist.get_rank() == 0:
                logger.log(f"saving model {rate}...")
                if not rate:
                    filename = f"model{(self.step+self.resume_step):06d}.pt"
                else:
                    filename = f"ema_{rate}_{(self.step+self.resume_step):06d}.pt"
                with bf.BlobFile(bf.join(get_blob_logdir(), filename),
                                 "wb") as f:
                    th.save(state_dict, f)

        save_checkpoint(0, self.mp_trainer.master_params)
        for rate, params in zip(self.ema_rate, self.ema_params):
            save_checkpoint(rate, params)

        if dist.get_rank() == 0:
            with bf.BlobFile(
                    bf.join(get_blob_logdir(),
                            f"opt{(self.step+self.resume_step):06d}.pt"),
                    "wb",
            ) as f:
                th.save(self.opt.state_dict(), f)

        dist.barrier()
Example #17
def test_overwrite_while_reading(ctx):
    chunk_size = 2**20
    contents = b"\x00" * chunk_size * 2
    alternative_contents = b"\xFF" * chunk_size * 4
    with ctx() as path:
        with bf.BlobFile(path, "wb") as f:
            f.write(contents)
        with bf.BlobFile(path, "rb") as f:
            read_contents = f.read(chunk_size)
            with bf.BlobFile(path, "wb") as f2:
                f2.write(alternative_contents)
            # close underlying connection
            f.raw._f = None  # type: ignore
            read_contents += f.read(chunk_size)
            assert (read_contents == contents[:chunk_size] +
                    alternative_contents[chunk_size:chunk_size * 2])
Example #18
    def __getitem__(self, idx):
        path = self.local_images[idx]
        with bf.BlobFile(path, "rb") as f:
            pil_image = Image.open(f)
            pil_image.load()

        # We are not on a new enough PIL to support the `reducing_gap`
        # argument, which uses BOX downsampling at powers of two first.
        # Thus, we do it by hand to improve downsample quality.
        while min(*pil_image.size) >= 2 * self.resolution:
            pil_image = pil_image.resize(tuple(x // 2 for x in pil_image.size),
                                         resample=Image.BOX)

        scale = self.resolution / min(*pil_image.size)
        pil_image = pil_image.resize(tuple(
            round(x * scale) for x in pil_image.size),
                                     resample=Image.BICUBIC)

        arr = np.array(pil_image.convert("RGB"))
        crop_y = (arr.shape[0] - self.resolution) // 2
        crop_x = (arr.shape[1] - self.resolution) // 2
        arr = arr[crop_y:crop_y + self.resolution,
                  crop_x:crop_x + self.resolution]
        arr = arr.astype(np.float32) / 127.5 - 1

        out_dict = {}
        if self.local_classes is not None:
            out_dict["y"] = np.array(self.local_classes[idx], dtype=np.int64)
        return np.transpose(arr, [2, 0, 1]), out_dict
Example #19
def loadnpy(url):
    import blobfile
    from io import BytesIO

    # read the whole blob into memory, then let numpy parse it
    with blobfile.BlobFile(url, "rb") as fp:
        x = np.load(BytesIO(fp.read()))
    return x
Example #20
def all_examples():
    for file_name in input_file_names:
        with bf.BlobFile(file_name, "r") as f:
            for line in f:
                encoded_example = json.loads(line)
                example = jsonl_encoding.decode_example(encoded_example)
                yield example
Example #21
def test_composite_objects():
    with _get_temp_gcs_path() as remote_path:
        with _get_temp_local_path() as local_path:
            contents = b"0" * 2 * 2**20
            with open(local_path, "wb") as f:
                f.write(contents)
            sp.run(
                [
                    "gsutil",
                    "-o",
                    "GSUtil:parallel_composite_upload_threshold=1M",
                    "cp",
                    local_path,
                    remote_path,
                ],
                check=True,
            )

        # intentionally repeated: composite objects don't come with an md5, so
        # the first call computes one and the second should reuse the stored value
        assert hashlib.md5(contents).hexdigest() == bf.md5(remote_path)
        assert hashlib.md5(contents).hexdigest() == bf.md5(remote_path)

        with tempfile.TemporaryDirectory() as tmpdir:
            with bf.BlobFile(remote_path,
                             "rb",
                             cache_dir=tmpdir,
                             streaming=False) as f:
                assert f.read() == contents
Example #22
    def __init__(self, bpe_path=None):
        if bpe_path is None:
            bpe_path = blobfile.BlobFile(
                'https://openaipublic.blob.core.windows.net/clip/bpe_simple_vocab_16e6.txt',
                'r')
        self.byte_encoder = bytes_to_unicode()
        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
        merges = bpe_path.read().split('\n')
        merges = merges[1:49152 - 256 - 2 + 1]
        merges = [tuple(merge.split()) for merge in merges]
        vocab = list(bytes_to_unicode().values())
        vocab = vocab + [v + '</w>' for v in vocab]
        for merge in merges:
            vocab.append(''.join(merge))
        vocab.extend(['<|startoftext|>', '<|endoftext|>'])
        self.encoder = dict(zip(vocab, range(len(vocab))))
        self.decoder = {v: k for k, v in self.encoder.items()}
        self.bpe_ranks = dict(zip(merges, range(len(merges))))
        self.cache = {
            '<|startoftext|>': '<|startoftext|>',
            '<|endoftext|>': '<|endoftext|>'
        }
        # note: the pattern below needs the third-party `regex` module imported
        # as `re`, since the stdlib `re` has no \p{...} character classes
        self.pat = re.compile(
            r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""",
            re.IGNORECASE)
Example #23
# contextlib.contextmanager is assumed here so that the `yield` below can back
# a `with` statement
@contextlib.contextmanager
def open_file_cached(path, mode="r"):
    """Given a GCS path URL, caches the contents locally.
    WARNING: only use this function if contents under the path won't change!
    """
    with bf.BlobFile(path,
                     mode=mode,
                     cache_dir="/tmp/bf-file-cache",
                     streaming=False) as f:
        yield f
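
# A minimal usage sketch (hypothetical bucket and file name), relying on the
# contextlib.contextmanager decorator assumed above; repeated calls reuse the
# copy cached under /tmp/bf-file-cache:
if __name__ == "__main__":
    with open_file_cached("gs://my-bucket/vocab.txt") as f:
        print(len(f.read()))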
Example #24
def verify_hash(ref_hash, path):
    with bf.BlobFile(path, "rb") as f:
        m = hashlib.md5()
        while True:
            block = f.read(CHUNK_SIZE)
            if block == b"":
                break
            m.update(block)
        assert m.hexdigest() == ref_hash
Example #25
def test_cache_dir(ctx):
    cache_dir = tempfile.mkdtemp()
    contents = b"meow!"
    alternative_contents = b"purr!"
    with ctx() as path:
        with bf.BlobFile(path, mode="wb") as f:
            f.write(contents)
        with bf.BlobFile(path, mode="rb", streaming=False, cache_dir=cache_dir) as f:
            assert f.read() == contents
        content_hash = hashlib.md5(contents).hexdigest()
        cache_path = bf.join(cache_dir, content_hash, bf.basename(path))
        with open(cache_path, "rb") as f:
            assert f.read() == contents
        # alter the cached file to make sure we are not re-reading the remote file
        with open(cache_path, "wb") as f:
            f.write(alternative_contents)
        with bf.BlobFile(path, mode="rb", streaming=False, cache_dir=cache_dir) as f:
            assert f.read() == alternative_contents
Example #26
def save_checkpoint(rate, params):
    state_dict = self._master_params_to_state_dict(params)
    if dist.get_rank() == 0:
        logger.log(f"saving model {rate}...")
        if not rate:
            filename = f"model{(self.step+self.resume_step):06d}.pt"
        else:
            filename = f"ema_{rate}_{(self.step+self.resume_step):06d}.pt"
        with bf.BlobFile(bf.join(get_blob_logdir(), filename), "wb") as f:
            th.save(state_dict, f)
Example #27
def test_walk(ctx, topdown):
    contents = b"meow!"
    with ctx() as path:
        dirpath = bf.dirname(path)
        a_path = bf.join(dirpath, "a")
        with bf.BlobFile(a_path, "wb") as w:
            w.write(contents)
        bf.makedirs(bf.join(dirpath, "c/d"))
        b_path = bf.join(dirpath, "c/d/b")
        with bf.BlobFile(b_path, "wb") as w:
            w.write(contents)
        expected = [
            (dirpath, ["c"], ["a"]),
            (bf.join(dirpath, "c"), ["d"], []),
            (bf.join(dirpath, "c", "d"), [], ["b"]),
        ]
        if not topdown:
            expected = list(reversed(expected))
        assert list(bf.walk(dirpath, topdown=topdown)) == expected
Example #28
def load_state_dict(path, **kwargs):
    """
    Load a PyTorch file without redundant fetches across MPI ranks.
    """
    if MPI.COMM_WORLD.Get_rank() == 0:
        with bf.BlobFile(path, "rb") as f:
            data = f.read()
    else:
        data = None
    data = MPI.COMM_WORLD.bcast(data)
    return th.load(io.BytesIO(data), **kwargs)
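
# A minimal usage sketch (hypothetical checkpoint path): rank 0 reads the blob
# once and broadcasts the raw bytes, so other ranks never hit the blob store.
if __name__ == "__main__":
    state_dict = load_state_dict("gs://my-bucket/model000100.pt", map_location="cpu")
    print(sorted(state_dict.keys())[:5])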
Example #29
def test_scandir(ctx):
    contents = b"meow!"
    with ctx() as path:
        dirpath = bf.dirname(path)
        a_path = bf.join(dirpath, "a")
        with bf.BlobFile(a_path, "wb") as w:
            w.write(contents)
        b_path = bf.join(dirpath, "b")
        with bf.BlobFile(b_path, "wb") as w:
            w.write(contents)
        bf.makedirs(bf.join(dirpath, "c"))
        entries = sorted(list(bf.scandir(dirpath)))
        assert [e.name for e in entries] == ["a", "b", "c"]
        assert [e.path for e in entries
                ] == [bf.join(dirpath, name) for name in ["a", "b", "c"]]
        assert [e.is_dir for e in entries] == [False, False, True]
        assert [e.is_file for e in entries] == [True, True, False]
        assert entries[0].stat.size == len(contents)
        assert entries[1].stat.size == len(contents)
        assert entries[2].stat is None
Example #30
def test_azure_metadata(ctx):
    # make sure metadata is preserved when opening a file for writing
    # which clears uncommitted blocks
    contents = b"meow!"

    with ctx() as path:
        with bf.BlobFile(path, "wb") as f:
            f.write(contents)

        bf.set_mtime(path, 1)
        _isfile, orig_metadata = ops._azure_isfile(path)
        time.sleep(5)
        with bf.BlobFile(path, "wb", streaming=True) as f:
            _isfile, new_metadata = ops._azure_isfile(path)
        keys = set(orig_metadata.keys()).union(new_metadata.keys())
        for key in sorted(keys):
            orig_val = orig_metadata.get(key)
            new_val = new_metadata.get(key)
            if key not in ["Date", "ETag", "Last-Modified", "x-ms-request-id"]:
                assert orig_val == new_val