def test_download_single_to_dir(tempdir, azure):
    with azure_teardown(azure):
        name = posix(test_dir, 'remote.csv')
        lines = 100
        size, checksum = create_remote_csv(azure, name, 10, 5, lines)
        fname = os.path.join(tempdir, 'remote.csv')
        try:
            down = ADLDownloader(azure,
                                 name,
                                 tempdir,
                                 nthreads=1,
                                 chunksize=2**24,
                                 overwrite=True)
            assert md5sum(fname) == checksum
            assert os.stat(fname).st_size == size
            assert linecount(fname) == lines
        finally:
            if os.path.isfile(fname):
                os.remove(fname)
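
# A minimal sketch of the create_remote_csv helper the download tests above
# assume (hypothetical -- the suite's real fixture may differ): write `lines`
# rows of CSV data to the remote path and return the byte size and MD5 hex
# digest of exactly what was written, so a download can be verified bit-for-bit.
def create_remote_csv_sketch(azure, name, columns, width, lines):
    import hashlib
    row = b','.join([b'x' * width] * columns) + b'\n'
    data = row * lines
    with azure.open(name, 'wb') as f:
        f.write(data)
    return len(data), hashlib.md5(data).hexdigest()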

def test_full_read(azure):
    with azure_teardown(azure):
        with azure.open(a, 'wb') as f:
            f.write(b'0123456789')

        with azure.open(a, 'rb') as f:
            assert len(f.read(4)) == 4
            assert len(f.read(4)) == 4
            assert len(f.read(4)) == 2

        with azure.open(a, 'rb') as f:
            assert len(f.read()) == 10

        with azure.open(a, 'rb') as f:
            assert f.tell() == 0
            f.seek(3)
            assert f.read(4) == b'3456'
            assert f.tell() == 7
            assert f.read(4) == b'789'
            assert f.tell() == 10

def test_filename_specialchar(azure):
    with azure_teardown(azure):
        with azure.open(specialCharFile, 'wb') as f:
            f.write(b'0123456789')

        with azure.open(specialCharFile, 'rb') as f:
            assert len(f.read(4)) == 4
            assert len(f.read(4)) == 4
            assert len(f.read(4)) == 2

        with azure.open(specialCharFile, 'rb') as f:
            assert len(f.read()) == 10

        with azure.open(specialCharFile, 'rb') as f:
            assert f.tell() == 0
            f.seek(3)
            assert f.read(4) == b'3456'
            assert f.tell() == 7
            assert f.read(4) == b'789'
            assert f.tell() == 10

def test_ls_batched(azure):
    test_dir = working_dir() / 'abc'
    azure.mkdir(test_dir)
    with azure_teardown(azure):
        test_size = 10
        assert azure._ls(test_dir, batch_size=10) == []
        create_files(azure,
                     number_of_files=10,
                     prefix='123',
                     root_path=test_dir)
        with pytest.raises(ValueError):
            assert len(azure._ls(test_dir, batch_size=1)) == test_size

        assert len(azure._ls(test_dir, batch_size=9)) == test_size
        assert len(azure._ls(test_dir, batch_size=10)) == test_size
        assert len(azure._ls(test_dir, batch_size=11)) == test_size
        assert len(azure._ls(test_dir, batch_size=2)) == test_size
        assert len(azure._ls(test_dir, batch_size=100)) == test_size
        assert len(azure._ls(test_dir)) == test_size
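
# Hypothetical sketch of the create_files helper used by test_ls_batched:
# touch number_of_files empty files named prefix + index under root_path
# (the real helper may randomize contents or names).
def create_files_sketch(azure, number_of_files, prefix, root_path):
    for i in range(number_of_files):
        azure.touch(root_path / '{}{}'.format(prefix, i))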

def test_download_single_empty_file(tempdir, azure):
    with azure_teardown(azure):
        name = posix(test_dir, 'remote.csv')
        lines = 0  # the file should have no bytes in it
        size, checksum = create_remote_csv(azure, name, 10, 5, lines)
        fname = os.path.join(tempdir, 'local.csv')

        # single chunk
        try:
            down = ADLDownloader(azure,
                                 name,
                                 fname,
                                 nthreads=1,
                                 chunksize=size + 10,
                                 overwrite=True)
            assert md5sum(fname) == checksum
            assert os.stat(fname).st_size == size
            assert linecount(fname) == lines
        finally:
            if os.path.isfile(fname):
                os.remove(fname)
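
# Hypothetical local-file helpers matching the assertions in the download
# tests (sketches only; the suite's own utilities may differ): md5sum streams
# a local file through MD5, linecount counts its lines.
def md5sum_sketch(fname):
    import hashlib
    h = hashlib.md5()
    with open(fname, 'rb') as f:
        for block in iter(lambda: f.read(2**20), b''):
            h.update(block)
    return h.hexdigest()


def linecount_sketch(fname):
    with open(fname, 'rb') as f:
        return sum(1 for _ in f)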

def test_upload_single_file_in_dir(tempdir, azure):
    with azure_teardown(azure):
        lpath_dir = tempdir
        lfilename = os.path.join(lpath_dir, 'singlefile')
        with open(lfilename, 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        up = ADLUploader(azure,
                         test_dir / 'singlefiledir',
                         lpath_dir,
                         nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'singlefiledir' /
                          'singlefile')['length'] == 10
        azure.rm(test_dir / 'singlefiledir' / 'singlefile')

def test_readlines(azure):
    with azure_teardown(azure):
        with azure.open(a, 'wb') as f:
            f.write(b'123\n456')

        with azure.open(a, 'rb') as f:
            lines = f.readlines()
            assert lines == [b'123\n', b'456']

        with azure.open(a, 'rb') as f:
            assert list(f) == lines

        with azure.open(a, 'wb') as f:
            with pytest.raises(ValueError):
                f.read()

        bigdata = [b'fe', b'fi', b'fo'] * 1000
        with azure.open(a, 'wb') as f:
            f.write(b'\n'.join(bigdata))
        with azure.open(a, 'rb') as f:
            lines = list(f)
        assert all(line in [b'fe\n', b'fi\n', b'fo', b'fo\n'] for line in lines)

def test_upload_empty_folder(tempdir, azure):
    with azure_teardown(azure):
        os.mkdir(os.path.join(tempdir, "dir1"))
        os.mkdir(os.path.join(tempdir, "dir1", "b"))

        with open(os.path.join(tempdir, "dir1", "file.txt"), 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        # upload the whole directory; the empty subdirectory "b" must be recreated remotely
        up = ADLUploader(azure,
                         test_dir / "dir1",
                         os.path.join(tempdir, "dir1"),
                         nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / "dir1" / "b")['type'] == 'DIRECTORY'
        azure.rm(test_dir / "dir1", recursive=True)

def test_upload_one(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk
        up = ADLUploader(azure, test_dir / 'littlefile', littlefile, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'littlefile')['length'] == 10

        # multiple chunks, one thread
        size = 10000
        up = ADLUploader(azure, test_dir / 'bigfile', bigfile, nthreads=1,
                         chunksize=size//5, client=client, run=False,
                         overwrite=True)
        up.run()

        assert azure.info(test_dir / 'bigfile')['length'] == size

        azure.rm(test_dir / 'bigfile')
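
# Note on the pattern above: run=False makes ADLUploader only stage the
# transfer, and up.run() then executes it synchronously; passing client= is
# what injects the deterministic ADLTransferClient built with
# unique_temporary=False.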

def test_upload_glob(tempdir, azure):
    for directory in ['a', 'b']:
        d = os.path.join(tempdir, 'data', directory)
        os.makedirs(d)
        for data in ['x.csv', 'y.csv', 'z.txt']:
            with open(os.path.join(d, data), 'wb') as f:
                f.write(b'0123456789')

    with azure_teardown(azure):
        local_path = os.path.join(tempdir, 'data', 'a', '*.csv')
        up = ADLUploader(azure,
                         test_dir,
                         local_path,
                         run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 2
        rfiles = [
            posix(AzureDLPath(f).relative_to(test_dir))
            for f in file_pair_dict.values()
        ]
        assert sorted(rfiles) == sorted(['x.csv', 'y.csv'])

        local_path = os.path.join(tempdir, 'data', '*', '*.csv')
        up = ADLUploader(azure,
                         test_dir,
                         local_path,
                         run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 4

        rfiles = [
            posix(AzureDLPath(f).relative_to(test_dir))
            for f in file_pair_dict.values()
        ]
        assert sorted(rfiles) == sorted([
            posix('a', 'x.csv'),
            posix('a', 'y.csv'),
            posix('b', 'x.csv'),
            posix('b', 'y.csv')
        ])

        local_path = os.path.join(tempdir, 'data', '*', 'z.txt')
        up = ADLUploader(azure,
                         test_dir,
                         local_path,
                         run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 2

        rfiles = [
            posix(AzureDLPath(f).relative_to(test_dir))
            for f in file_pair_dict.values()
        ]

        assert sorted(rfiles) == sorted(
            [posix('a', 'z.txt'), posix('b', 'z.txt')])
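
# Hypothetical sketch of the posix helper used throughout these tests: join
# the arguments into an ADL path and normalise to forward slashes, since ADL
# paths are POSIX-style regardless of the local OS.
def posix_sketch(*args):
    from azure.datalake.store.core import AzureDLPath
    return AzureDLPath(*args).as_posix()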

def test_touch_exists(azure):
    with azure_teardown(azure):
        azure.touch(a)
        assert azure.exists(a, invalidate_cache=False)

def test_ls_empty_with_details(azure):
    with azure_teardown(azure):
        assert not azure.ls(test_dir, invalidate_cache=False, detail=True)

def test_glob_walk_invalidate_cache(azure):
    with azure_teardown(azure):
        azure.mkdir(test_dir / 'c')
        azure.mkdir(test_dir / 'c' / 'd')
        filenames = ['a', 'a1', 'a2', 'a3', 'b1', 'c/x1', 'c/x2', 'c/d/x3']
        filenames = [test_dir / s for s in filenames]
        for fn in filenames:
            azure.touch(fn)

        assert set(azure.glob(test_dir / 'a*')) == {
            posix(test_dir / 'a'),
            posix(test_dir / 'a1'),
            posix(test_dir / 'a2'),
            posix(test_dir / 'a3')
        }

        assert set(azure.glob(test_dir / 'c' / '*')) == {
            posix(test_dir / 'c' / 'x1'),
            posix(test_dir / 'c' / 'x2')
        }

        assert (set(azure.glob(test_dir / 'c')) == set(
            azure.glob(test_dir / 'c' / '')))

        assert set(azure.glob(test_dir / 'a')) == {posix(test_dir / 'a')}
        assert set(azure.glob(test_dir / 'a1')) == {posix(test_dir / 'a1')}

        assert set(azure.glob(test_dir / '*')) == {
            posix(test_dir / 'a'),
            posix(test_dir / 'a1'),
            posix(test_dir / 'a2'),
            posix(test_dir / 'a3'),
            posix(test_dir / 'b1')
        }

        assert set(azure.walk(test_dir, invalidate_cache=True)) == {
            posix(test_dir / 'a'),
            posix(test_dir / 'a1'),
            posix(test_dir / 'a2'),
            posix(test_dir / 'a3'),
            posix(test_dir / 'b1'),
            posix(test_dir / 'c' / 'x1'),
            posix(test_dir / 'c' / 'x2'),
            posix(test_dir / 'c' / 'd' / 'x3')
        }

        assert set(azure.walk(test_dir / 'c', invalidate_cache=True)) == {
            posix(test_dir / 'c' / 'x1'),
            posix(test_dir / 'c' / 'x2'),
            posix(test_dir / 'c' / 'd' / 'x3')
        }

        assert set(azure.walk(test_dir / 'c', invalidate_cache=True)) == set(
            azure.walk(test_dir / 'c', invalidate_cache=True))

        # test glob and walk with details=True
        glob_details = azure.glob(test_dir / '*',
                                  details=True,
                                  invalidate_cache=True)

        # validate that the objects are subscriptable
        assert glob_details[0]['name'] is not None
        assert glob_details[0]['type'] is not None

        walk_details = azure.walk(test_dir,
                                  details=True,
                                  invalidate_cache=True)
        assert walk_details[0]['name'] is not None
        assert walk_details[0]['type'] is not None

def test_delimiters_dash(azure):
    with azure_teardown(azure):
        write_delimited_data(azure, b'--')

def test_delimiters_newline(azure):
    with azure_teardown(azure):
        write_delimited_data(azure, b'\n')
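
# Hypothetical sketch of the write_delimited_data helper behind the two
# delimiter tests: write records joined by the given delimiter and check that
# they round-trip. It assumes azure.open accepts a delimiter= argument for
# flush alignment, as azure-datalake-store's open() does; the real helper
# likely asserts more.
def write_delimited_data_sketch(azure, delimiter):
    data = delimiter.join([b'123', b'456', b'789'])
    with azure.open(a, 'wb', delimiter=delimiter) as f:
        f.write(data)
    with azure.open(a, 'rb') as f:
        assert f.read() == data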

def test_touch_exists_default_cache(azure):
    # like test_touch_exists above, but exercising the default cache behaviour
    with azure_teardown(azure):
        azure.touch(a)
        assert azure.exists(a)