def test_iter_chunk_keys_with_more_base_dims(self):
    """Checks iter_chunk_keys when `base` names a dim absent from the chunks.

    Only 'x' is chunked (two chunks of size 3) while the base offsets name
    both 'x' and 'y'. The expected keys start at the 'x' base offset and
    keep the 'y' entry from the base.
    """
    chunk_sizes = {'x': (3, 3)}
    base_offsets = {'x': 30, 'y': 0}
    wanted = [
        xarray_beam.ChunkKey({'x': x_start, 'y': 0}) for x_start in (30, 33)
    ]

    produced = list(core.iter_chunk_keys(chunk_sizes, base=base_offsets))
    produced.sort()

    self.assertEqual(produced, wanted)
def test_chunks_to_zarr(self):
    """Round-trips one chunk through ChunksToZarr and checks its errors.

    The first four sub-tests write the same single-chunk input with
    different combinations of template and zarr_chunks, reopen the store
    and compare it with the source dataset. The remaining checks expect a
    ValueError with a specific message.
    """
    source = xarray.Dataset(
        {'foo': ('x', np.arange(0, 60, 10))},
        coords={'x': np.arange(6)},
    )
    dask_backed = source.chunk()
    keyed_chunks = [(xarray_beam.ChunkKey({'x': 0}), source)]

    def write_and_reopen(*args, **kwargs):
        # Write the chunks into a fresh directory, then read the store back.
        store_path = self.create_tempdir().full_path
        keyed_chunks | xarray_beam.ChunksToZarr(store_path, *args, **kwargs)
        return xarray.open_zarr(store_path, consolidated=True)

    with self.subTest('no template'):
        reopened = write_and_reopen()
        xarray.testing.assert_identical(source, reopened)

    with self.subTest('with template'):
        reopened = write_and_reopen(dask_backed)
        xarray.testing.assert_identical(source, reopened)

    with self.subTest('with zarr_chunks and with template'):
        reopened = write_and_reopen(dask_backed, {'x': 3})
        xarray.testing.assert_identical(source, reopened)
        self.assertEqual(reopened.chunks, {'x': (3, 3)})

    with self.subTest('with zarr_chunks and no template'):
        reopened = write_and_reopen(zarr_chunks={'x': 3})
        xarray.testing.assert_identical(source, reopened)
        self.assertEqual(reopened.chunks, {'x': (3, 3)})

    # The un-chunked source dataset is rejected as a template when the
    # transform is constructed; nothing is piped into it here.
    store_path = self.create_tempdir().full_path
    with self.assertRaisesRegex(
        ValueError,
        'template does not have any variables chunked with Dask',
    ):
        xarray_beam.ChunksToZarr(store_path, source)

    # A template whose 'x' coordinate differs from the chunk's.
    store_path = self.create_tempdir().full_path
    mismatched_template = dask_backed.assign_coords(x=np.zeros(6))
    with self.assertRaisesRegex(
        ValueError,
        'template and chunk indexes do not match',
    ):
        keyed_chunks | xarray_beam.ChunksToZarr(store_path, mismatched_template)

    # A chunk carrying a 'z' dimension that the template does not have.
    chunks_with_extra_dim = [
        (xarray_beam.ChunkKey({'x': 0}), source.expand_dims(z=[1, 2])),
    ]
    store_path = self.create_tempdir().full_path
    with self.assertRaisesRegex(
        ValueError,
        'unexpected new indexes found in chunk',
    ):
        chunks_with_extra_dim | xarray_beam.ChunksToZarr(
            store_path, mismatched_template
        )
def test_comparison(self):
    """Checks ordering comparisons between ChunkKey objects.

    Comparing with a string is expected to raise TypeError, comparing with
    a key over different dimensions is expected to raise ValueError, and
    keys over the same dimensions are expected to order by their offsets.
    """
    smaller = xarray_beam.ChunkKey({'x': 0, 'y': 10})

    # A plain string on the right-hand side.
    with self.assertRaises(TypeError):
        smaller < 'foo'

    # A key that has 'x' but no 'y'.
    with self.assertRaisesRegex(ValueError, 'Dimensions must match'):
        smaller < xarray_beam.ChunkKey({'x': 0})

    larger = xarray_beam.ChunkKey({'x': 0, 'y': 20})
    self.assertLess(smaller, larger)
    self.assertGreater(larger, smaller)
def test_use_as_beam_key(self):
    """Checks that ChunkKey works as the key of a Beam GroupByKey.

    The third input key lists its dimensions in a different order than the
    first; both are expected to land in the same group.
    """
    first_key = xarray_beam.ChunkKey({'x': 0, 'y': 1})
    second_key = xarray_beam.ChunkKey({'x': 0, 'y': 2})
    first_key_reordered = xarray_beam.ChunkKey({'y': 1, 'x': 0})
    keyed_values = [
        (first_key, 1),
        (second_key, 2),
        (first_key_reordered, 3),
    ]
    wanted = [
        (xarray_beam.ChunkKey({'x': 0, 'y': 1}), [1, 3]),
        (xarray_beam.ChunkKey({'x': 0, 'y': 2}), [2]),
    ]

    grouped = keyed_values | beam.GroupByKey()

    self.assertEqual(grouped, wanted)
def test_operators(self):
    """Checks the `|` and `-` operators of ChunkKey."""
    base_key = xarray_beam.ChunkKey({'x': 0, 'y': 10})

    # `|` with a dict is expected to add the new dimension offset.
    with_z = xarray_beam.ChunkKey({'x': 0, 'y': 10, 'z': 100})
    merged = base_key | {'z': 100}
    self.assertEqual(merged, with_z)

    # `-` with a set is expected to drop the named dimension.
    only_y = xarray_beam.ChunkKey({'y': 10})
    reduced = base_key - {'x'}
    self.assertEqual(reduced, only_y)

    # A bare string on the right-hand side of `-`.
    with self.assertRaises(TypeError):
        base_key - 'x'

    # Removing a dimension that the key does not have.
    with self.assertRaisesRegex(ValueError, 'not found'):
        base_key - {'z'}
def test_mapping(self):
    """Checks the read-only mapping operations of ChunkKey."""
    key = xarray_beam.ChunkKey({'x': 0, 'y': 10})

    dim_names = [*key.keys()]
    self.assertEqual(dim_names, ['x', 'y'])

    offsets = [*key.values()]
    self.assertEqual(offsets, [0, 10])

    # Membership and item lookup by dimension name.
    self.assertIn('x', key)
    self.assertNotIn('z', key)
    self.assertEqual(key['x'], 0)
def test_immutability(self):
    """Checks that ChunkKey rejects item assignment and is a valid dict key."""
    frozen_key = xarray_beam.ChunkKey({'x': 0, 'y': 10})

    # Item assignment is expected to raise TypeError.
    with self.assertRaises(TypeError):
        frozen_key['z'] = 100

    # Using the key in a dict requires it to be hashable.
    lookup = {frozen_key: 'foo'}
    self.assertEqual(lookup[frozen_key], 'foo')
def test_dataset_to_chunks_whole(self):
    """Checks that DatasetToChunks can emit the whole dataset as one chunk.

    Both `chunks={'x': -1}` and an empty `chunks` mapping are expected to
    produce a single chunk keyed at offset 0 along 'x'.
    """
    source = xarray.Dataset({'foo': ('x', np.arange(6))})
    wanted = [(xarray_beam.ChunkKey({'x': 0}), source)]

    for chunk_spec in ({'x': -1}, {}):
        pipeline = test_util.EagerPipeline()
        produced = pipeline | xarray_beam.DatasetToChunks(
            source, chunks=chunk_spec
        )
        self.assertIdenticalChunks(produced, wanted)
def test_dataset_to_chunks_multiple(self):
    """Checks that DatasetToChunks splits a dataset into two chunks.

    The same two chunks of three elements each are expected whether the
    chunking comes from a pre-chunked dataset, a pre-chunked dataset with
    `num_threads=2`, or an explicit `chunks` argument.
    """
    source = xarray.Dataset({'foo': ('x', np.arange(6))})
    wanted = [
        (xarray_beam.ChunkKey({'x': 0}), source.head(x=3)),
        (xarray_beam.ChunkKey({'x': 3}), source.tail(x=3)),
    ]

    # Chunking taken from the dataset itself.
    pipeline = test_util.EagerPipeline()
    to_chunks = xarray_beam.DatasetToChunks(source.chunk({'x': 3}))
    produced = pipeline | to_chunks
    self.assertIdenticalChunks(produced, wanted)

    # Same as above, with num_threads=2.
    pipeline = test_util.EagerPipeline()
    to_chunks = xarray_beam.DatasetToChunks(
        source.chunk({'x': 3}), num_threads=2
    )
    produced = pipeline | to_chunks
    self.assertIdenticalChunks(produced, wanted)

    # Chunking passed explicitly for an un-chunked dataset.
    pipeline = test_util.EagerPipeline()
    to_chunks = xarray_beam.DatasetToChunks(source, chunks={'x': 3})
    produced = pipeline | to_chunks
    self.assertIdenticalChunks(produced, wanted)
def test_to_slices(self):
    """Checks ChunkKey.to_slices against several size mappings."""
    key = xarray_beam.ChunkKey({'x': 0, 'y': 10})
    y_slice = slice(10, 20, 1)

    # Sizes given for every dimension of the key.
    both_sliced = {'x': slice(0, 5, 1), 'y': y_slice}
    self.assertEqual(key.to_slices({'x': 5, 'y': 10}), both_sliced)

    # A size for a dimension the key lacks is expected to be ignored.
    self.assertEqual(
        key.to_slices({'x': 5, 'y': 10, 'extra_key': 100}),
        both_sliced,
    )

    # No size for 'x' (offset 0): a full slice is expected for it.
    x_unsliced = {'x': slice(None), 'y': y_slice}
    self.assertEqual(key.to_slices({'y': 10}), x_unsliced)

    # No size for 'y' (offset 10): expected to raise.
    with self.assertRaisesRegex(ValueError, 'non-zero offset'):
        key.to_slices({'x': 5})
def test_iter_chunk_keys(self):
    """Checks the keys iter_chunk_keys yields for a 2x3 grid of chunks.

    'x' has two chunks of size 3 and 'y' has three chunks of size 2; one
    key per combination of chunk start offsets is expected.
    """
    chunk_sizes = {'x': (3, 3), 'y': (2, 2, 2)}
    wanted = [
        xarray_beam.ChunkKey({'x': x_start, 'y': y_start})
        for x_start in (0, 3)
        for y_start in (0, 2, 4)
    ]

    produced = list(core.iter_chunk_keys(chunk_sizes))
    produced.sort()

    self.assertEqual(produced, wanted)
def test_repr(self):
    """Checks the string returned by repr() for a ChunkKey."""
    rendered = repr(xarray_beam.ChunkKey({'x': 0, 'y': 10}))
    self.assertEqual(rendered, "ChunkKey({'x': 0, 'y': 10})")