def test_not_chunked(self):
    """An unchunked dataset must be rejected with a ``ValueError``."""
    time_labels = ["2010-01-01", "2010-01-02"]
    unchunked = new_test_dataset(time_labels,
                                 precipitation=0.4,
                                 temperature=275.2)

    with self.assertRaises(ValueError) as raised:
        get_empty_dataset_chunks(unchunked)

    # The exception text is part of the contract being tested.
    self.assertEqual('data array not chunked', str(raised.exception))
def _prune(input_path: str = None, dry_run: bool = False, monitor=None):
    """Delete the block files of all empty chunks found in a Zarr cube.

    Args:
        input_path: Path of the cube to prune. Its format, as guessed by
            ``guess_dataset_format``, must be Zarr.
        dry_run: Forwarded unchanged to ``_delete_block_file``
            (presumably suppresses the actual deletion -- confirm there).
        monitor: Callable that receives progress messages as a single
            string argument. If ``None``, messages are discarded.

    Raises:
        click.ClickException: If the guessed input format is not Zarr.
    """
    from xcube.core.chunk import get_empty_dataset_chunks
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_cube

    if monitor is None:
        # The signature advertises None as the default, but the body calls
        # the monitor unconditionally; fall back to a silent no-op so that
        # calling with defaults does not fail with a TypeError.
        def monitor(*_args, **_kwargs):
            pass

    input_format = guess_dataset_format(input_path)
    if input_format != FORMAT_NAME_ZARR:
        raise click.ClickException("input must be a cube in ZARR format")

    monitor(f'Opening cube from {input_path!r}...')
    with open_cube(input_path) as cube:
        monitor('Identifying empty blocks...')
        empty_chunks = get_empty_dataset_chunks(cube)

    num_deleted = 0
    for var_name, chunk_indices in empty_chunks.items():
        monitor(
            f'Deleting {len(chunk_indices)} empty block file(s) '
            f'for variable {var_name!r}...'
        )
        for chunk_index in chunk_indices:
            # Only count the block files that were reported as deleted.
            if _delete_block_file(input_path, var_name, chunk_index,
                                  dry_run, monitor):
                num_deleted += 1

    monitor(f'Done, {num_deleted} block file(s) deleted.')
def test_non_empty(self):
    """A chunked dataset holding only valid data reports no empty chunks."""
    cube = new_test_dataset(["2010-01-01", "2010-01-02"],
                            precipitation=0.4,
                            temperature=275.2)
    chunked_cube = cube.chunk({'time': 1, 'lat': 90, 'lon': 90})

    expected = {'precipitation': (), 'temperature': ()}
    self.assertEqual(expected, get_empty_dataset_chunks(chunked_cube))
def test_not_chunked(self):
    """An unchunked dataset yields every variable with no empty chunks.

    Also verifies that the result is a lazy iterator rather than a
    materialized list or tuple.
    """
    # ``collections.Iterator`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Imported locally so this test does not depend on
    # the module's top-level imports.
    from collections.abc import Iterator

    dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                               precipitation=0.4,
                               temperature=275.2)
    empty_dataset_chunks = get_empty_dataset_chunks(dataset)

    self.assertIsInstance(empty_dataset_chunks, Iterator)
    self.assertNotIsInstance(empty_dataset_chunks, (list, tuple))
    self.assertEqual([('precipitation', ()),
                      ('temperature', ())],
                     [(v, tuple(c)) for v, c in empty_dataset_chunks])
def test_all_empty(self):
    """With all-NaN variables, every chunk of every variable is empty."""
    nan_cube = new_test_dataset(["2010-01-01", "2010-01-02"],
                                precipitation=np.nan,
                                temperature=np.nan)
    chunked_cube = nan_cube.chunk({'time': 1, 'lat': 90, 'lon': 90})

    # Expected chunk indices in row-major order:
    # 2 time chunks x 2 lat chunks x 4 lon chunks, (0, 0, 0) ... (1, 1, 3).
    all_chunk_indices = tuple((t, y, x)
                              for t in range(2)
                              for y in range(2)
                              for x in range(4))
    expected = {
        'precipitation': all_chunk_indices,
        'temperature': all_chunk_indices,
    }

    self.assertEqual(expected, get_empty_dataset_chunks(chunked_cube))
def test_all_empty(self):
    """With all-NaN variables, every chunk of every variable is yielded.

    Also verifies that the result is a lazy iterator rather than a
    materialized list or tuple.
    """
    # ``collections.Iterator`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Imported locally so this test does not depend on
    # the module's top-level imports.
    from collections.abc import Iterator

    dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                               precipitation=np.nan,
                               temperature=np.nan) \
        .chunk(dict(time=1, lat=90, lon=90))
    empty_dataset_chunks = get_empty_dataset_chunks(dataset)

    self.assertIsInstance(empty_dataset_chunks, Iterator)
    self.assertNotIsInstance(empty_dataset_chunks, (list, tuple))

    # Both variables are expected to report the same 16 chunk indices,
    # so the literal is written out only once.
    all_chunk_indices = ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
                         (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
                         (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3),
                         (1, 1, 0), (1, 1, 1), (1, 1, 2), (1, 1, 3))
    self.assertEqual([('precipitation', all_chunk_indices),
                      ('temperature', all_chunk_indices)],
                     [(v, tuple(c)) for v, c in empty_dataset_chunks])
def _prune(input_path: str, dry_run: bool, monitor: Monitor):
    """Delete the block files of all empty chunks found in a Zarr dataset.

    Empty chunks are consumed lazily, variable by variable, while the
    dataset is open. Progress is reported through ``monitor``.

    Args:
        input_path: Path of the dataset to prune. Its format, as guessed by
            ``guess_dataset_format``, must be Zarr.
        dry_run: Forwarded unchanged to ``_delete_block_file``
            (presumably suppresses the actual deletion -- confirm there).
        monitor: Callable invoked as ``monitor(message, level)``; this
            function uses level 1 for overall progress and level 2 for
            per-variable messages (presumably a verbosity level -- confirm
            against the ``Monitor`` definition).

    Raises:
        click.ClickException: If the guessed input format is not Zarr.
    """
    from xcube.core.chunk import get_empty_dataset_chunks
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset

    if guess_dataset_format(input_path) != FORMAT_NAME_ZARR:
        raise click.ClickException("input must be a dataset in Zarr format")

    total_deleted = 0

    monitor(f'Opening dataset from {input_path!r}...', 1)
    with open_dataset(input_path) as dataset:
        monitor('Identifying empty chunks...', 1)
        for var_name, chunk_indices in get_empty_dataset_chunks(dataset):
            deleted_for_var = 0
            found_empty_chunk = False
            for chunk_index in chunk_indices:
                if not found_empty_chunk:
                    # Announce the variable only once, on its first
                    # empty chunk.
                    found_empty_chunk = True
                    monitor(
                        f'Found empty chunks in variable {var_name!r}, '
                        f'deleting block files...', 2)
                if _delete_block_file(input_path, var_name, chunk_index,
                                      dry_run, monitor):
                    deleted_for_var += 1

            if deleted_for_var > 0:
                monitor(
                    f'Deleted {deleted_for_var} block file(s) '
                    f'for variable {var_name!r}.', 2)
            elif found_empty_chunk:
                monitor(
                    f'No block files for variable {var_name!r} '
                    f'could be deleted.', 2)

            total_deleted += deleted_for_var

    monitor(f'Done, {total_deleted} block file(s) deleted total.', 1)