Пример #1
0
    def test_appending_vars_that_lack_append_dim(self):

        src_path_pat = 'src_{}.zarr'
        dst_path = 'my.zarr'
        self.add_path(dst_path)

        writer = DatasetWriter(dst_path, output_overwrite=False,
                               input_decode_cf=False)

        n = 3
        for i in range(0, n):
            field_names_values = np.full((3, 50), 0, dtype='S')
            field_names_values[0, 0] = np.array('A')
            field_names_values[1, 0] = np.array('B')
            field_names_values[2, 0] = np.array('C')

            src_dataset = new_test_dataset(day=i + 1)
            src_dataset = src_dataset.assign(
                field_names=xr.DataArray(field_names_values,
                                         dims=("fields", "field_name_length"))
            )
            src_path = src_path_pat.format(i)
            self.add_path(src_path)
            src_dataset.to_zarr(src_path)
            with xr.open_zarr(src_path, decode_cf=False) as src_dataset:
                writer.write_dataset(src_dataset, append=i > 0)

        self.assertTimeSlicesOk(dst_path, src_path_pat, n)
Пример #2
0
 def test_append_to_non_increasing_append_mode_all(self):
     dst_path = "my.zarr"
     self.add_path(dst_path)
     ds1, ds2 = new_append_test_datasets(
         ["2001-01-01", "2001-01-03", "2001-01-02"],
         ["2001-01-04", "2001-01-05", "2001-01-06"]
     )
     ds1.to_zarr(dst_path)
     w = DatasetWriter(dst_path, output_append=True, output_append_dim="t",
                       output_append_mode=AppendMode.all)
     w.write_dataset(ds2)
Пример #3
0
    def test_finalize_only_and_no_output(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr',
                               finalize_only=True,
                               output_append=True,
                               output_metadata=dict(comment='This dataset is a test.'))

        with self.assertRaises(FileNotFoundError) as e:
            writer.finalize_dataset()
        self.assertEqual(('output path not found: my.zarr',),
                         e.exception.args)
Пример #4
0
    def test_finalize_only_and_append(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr',
                               finalize_only=True,
                               output_append=True)

        ds = new_test_dataset(day=1)
        with self.assertRaises(RuntimeError) as e:
            writer.write_dataset(ds)
        self.assertEqual(('internal error: cannot write/append'
                          ' datasets when in finalize-only mode',),
                         e.exception.args)
Пример #5
0
    def test_local_postprocessor(self):
        self.add_path('my.zarr')
        writer = DatasetWriter(
            'my.zarr',
            output_overwrite=False,
            output_custom_postprocessor='tests.test_writer:my_postprocessor')
        ds = new_test_dataset(day=1)
        self.assertNotIn('crs', ds)

        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))
        with xr.open_zarr('my.zarr', consolidated=False) as ds:
            self.assertIn('crs', ds)
Пример #6
0
 def test_append_overlapping_forbid_overlap(self):
     dst_path = "my.zarr"
     self.add_path(dst_path)
     ds1, ds2 = new_append_test_datasets(
         ["2001-01-01", "2001-01-02", "2001-01-03"],
         ["2001-01-02", "2001-01-03", "2001-01-04"]
     )
     ds1.to_zarr(dst_path)
     with pytest.raises(ValueError,
                        match="may not overlap"):
         w = DatasetWriter(dst_path, output_append=True,
                           output_append_dim="t",
                           output_append_mode=AppendMode.no_overlap)
         w.write_dataset(ds2)
Пример #7
0
 def test_append_non_increasing_append_newer(self):
     dst_path = "my.zarr"
     self.add_path(dst_path)
     ds1, ds2 = new_append_test_datasets(
         ["2001-01-01", "2001-01-02", "2001-01-03"],
         ["2001-01-05", "2001-01-04", "2001-01-03", "2001-02-02"]
     )
     ds1.to_zarr(dst_path)
     w = DatasetWriter(dst_path, output_append=True,
                       output_append_dim="t",
                       output_append_mode=AppendMode.newer)
     with pytest.raises(ValueError,
                        match="must be increasing"):
         w.write_dataset(ds2)
Пример #8
0
 def test_local_dry_run_for_existing(self):
     self.add_path('my.zarr')
     ds = new_test_dataset(day=1)
     writer = DatasetWriter('my.zarr', output_overwrite=True)
     writer.write_dataset(ds)
     self.assertTrue(os.path.isdir('my.zarr'))
     writer = DatasetWriter('my.zarr', output_overwrite=True, dry_run=True)
     writer.write_dataset(ds)
     self.assertTrue(os.path.isdir('my.zarr'))
Пример #9
0
 def test_object_storage_params(self):
     # Not a real test but test coverage will increase a bit.
     DatasetWriter('mybucket/my.zarr',
                   output_s3_kwargs=dict(
                       key='mykey',
                       secret='mysecret',
                       client_kwargs=dict(
                           endpoint_url='http://bibo.s3.com'
                       )))
Пример #10
0
 def test_append_and_postprocessor(self):
     with self.assertRaises(ValueError) as cm:
         DatasetWriter('my.zarr',
                       output_append=True,
                       output_custom_postprocessor=
                       'tests.test_writer:my_postprocessor')
     self.assertEqual('output_append and output_custom_postprocessor '
                      'cannot both be given',
                      f'{cm.exception}')
Пример #11
0
    def test_local_overwrite(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr', output_overwrite=False)
        ds = new_test_dataset(day=1)
        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))

        writer = DatasetWriter('my.zarr', output_overwrite=True)
        ds = new_test_dataset(day=2)
        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))
Пример #12
0
    def test_append_with_input_decode_cf(self):

        src_path_pat = 'src_{}.zarr'
        dst_path = 'my.zarr'
        self.add_path(dst_path)

        writer = DatasetWriter(dst_path, output_overwrite=False,
                               input_decode_cf=False)

        n = 3
        for i in range(0, n):
            src_dataset = new_test_dataset(day=i + 1)
            src_path = src_path_pat.format(i)
            self.add_path(src_path)
            src_dataset.to_zarr(src_path)
            with xr.open_zarr(src_path, decode_cf=False) as src_dataset:
                writer.write_dataset(src_dataset, append=i > 0)

        self.assertTimeSlicesOk(dst_path, src_path_pat, n)
Пример #13
0
 def test_append_overlapping_append_newer(self):
     for consolidated in False, True:
         with self.subTest(consolidated=consolidated):
             dst_path = "my.zarr"
             self.add_path(dst_path)
             ds1, ds2 = new_append_test_datasets(
                 ["2001-01-01", "2001-01-02", "2001-01-03"],
                 ["2001-01-02", "2001-01-03", "2001-01-04", "2001-02-05"]
             )
             ds1.to_zarr(dst_path, consolidated=consolidated)
             w = DatasetWriter(dst_path, output_append=True,
                               output_append_dim="t",
                               output_append_mode=AppendMode.newer,
                               output_consolidated=consolidated)
             w.write_dataset(ds2)
             ds3 = xr.open_zarr(dst_path, consolidated=consolidated)
             expected = np.array(["2001-01-01", "2001-01-02", "2001-01-03",
                                  "2001-01-04", "2001-02-05"],
                                 dtype="datetime64[ns]")
             np.testing.assert_equal(expected, ds3.t.data)
Пример #14
0
 def test_finalize_only_and_consolidate_if_specified(self):
     self.add_path('my.zarr')
     ds = new_test_dataset(day=1)
     writer = DatasetWriter('my.zarr',
                            output_overwrite=True)
     writer.write_dataset(ds)
     writer.finalize_dataset()
     self.assertTrue(os.path.isdir('my.zarr'))
     self.assertFalse(os.path.isfile('my.zarr/.zmetadata'))
     writer = DatasetWriter('my.zarr',
                            output_consolidated=True,
                            finalize_only=True)
     writer.finalize_dataset()
     self.assertTrue(os.path.isdir('my.zarr'))
     self.assertTrue(os.path.isfile('my.zarr/.zmetadata'))
     with open('my.zarr/.zmetadata') as fp:
         metadata = json.load(fp)
     self.assertIn('metadata', metadata)
     self.assertEqual({},
                      metadata['metadata'].get('.zattrs'))
Пример #15
0
 def test_append_overlapping_replace(self):
     dst_path = "my.zarr"
     self.add_path(dst_path)
     ds1, ds2 = new_append_test_datasets(
         ["2001-01-01", "2001-01-02", "2001-01-03", "2001-01-05"],
         ["2001-01-02", "2001-01-03", "2001-01-04", "2001-01-06"]
     )
     ds1.to_zarr(dst_path)
     w = DatasetWriter(dst_path, output_append=True,
                       output_append_dim="t",
                       output_append_mode=AppendMode.replace,
                       output_consolidated=False)
     w.write_dataset(ds2)
     ds3 = xr.open_zarr(dst_path, consolidated=False)
     np.testing.assert_equal(
         np.array(["2001-01-01", "2001-01-02", "2001-01-03",
                   "2001-01-04", "2001-01-05", "2001-01-06"],
                  dtype="datetime64[ns]"), ds3.t.data)
     np.testing.assert_equal(
         np.array([0, 1, 1, 1, 0, 1]),
         ds3.v.isel(x=0, y=0)
     )
Пример #16
0
    def test_local(self):
        self.add_path('my.zarr')
        writer = DatasetWriter('my.zarr', output_overwrite=False)
        ds = new_test_dataset(day=1)
        writer.write_dataset(ds)
        self.assertTrue(os.path.isdir('my.zarr'))

        ds = new_test_dataset(day=2)
        with self.assertRaises(zarr.errors.ContainsGroupError):
            writer.write_dataset(ds)
Пример #17
0
 def test_finalize_updates_metadata(self):
     self.add_path('my.zarr')
     writer = DatasetWriter('my.zarr',
                            output_append=True,
                            output_metadata=dict(comment='This dataset is a test.'))
     for i in range(3):
         ds = new_test_dataset(day=i + 1)
         writer.write_dataset(ds)
     with xr.open_zarr('my.zarr', consolidated=False) as ds:
         self.assertNotIn('comment', ds.attrs)
     writer.finalize_dataset()
     with xr.open_zarr('my.zarr', consolidated=False) as ds:
         self.assertIn('comment', ds.attrs)
         self.assertEqual('This dataset is a test.', ds.attrs['comment'])
Пример #18
0
 def test_finalize_adjusts_metadata_with_time_bnds(self):
     self.add_path('my.zarr')
     writer = DatasetWriter('my.zarr', output_append=True,
                            output_adjust_metadata=True)
     for i in range(3):
         ds = new_test_dataset(day=i + 1, add_time_bnds=True)
         writer.write_dataset(ds)
     writer.finalize_dataset()
     with xr.open_zarr('my.zarr', consolidated=False) as ds:
         self.assertIn('time_coverage_start', ds.attrs)
         self.assertEqual('2020-12-01 09:30:00',
                          ds.attrs['time_coverage_start'])
         self.assertIn('time_coverage_end', ds.attrs)
         self.assertEqual('2020-12-03 10:30:00',
                          ds.attrs['time_coverage_end'])
Пример #19
0
 def test_finalize_adjusts_metadata(self):
     self.add_path('my.zarr')
     writer = DatasetWriter('my.zarr',
                            output_append=True,
                            output_adjust_metadata=True,
                            input_paths=['a.nc', 'z.zarr', 'b.nc'])
     for i in range(3):
         ds = new_test_dataset(day=i + 1)
         writer.write_dataset(ds)
     with xr.open_zarr('my.zarr', consolidated=False) as ds:
         self.assertNotIn('history', ds.attrs)
         self.assertNotIn('source', ds.attrs)
         self.assertNotIn('time_coverage_start', ds.attrs)
         self.assertNotIn('time_coverage_end', ds.attrs)
     writer.finalize_dataset()
     with xr.open_zarr('my.zarr', consolidated=False) as ds:
         self.assertIn('history', ds.attrs)
         self.assertIn('source', ds.attrs)
         self.assertEqual('a.nc, b.nc', ds.attrs['source'])
         self.assertIn('time_coverage_start', ds.attrs)
         self.assertEqual('2020-12-01 10:00:00', ds.attrs['time_coverage_start'])
         self.assertIn('time_coverage_end', ds.attrs)
         self.assertEqual('2020-12-03 10:00:00', ds.attrs['time_coverage_end'])
Пример #20
0
 def test_local_dry_run(self):
     self.add_path('my.zarr')
     writer = DatasetWriter('my.zarr', dry_run=True)
     ds = new_test_dataset(day=1)
     writer.write_dataset(ds)
     self.assertFalse(os.path.isdir('out.zarr'))
Пример #21
0
 def test_expands_user(self):
     writer = DatasetWriter('~/my.zarr')
     self.assertEqual(os.path.expanduser('~/my.zarr'), writer._output_path)
Пример #22
0
 def test_no_output_path(self):
     with self.assertRaises(ValueError) as cm:
         DatasetWriter('')
     self.assertEqual('output_path must be given',
                      f'{cm.exception}')
Пример #23
0
 def test_aws_s3_with_unknown_bucket(self):
     ds = new_test_dataset(day=1)
     writer = DatasetWriter(f's3://my{uuid.uuid4()}/my.zarr')
     with self.assertRaises(Exception):
         # We know this will raise, but our test coverage increases a little bit.
         writer.write_dataset(ds)