Example #1
 def test_concat_multiindex(self):
     x = pd.MultiIndex.from_product([[1, 2, 3], ['a', 'b']])
     expected = Dataset({'x': x})
     actual = concat([expected.isel(x=slice(2)),
                      expected.isel(x=slice(2, None))], 'x')
     assert expected.equals(actual)
     assert isinstance(actual.x.to_index(), pd.MultiIndex)
Example #2
    def test_to_dask_dataframe(self):
        # Test conversion of Datasets to dask DataFrames
        x = da.from_array(np.random.randn(10), chunks=4)
        y = np.arange(10, dtype='uint8')
        t = list('abcdefghij')

        ds = Dataset(OrderedDict([('a', ('t', x)),
                                  ('b', ('t', y)),
                                  ('t', ('t', t))]))

        expected_pd = pd.DataFrame({'a': x,
                                    'b': y},
                                   index=pd.Index(t, name='t'))

        # test if 1-D index is correctly set up
        expected = dd.from_pandas(expected_pd, chunksize=4)
        actual = ds.to_dask_dataframe(set_index=True)
        # test if we have dask dataframes
        assert isinstance(actual, dd.DataFrame)

        # use pandas' assert_frame_equal to check that the dataframes are equivalent
        assert_frame_equal(expected.compute(), actual.compute())

        # test if no index is given
        expected = dd.from_pandas(expected_pd.reset_index(drop=False),
                                  chunksize=4)

        actual = ds.to_dask_dataframe(set_index=False)

        assert isinstance(actual, dd.DataFrame)
        assert_frame_equal(expected.compute(), actual.compute())
Example #3
 def test_open_and_do_math(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_mfdataset(tmp) as ds:
             actual = 1.0 * ds
             self.assertDatasetAllClose(original, actual)
Example #4
def radec2azel(scale: xarray.Dataset,
               latlon: Tuple[float, float], time: datetime = None) -> xarray.Dataset:

    if latlon is None or not isinstance(scale, xarray.Dataset):
        return None

    if time is None:
        with fits.open(scale.filename, mode='readonly') as f:
            try:
                t = f[0].header['FRAME']  # TODO this only works from Solis?
            except KeyError:
                logging.error('no time given in file or manually, cannot compute az/el')
                return None
        time = parse(t)
        logging.info('using FITS header for time')
    elif isinstance(time, datetime):
        pass
    elif isinstance(time, (float, int)):  # assume UT1_Unix
        time = datetime.utcfromtimestamp(time)
    else:  # user override of frame time
        time = parse(time)

    print('image time:', time)
# %% knowing camera location, time, and sky coordinates observed, convert to az/el for each pixel
    az, el = pymap3d.radec2azel(scale['ra'], scale['dec'], latlon[0], latlon[1], time)
# %% collect output
    scale['az'] = (('y', 'x'), az)
    scale['el'] = (('y', 'x'), el)
    scale.attrs['lat'] = latlon[0]
    scale.attrs['lon'] = latlon[1]
    scale.attrs['time'] = time

    return scale
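A minimal usage sketch (not part of the original source): the per-pixel 'ra'/'dec' arrays below are hypothetical stand-ins for the output of a plate-scale step, and the site coordinates and time are arbitrary. Passing an explicit datetime exercises the isinstance(time, datetime) branch and skips the FITS-header lookup.

import numpy as np
import xarray
from datetime import datetime

# hypothetical 2x2 image with known sky coordinates per pixel (degrees)
ra = xarray.DataArray(np.linspace(10., 11., 4).reshape(2, 2), dims=('y', 'x'))
dec = xarray.DataArray(np.linspace(40., 41., 4).reshape(2, 2), dims=('y', 'x'))
scale = xarray.Dataset({'ra': ra, 'dec': dec})

mapped = radec2azel(scale, latlon=(65.1, -147.5), time=datetime(2015, 1, 1, 12))
print(mapped['az'].shape, mapped['el'].shape)  # (2, 2) (2, 2)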
Example #5
def adjust_temporal_attrs_impl(ds: xr.Dataset) -> xr.Dataset:
    """
    Adjust the global temporal attributes of the dataset by doing some
    introspection of the dataset and adjusting the appropriate attributes
    accordingly.

    In case the determined attributes do not exist in the dataset, these will
    be added.

    For more information on suggested global attributes see
    `Attribute Convention for Data Discovery
    <http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery>`_

    :param ds: Dataset to adjust
    :return: Adjusted dataset
    """

    temporal_attrs = _get_temporal_cf_attrs_from_var(ds)

    if temporal_attrs:
        ds = ds.copy()
        # Align temporal attributes with the ones from the shallow Dataset copy
        for key in temporal_attrs:
            if temporal_attrs[key] is not None:
                ds.attrs[key] = temporal_attrs[key]
            else:
                ds.attrs.pop(key, None)

    return ds
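A usage sketch, assuming the private helper _get_temporal_cf_attrs_from_var can derive coverage from a decoded 'time' coordinate; the dataset below is illustrative only.

import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset({'sst': (('time', 'lat'), np.zeros((3, 2)))},
                coords={'time': pd.date_range('2000-01-01', periods=3),
                        'lat': [0.0, 10.0]})
ds2 = adjust_temporal_attrs_impl(ds)
# per the ACDD convention linked above, attributes such as
# 'time_coverage_start' / 'time_coverage_end' should now appear
print(ds2.attrs)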
Example #6
 def test_roundtrip_object_dtype(self):
     floats = np.array([0.0, 0.0, 1.0, 2.0, 3.0], dtype=object)
     floats_nans = np.array([np.nan, np.nan, 1.0, 2.0, 3.0], dtype=object)
     letters = np.array(['ab', 'cdef', 'g'], dtype=object)
     letters_nans = np.array(['ab', 'cdef', np.nan], dtype=object)
     all_nans = np.array([np.nan, np.nan], dtype=object)
     original = Dataset({'floats': ('a', floats),
                         'floats_nans': ('a', floats_nans),
                         'letters': ('b', letters),
                         'letters_nans': ('b', letters_nans),
                         'all_nans': ('c', all_nans),
                         'nan': ([], np.nan)})
     expected = original.copy(deep=True)
     if isinstance(self, Only32BitTypes):
         # for netCDF3 tests, expect the results to come back as characters
         expected['letters_nans'] = expected['letters_nans'].astype('S')
         expected['letters'] = expected['letters'].astype('S')
     with self.roundtrip(original) as actual:
         try:
             self.assertDatasetIdentical(expected, actual)
         except AssertionError:
             # Most stores use '' for NaNs in strings, but some don't.
             # First try the ideal case (where the store returns exactly
             # the original Dataset), then try a more realistic case.
             # ScipyDataTest, NetCDF3ViaNetCDF4DataTest and NetCDF4DataTest
             # all end up using this case.
             expected['letters_nans'][-1] = ''
             self.assertDatasetIdentical(expected, actual)
Example #7
    def test_coordinates_encoding(self):
        def equals_latlon(obj):
            return obj == 'lat lon' or obj == 'lon lat'

        original = Dataset({'temp': ('x', [0, 1]), 'precip': ('x', [0, -1])},
                           {'lat': ('x', [2, 3]), 'lon': ('x', [4, 5])})
        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(actual, original)
        with create_tmp_file() as tmp_file:
            original.to_netcdf(tmp_file)
            with open_dataset(tmp_file, decode_coords=False) as ds:
                self.assertTrue(equals_latlon(ds['temp'].attrs['coordinates']))
                self.assertTrue(equals_latlon(ds['precip'].attrs['coordinates']))
                self.assertNotIn('coordinates', ds.attrs)
                self.assertNotIn('coordinates', ds['lat'].attrs)
                self.assertNotIn('coordinates', ds['lon'].attrs)

        modified = original.drop(['temp', 'precip'])
        with self.roundtrip(modified) as actual:
            self.assertDatasetIdentical(actual, modified)
        with create_tmp_file() as tmp_file:
            modified.to_netcdf(tmp_file)
            with open_dataset(tmp_file, decode_coords=False) as ds:
                self.assertTrue(equals_latlon(ds.attrs['coordinates']))
                self.assertNotIn('coordinates', ds['lat'].attrs)
                self.assertNotIn('coordinates', ds['lon'].attrs)
Example #8
    def test_roundtrip_strings_with_fill_value(self):
        values = np.array(['ab', 'cdef', np.nan], dtype=object)
        encoding = {'_FillValue': np.string_('X'), 'dtype': np.dtype('S1')}
        original = Dataset({'x': ('t', values, {}, encoding)})
        expected = original.copy(deep=True)
        expected['x'][:2] = values[:2].astype('S')
        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(expected, actual)

        original = Dataset({'x': ('t', values, {}, {'_FillValue': '\x00'})})
        if not isinstance(self, Only32BitTypes):
            # these stores can save unicode strings
            expected = original.copy(deep=True)
        if isinstance(self, BaseNetCDF4Test):
            # netCDF4 can't keep track of an empty _FillValue for VLEN
            # variables
            expected['x'][-1] = ''
        elif (isinstance(self, (NetCDF3ViaNetCDF4DataTest,
                                NetCDF4ClassicViaNetCDF4DataTest)) or
              (has_netCDF4 and type(self) is GenericNetCDFDataTest)):
            # netCDF4 can't keep track of an empty _FillValue for nc3, either:
            # https://github.com/Unidata/netcdf4-python/issues/273
            expected['x'][-1] = np.string_('')
        with self.roundtrip(original) as actual:
            self.assertDatasetIdentical(expected, actual)
Example #9
def state_to_xarray(state):
    '''Convert a dictionary of climlab.Field objects to xarray.Dataset

    Input: dictionary of climlab.Field objects
    (e.g. process.state or process.diagnostics dictionary)

    Output: xarray.Dataset object with all spatial axes,
    including 'bounds' axes indicating cell boundaries in each spatial dimension.

    Any items in the dictionary that are not instances of climlab.Field
    are ignored.'''
    from climlab.domain.field import Field

    ds = Dataset()
    for name, field in state.items():
        if isinstance(field, Field):
            ds[name] = Field_to_xarray(field)
            dom = field.domain
            for axname, ax in dom.axes.items():
                bounds_name = axname + '_bounds'
                ds.coords[bounds_name] = DataArray(ax.bounds,
                                                   dims=[bounds_name],
                                                   coords={bounds_name: ax.bounds})
                try:
                    ds[bounds_name].attrs['units'] = ax.units
                except AttributeError:
                    # not every axis carries units
                    pass
        else:
            warnings.warn('{} excluded from Dataset because it is not a Field variable.'.format(name))
    return ds
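A short usage sketch; climlab.column_state is an existing climlab helper that returns a dictionary of Field objects, which is exactly the input this converter expects.

import climlab

state = climlab.column_state(num_lev=20)  # dict of climlab.Field objects
ds = state_to_xarray(state)
print(list(ds.data_vars))                 # e.g. ['Ts', 'Tatm']
print([c for c in ds.coords if c.endswith('_bounds')])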
Example #10
 def _preprocess_dataset(self, ds: Dataset):
     # Convert specific data variables to coordinate variables
     for var_name in EXTRA_COORDS_VAR_NAMES:
         if var_name in ds.data_vars:
             ds.set_coords(var_name, inplace=True)
     # print(ds)
     return ds
Example #11
 def test_save_mfdataset_roundtrip(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     datasets = [original.isel(x=slice(5)),
                 original.isel(x=slice(5, 10))]
     with create_tmp_file() as tmp1:
         with create_tmp_file() as tmp2:
             save_mfdataset(datasets, [tmp1, tmp2])
             with open_mfdataset([tmp1, tmp2]) as actual:
                 self.assertDatasetIdentical(actual, original)
Example #12
def test_dask_layers_and_dependencies():
    ds = Dataset({'foo': ('x', range(5)),
                  'bar': ('x', range(5))}).chunk()

    x = dask.delayed(ds)
    assert set(x.__dask_graph__().dependencies).issuperset(
        ds.__dask_graph__().dependencies)
    assert set(x.foo.__dask_graph__().dependencies).issuperset(
        ds.__dask_graph__().dependencies)
Example #13
def adjust_spatial_attrs_impl(ds: xr.Dataset, allow_point: bool) -> xr.Dataset:
    """
    Adjust the global spatial attributes of the dataset by doing some
    introspection of the dataset and adjusting the appropriate attributes
    accordingly.

    In case the determined attributes do not exist in the dataset, these will
    be added.

    For more information on suggested global attributes see
    `Attribute Convention for Data Discovery
    <http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery>`_

    :param ds: Dataset to adjust
    :param allow_point: Whether to accept single point cells
    :return: Adjusted dataset
    """

    copied = False

    for dim in ('lon', 'lat'):
        geo_spatial_attrs = _get_geo_spatial_cf_attrs_from_var(ds, dim, allow_point=allow_point)
        if geo_spatial_attrs:
            # Copy any new attributes into the shallow Dataset copy
            for key in geo_spatial_attrs:
                if geo_spatial_attrs[key] is not None:
                    if not copied:
                        ds = ds.copy()
                        copied = True
                    ds.attrs[key] = geo_spatial_attrs[key]

    lon_min = ds.attrs.get('geospatial_lon_min')
    lat_min = ds.attrs.get('geospatial_lat_min')
    lon_max = ds.attrs.get('geospatial_lon_max')
    lat_max = ds.attrs.get('geospatial_lat_max')

    if lon_min is not None and lat_min is not None and lon_max is not None and lat_max is not None:

        if not copied:
            ds = ds.copy()

        ds.attrs['geospatial_bounds'] = 'POLYGON(({} {}, {} {}, {} {}, {} {}, {} {}))'. \
            format(lon_min, lat_min, lon_min, lat_max, lon_max, lat_max, lon_max, lat_min, lon_min, lat_min)

        # Determination of the following attributes from introspection in a general
        # way is ambiguous, hence it is safer to drop them than to risk preserving
        # out of date attributes.
        drop = ['geospatial_bounds_crs', 'geospatial_bounds_vertical_crs',
                'geospatial_vertical_min', 'geospatial_vertical_max',
                'geospatial_vertical_positive', 'geospatial_vertical_units',
                'geospatial_vertical_resolution']

        for key in drop:
            ds.attrs.pop(key, None)

    return ds
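A hypothetical sketch, assuming the companion helper _get_geo_spatial_cf_attrs_from_var (same module) can infer bounds from plain 1-D lat/lon coordinates:

import numpy as np
import xarray as xr

ds = xr.Dataset({'v': (('lat', 'lon'), np.zeros((2, 3)))},
                coords={'lat': [10., 20.], 'lon': [100., 110., 120.]})
ds = adjust_spatial_attrs_impl(ds, allow_point=False)
# a WKT polygon built from the geospatial_{lat,lon}_{min,max} attributes,
# if the introspection succeeded
print(ds.attrs.get('geospatial_bounds'))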
Example #14
    def test_weakrefs(self):
        example = Dataset({'foo': ('x', np.arange(5.0))})
        expected = example.rename({'foo': 'bar', 'x': 'y'})

        with create_tmp_file() as tmp_file:
            example.to_netcdf(tmp_file, engine='scipy')
            on_disk = open_dataset(tmp_file, engine='pynio')
            actual = on_disk.rename({'foo': 'bar', 'x': 'y'})
            del on_disk  # trigger garbage collection
            self.assertDatasetIdentical(actual, expected)
Example #15
    def test_variable_order(self):
        # doesn't work with scipy or h5py :(
        ds = Dataset()
        ds['a'] = 1
        ds['z'] = 2
        ds['b'] = 3
        ds.coords['c'] = 4

        with self.roundtrip(ds) as actual:
            self.assertEqual(list(ds), list(actual))
Example #16
    def test_persist_Dataset(self):
        ds = Dataset({'foo': ('x', range(5)),
                      'bar': ('x', range(5))}).chunk()
        ds = ds + 1
        n = len(ds.foo.data.dask)

        ds2 = ds.persist()

        assert len(ds2.foo.data.dask) == 1
        assert len(ds.foo.data.dask) == n  # doesn't mutate in place
Example #17
 def test_dataset_pickle(self):
     ds1 = Dataset({'a': DataArray(build_dask_array())})
     ds1.compute()
     self.assertFalse(ds1['a']._in_memory)
     self.assertEqual(kernel_call_count, 1)
     ds2 = pickle.loads(pickle.dumps(ds1))
     self.assertEqual(kernel_call_count, 1)
     self.assertDatasetIdentical(ds1, ds2)
     self.assertFalse(ds1['a']._in_memory)
     self.assertFalse(ds2['a']._in_memory)
Example #18
 def test_concat_encoding(self):
     # Regression test for GH1297
     ds = Dataset({'foo': (['x', 'y'], np.random.random((2, 3))),
                   'bar': (['x', 'y'], np.random.random((2, 3)))},
                  {'x': [0, 1]})
     foo = ds['foo']
     foo.encoding = {"complevel": 5}
     ds.encoding = {"unlimited_dims": 'x'}
     assert concat([foo, foo], dim="x").encoding == foo.encoding
     assert concat([ds, ds], dim="x").encoding == ds.encoding
Example #19
 def test_concat_coords(self):
     data = Dataset({"foo": ("x", np.random.randn(10))})
     expected = data.assign_coords(c=("x", [0] * 5 + [1] * 5))
     objs = [data.isel(x=slice(5)).assign_coords(c=0), data.isel(x=slice(5, None)).assign_coords(c=1)]
     for coords in ["different", "all", ["c"]]:
         actual = concat(objs, dim="x", coords=coords)
         self.assertDatasetIdentical(expected, actual)
     for coords in ["minimal", []]:
         with self.assertRaisesRegexp(ValueError, "not equal across"):
             concat(objs, dim="x", coords=coords)
Example #20
def diff(ds: xr.Dataset,
         ds2: xr.Dataset,
         monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate the difference of two datasets (ds - ds2). This is done by
    matching variable names in the two datasets against each other and taking
    the difference of matching variables.

    If lat/lon/time extents differ between the datasets, the default behavior
    is to take the intersection of the datasets and run subtraction on that.
    However, broadcasting is possible. E.g. ds(lat/lon/time) - ds(lat/lon) is
    valid. In this case the subtrahend will be stretched to the size of
    ds(lat/lon/time) so that it can be subtracted. This also works if the
    subtrahend is a single time slice of arbitrary temporal position. In this
    case, the time dimension will be squeezed out leaving a lat/lon dataset.

    :param ds: The minuend dataset
    :param ds2: The subtrahend dataset
    :param monitor: a progress monitor.
    :return: The difference dataset
    """
    try:
        # Times do not intersect but have the same length
        if len(ds.time - ds2.time) == 0 and \
                len(ds.time) == len(ds2.time):
            # If the datasets don't intersect in time dimension, a naive difference
            # would return empty data variables. Hence, the time coordinate has to
            # be dropped beforehand
            ds = ds.drop('time')
            ds2 = ds2.drop('time')
            return ds - ds2
    except AttributeError:
        # It is likely that the one operand is a lat/lon array that can be
        # broadcast against the other operand
        pass

    try:
        if len(ds2.time) == 1:
            # The subtrahend is a single time slice -> squeeze the 'time'
            # dimension to be able to broadcast it along the minuend
            ds2 = ds2.squeeze('time', drop=True)
    except AttributeError:
        # Doesn't have a time dimension already
        pass
    except TypeError as e:
        if 'unsized object' in str(e):
            # The 'time' variable is a scalar
            pass
        else:
            raise TypeError(str(e))

    with monitor.observing("Subtract datasets"):
        diff = ds - ds2

    return diff
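A sketch of the time-slice broadcasting path described in the docstring; the dataset contents are illustrative, and the default Monitor.NONE is used so no monitor needs to be passed.

import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset({'t2m': (('time', 'lat'), np.arange(6.).reshape(3, 2))},
                coords={'time': pd.date_range('2000-01-01', periods=3),
                        'lat': [0., 10.]})
reference = ds.isel(time=[0])    # a single time slice (length-1 'time')
anomaly = diff(ds, reference)    # 'time' is squeezed out of the subtrahend
print(anomaly['t2m'].values[0])  # [0. 0.]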
Example #21
def test_basic_compute():
    ds = Dataset({'foo': ('x', range(5)),
                  'bar': ('x', range(5))}).chunk({'x': 2})
    for get in [dask.threaded.get,
                dask.multiprocessing.get,
                dask.local.get_sync,
                None]:
        with dask.set_options(get=get):
            ds.compute()
            ds.foo.compute()
            ds.foo.variable.compute()
Example #22
 def test_concat_coords(self):
     data = Dataset({'foo': ('x', np.random.randn(10))})
     expected = data.assign_coords(c=('x', [0] * 5 + [1] * 5))
     objs = [data.isel(x=slice(5)).assign_coords(c=0),
             data.isel(x=slice(5, None)).assign_coords(c=1)]
     for coords in ['different', 'all', ['c']]:
         actual = concat(objs, dim='x', coords=coords)
         self.assertDatasetIdentical(expected, actual)
     for coords in ['minimal', []]:
         with self.assertRaisesRegexp(ValueError, 'not equal across'):
             concat(objs, dim='x', coords=coords)
Example #23
    def test_to_dask_dataframe_2D_set_index(self):
        # This will fail until dask implements MultiIndex support
        w = da.from_array(np.random.randn(2, 3), chunks=(1, 2))
        ds = Dataset({'w': (('x', 'y'), w)})
        ds['x'] = ('x', np.array([0, 1], np.int64))
        ds['y'] = ('y', list('abc'))

        expected = ds.compute().to_dataframe()
        actual = ds.to_dask_dataframe(set_index=True)
        assert isinstance(actual, dd.DataFrame)
        assert_frame_equal(expected, actual.compute())
Example #24
    def test_preprocess_mfdataset(self):
        original = Dataset({'foo': ('x', np.random.randn(10))})
        with create_tmp_file() as tmp:
            original.to_netcdf(tmp)

            def preprocess(ds):
                return ds.assign_coords(z=0)

            expected = preprocess(original)
            with open_mfdataset(tmp, preprocess=preprocess) as actual:
                self.assertDatasetIdentical(expected, actual)
Example #25
    def test_simultaneous_compute(self):
        ds = Dataset({'foo': ('x', range(5)),
                      'bar': ('x', range(5))}).chunk()

        count = [0]

        def counting_get(*args, **kwargs):
            count[0] += 1
            return dask.get(*args, **kwargs)

        ds.load(get=counting_get)
        assert count[0] == 1
Example #26
def _normalize_lon_360(ds: xr.Dataset) -> xr.Dataset:
    """
    Fix the longitude of the given dataset ``ds`` so that it ranges from -180 to +180 degrees.

    :param ds: The dataset whose longitudes may be given in the range 0 to 360.
    :return: The fixed dataset or the original dataset.
    """

    if 'lon' not in ds.coords:
        return ds

    lon_var = ds.coords['lon']

    if len(lon_var.shape) != 1:
        return ds

    lon_size = lon_var.shape[0]
    if lon_size < 2:
        return ds

    lon_size_05 = lon_size // 2
    lon_values = lon_var.values
    if not np.any(lon_values[lon_size_05:] > 180.):
        return ds

    delta_lon = lon_values[1] - lon_values[0]

    var_names = list(ds.data_vars)

    ds = ds.assign_coords(lon=xr.DataArray(np.linspace(-180. + 0.5 * delta_lon,
                                                       +180. - 0.5 * delta_lon,
                                                       lon_size),
                                           dims=ds['lon'].dims,
                                           attrs=dict(long_name='longitude',
                                                      standard_name='longitude',
                                                      units='degrees east')))

    ds = adjust_spatial_attrs_impl(ds, True)

    new_vars = dict()
    for var_name in var_names:
        var = ds[var_name]
        if len(var.dims) >= 1 and var.dims[-1] == 'lon':
            values = np.copy(var.values)
            temp = np.copy(values[..., : lon_size_05])
            values[..., : lon_size_05] = values[..., lon_size_05:]
            values[..., lon_size_05:] = temp
            # import matplotlib.pyplot as plt
            # im = values[(len(values.shape) - 2) * [0] + [slice(None), slice(None)]]
            # plt.imshow(im)
            new_vars[var_name] = xr.DataArray(values, dims=var.dims, attrs=var.attrs, encoding=var.encoding)

    return ds.assign(**new_vars)
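A sketch (assuming the helpers this function calls, such as adjust_spatial_attrs_impl above, are importable): a 0..360 grid is relabeled to -180..180 and the two data halves are swapped to match.

import numpy as np
import xarray as xr

ds = xr.Dataset({'v': ('lon', np.arange(4.))},
                coords={'lon': np.arange(0., 360., 90.)})  # [0, 90, 180, 270]
fixed = _normalize_lon_360(ds)
print(fixed.lon.values)   # [-135.  -45.   45.  135.]
print(fixed['v'].values)  # [2. 3. 0. 1.]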
Example #27
    def test_to_dask_dataframe_no_coordinate(self):
        x = da.from_array(np.random.randn(10), chunks=4)
        ds = Dataset({'x': ('dim_0', x)})

        expected = ds.compute().to_dataframe().reset_index()
        actual = ds.to_dask_dataframe()
        assert isinstance(actual, dd.DataFrame)
        assert_frame_equal(expected, actual.compute())

        expected = ds.compute().to_dataframe()
        actual = ds.to_dask_dataframe(set_index=True)
        assert isinstance(actual, dd.DataFrame)
        assert_frame_equal(expected, actual.compute())
Example #28
 def test_open_dataset(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_dataset(tmp, chunks={'x': 5}) as actual:
             self.assertIsInstance(actual.foo.variable.data, da.Array)
             self.assertEqual(actual.foo.variable.data.chunks, ((5, 5),))
             self.assertDatasetIdentical(original, actual)
         with open_dataset(tmp, chunks=5) as actual:
             self.assertDatasetIdentical(original, actual)
         with open_dataset(tmp) as actual:
             self.assertIsInstance(actual.foo.variable.data, np.ndarray)
             self.assertDatasetIdentical(original, actual)
Example #29
    def test_simultaneous_compute(self):
        ds = Dataset({'foo': ('x', range(5)),
                      'bar': ('x', range(5))}).chunk()

        count = [0]

        def counting_get(*args, **kwargs):
            count[0] += 1
            return dask.get(*args, **kwargs)

        with dask.set_options(get=counting_get):
            ds.load()
        self.assertEqual(count[0], 1)
Example #30
 def test_lock(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp, format='NETCDF3_CLASSIC')
         with open_dataset(tmp, chunks=10) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp, engine='scipy') as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertNotIsInstance(task[-1], type(Lock()))
Example #31
 def test_combine_coords_join(self, join, expected):
     objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
     actual = combine_nested(objs, concat_dim="x", join=join)
     assert_identical(expected, actual)
Example #32
 def test_invalid_units_raises_eagerly(self):
     ds = Dataset({'time': ('time', [0, 1], {'units': 'foobar since 123'})})
     with self.assertRaisesRegexp(ValueError, 'unable to decode time'):
         decode_cf(ds)
Example #33
 def test_lazy_dataset(self):
     lazy_ds = Dataset({'foo': (('x', 'y'), self.data)})
     self.assertIsInstance(lazy_ds.foo.variable.data, da.Array)
Example #34
 def test_no_dimension_coords(self):
     ds0 = Dataset({"foo": ("x", [0, 1])})
     ds1 = Dataset({"foo": ("x", [2, 3])})
     with raises_regex(ValueError, "Could not find any dimension"):
         _infer_concat_order_from_coords([ds1, ds0])
Example #35
    def test_concat_promote_shape(self):
        # mixed dims within variables
        objs = [Dataset({}, {'x': 0}), Dataset({'x': [1]})]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [0, 1]})
        assert_identical(actual, expected)

        objs = [Dataset({'x': [0]}), Dataset({}, {'x': 1})]
        actual = concat(objs, 'x')
        assert_identical(actual, expected)

        # mixed dims between variables
        objs = [Dataset({'x': [2], 'y': 3}), Dataset({'x': [4], 'y': 5})]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [2, 4], 'y': ('x', [3, 5])})
        assert_identical(actual, expected)

        # mixed dims in coord variable
        objs = [
            Dataset({'x': [0]}, {'y': -1}),
            Dataset({'x': [1]}, {'y': ('x', [-2])})
        ]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [0, 1]}, {'y': ('x', [-1, -2])})
        assert_identical(actual, expected)

        # scalars with mixed lengths along concat dim -- values should repeat
        objs = [
            Dataset({'x': [0]}, {'y': -1}),
            Dataset({'x': [1, 2]}, {'y': -2})
        ]
        actual = concat(objs, 'x')
        expected = Dataset({'x': [0, 1, 2]}, {'y': ('x', [-1, -2, -2])})
        assert_identical(actual, expected)

        # broadcast 1d x 1d -> 2d
        objs = [
            Dataset({'z': ('x', [-1])}, {
                'x': [0],
                'y': [0]
            }),
            Dataset({'z': ('y', [1])}, {
                'x': [1],
                'y': [0]
            })
        ]
        actual = concat(objs, 'x')
        expected = Dataset({'z': (('x', 'y'), [[-1], [1]])}, {
            'x': [0, 1],
            'y': [0]
        })
        assert_identical(actual, expected)
Example #36
 def test_empty_input(self):
     assert_identical(Dataset(), combine_by_coords([]))
Example #37
def rolling_cumsum(ds: xr.Dataset, rolling_window: int = 3) -> xr.Dataset:

    ds_window = (ds.rolling(time=rolling_window,
                            center=True).sum().dropna(dim='time', how='all'))

    return ds_window
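Despite the name, this computes a rolling-window sum rather than a cumulative sum; a quick sketch of the centered window and the NaN trimming at the edges:

import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset({'precip': ('time', np.arange(5.))},
                coords={'time': pd.date_range('2000-01-01', periods=5)})
print(rolling_cumsum(ds, rolling_window=3)['precip'].values)  # [3. 6. 9.]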
Example #38
    def _create_lookup_table(self, xr: xarray.Dataset):
        lookup = []
        if not self._disable_pbar:
            LOGGER.info("Create lookup table and convert to pytorch tensor")
        for basin in tqdm(self.basins, file=sys.stdout, disable=self._disable_pbar):

            # store data of each frequency as numpy array of shape [time steps, features]
            x_d, x_s, y = {}, {}, {}

            # keys: frequencies, values: array mapping each lowest-frequency
            # sample to its corresponding sample in this frequency
            frequency_maps = {}
            lowest_freq = utils.sort_frequencies(self.frequencies)[0]

            # converting from xarray to pandas DataFrame because resampling is much faster in pandas.
            df_native = xr.sel(basin=basin).to_dataframe()
            for freq in self.frequencies:
                if isinstance(self.cfg.dynamic_inputs, list):
                    dynamic_cols = self.cfg.dynamic_inputs
                else:
                    dynamic_cols = self.cfg.dynamic_inputs[freq]

                df_resampled = df_native[dynamic_cols + self.cfg.target_variables +
                                         self.cfg.static_inputs].resample(freq).mean()
                x_d[freq] = df_resampled[dynamic_cols].values
                y[freq] = df_resampled[self.cfg.target_variables].values
                if self.cfg.static_inputs:
                    x_s[freq] = df_resampled[self.cfg.static_inputs].values

                # number of frequency steps in one lowest-frequency step
                frequency_factor = pd.to_timedelta(lowest_freq) // pd.to_timedelta(freq)
                # array position i is the last entry of this frequency that belongs to the lowest-frequency sample i.
                frequency_maps[freq] = np.arange(len(df_resampled) // frequency_factor) \
                                       * frequency_factor + (frequency_factor - 1)

            # store first date of sequence to be able to restore dates during inference
            if not self.is_train:
                self.period_starts[basin] = pd.to_datetime(xr.sel(basin=basin)["date"].values[0])

            # we can ignore the deprecation warning about lists because we don't use the passed lists
            # after the validate_samples call. The alternative numba.typed.List is still experimental.
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', category=NumbaPendingDeprecationWarning)

                # checks inputs and outputs for each sequence. valid: flag = 1, invalid: flag = 0
                # manually unroll the dicts into lists to make sure the order of frequencies is consistent.
                # during inference, we want all samples with sufficient history (even if input is NaN), so
                # we pass x_d, x_s, y as None.
                flag = validate_samples(x_d=[x_d[freq] for freq in self.frequencies] if self.is_train else None,
                                        x_s=[x_s[freq] for freq in self.frequencies] if self.is_train and x_s else None,
                                        y=[y[freq] for freq in self.frequencies] if self.is_train else None,
                                        frequency_maps=[frequency_maps[freq] for freq in self.frequencies],
                                        seq_length=self.seq_len,
                                        predict_last_n=self._predict_last_n)
            valid_samples = np.argwhere(flag == 1)
            for f in valid_samples:
                # store pointer to basin and the sample's index in each frequency
                lookup.append((basin, [frequency_maps[freq][int(f)] for freq in self.frequencies]))

            self.x_d[basin] = {freq: torch.from_numpy(_x_d.astype(np.float32)) for freq, _x_d in x_d.items()}
            self.y[basin] = {freq: torch.from_numpy(_y.astype(np.float32)) for freq, _y in y.items()}
            if x_s:
                self.x_s[basin] = {freq: torch.from_numpy(_x_s.astype(np.float32)) for freq, _x_s in x_s.items()}

        self.lookup_table = {i: elem for i, elem in enumerate(lookup)}
        self.num_samples = len(self.lookup_table)
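A standalone illustration (with hypothetical frequencies) of the frequency_maps arithmetic above: lowest-frequency sample i maps to the last index of the higher-frequency series that belongs to it.

import numpy as np
import pandas as pd

lowest_freq, freq = '1D', '1H'
frequency_factor = pd.to_timedelta(lowest_freq) // pd.to_timedelta(freq)  # 24
n_resampled = 72  # three days of hourly rows
frequency_map = (np.arange(n_resampled // frequency_factor)
                 * frequency_factor + (frequency_factor - 1))
print(frequency_map)  # [23 47 71]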
Example #39
def test_min_count_dataset(func):
    da = construct_dataarray(2, dtype=float, contains_nan=True, dask=False)
    ds = Dataset({'var1': da}, coords={'scalar': 0})
    actual = getattr(ds, func)(dim='x', skipna=True, min_count=3)['var1']
    expected = getattr(ds['var1'], func)(dim='x', skipna=True, min_count=3)
    assert_allclose(actual, expected)
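The min_count semantics compared above, shown with plain xarray and no test helpers: a reduction over fewer than min_count valid values yields NaN.

import numpy as np
import xarray as xr

da = xr.DataArray([[1., np.nan, np.nan],
                   [1., 2., 3.]], dims=('y', 'x'))
print(da.sum(dim='x', skipna=True, min_count=2).values)  # [nan  6.]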
Example #40
 def test_invalid_time_units_raises_eagerly(self) -> None:
     ds = Dataset({"time": ("time", [0, 1], {"units": "foobar since 123"})})
     with pytest.raises(ValueError, match=r"unable to decode time"):
         decode_cf(ds)
Example #41
 def test_concat_dim_is_variable(self):
     objs = [Dataset({'x': 0}), Dataset({'x': 1})]
     coord = Variable('y', [3, 4])
     expected = Dataset({'x': ('y', [0, 1]), 'y': [3, 4]})
     actual = concat(objs, coord)
     assert_identical(actual, expected)
Example #42
    def test_concat_loads_variables(self):
        # Test that concat() computes not-in-memory variables at most once
        # and loads them in the output, while leaving the input unaltered.
        d1 = build_dask_array('d1')
        c1 = build_dask_array('c1')
        d2 = build_dask_array('d2')
        c2 = build_dask_array('c2')
        d3 = build_dask_array('d3')
        c3 = build_dask_array('c3')
        # Note: c is a non-index coord.
        # Index coords are loaded by IndexVariable.__init__.
        ds1 = Dataset(data_vars={'d': ('x', d1)}, coords={'c': ('x', c1)})
        ds2 = Dataset(data_vars={'d': ('x', d2)}, coords={'c': ('x', c2)})
        ds3 = Dataset(data_vars={'d': ('x', d3)}, coords={'c': ('x', c3)})

        assert kernel_call_count == 0
        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different')
        # each kernel is computed exactly once
        assert kernel_call_count == 6
        # variables are loaded in the output
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='all',
                        coords='all')
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars=['d'],
                        coords=['c'])
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3], dim='n', data_vars=[], coords=[])
        # variables are loaded once as we are validating that they're identical
        assert kernel_call_count == 12
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different',
                        compat='identical')
        # compat=identical doesn't do any more kernel calls than compat=equals
        assert kernel_call_count == 18
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        # When the equality check for 'different' fails halfway through,
        # stop computing variables, as that would bring no further benefit
        ds4 = Dataset(data_vars={'d': ('x', [2.0])},
                      coords={'c': ('x', [2.0])})
        out = xr.concat([ds1, ds2, ds4, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different')
        # the variables of ds1 and ds2 were computed, but those of ds3 were not
        assert kernel_call_count == 22
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)
        # the data of ds1 and ds2 was loaded into numpy and then
        # concatenated to the data of ds3. Thus, only ds3 is computed now.
        out.compute()
        assert kernel_call_count == 24

        # Finally, test that the originals are unaltered
        assert ds1['d'].data is d1
        assert ds1['c'].data is c1
        assert ds2['d'].data is d2
        assert ds2['c'].data is c2
        assert ds3['d'].data is d3
        assert ds3['c'].data is c3
Example #43
 def test_nested_concat_too_many_dims_at_once(self):
     objs = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})]
     with pytest.raises(ValueError, match="not equal across datasets"):
         combine_nested(objs, concat_dim="x", coords="minimal")
Example #44
    def test_nested_concat(self):
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        expected = Dataset({"x": [0, 1]})
        actual = combine_nested(objs, concat_dim="x")
        assert_identical(expected, actual)
        actual = combine_nested(objs, concat_dim=["x"])
        assert_identical(expected, actual)

        actual = combine_nested([actual], concat_dim=None)
        assert_identical(expected, actual)

        actual = combine_nested([actual], concat_dim="x")
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_nested(objs, concat_dim="x")
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure combine_nested handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0])
            }),
            Dataset({
                "y": ("a", [1]),
                "x": ("a", [1])
            }),
        ]
        actual = combine_nested(objs, concat_dim="a")
        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})]
        actual = combine_nested(objs, concat_dim="x")
        expected = Dataset({"x": [0, 1], "y": [0]})
        assert_identical(expected, actual)
Example #45
    def test_combine_by_coords(self):
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0]),
                "a": [0]
            }),
            Dataset({
                "x": ("a", [1]),
                "y": ("a", [1]),
                "a": [1]
            }),
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({
            "x": ("a", [0, 1]),
            "y": ("a", [0, 1]),
            "a": [0, 1]
        })
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1], "y": [0, 1]})
        assert_equal(actual, expected)

        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
        with raises_regex(ValueError,
                          "Could not find any dimension coordinates"):
            combine_by_coords(objs)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
        with raises_regex(ValueError, "Every dimension needs a coordinate"):
            combine_by_coords(objs)

    def test_empty_input(self):
        assert_identical(Dataset(), combine_by_coords([]))
Example #46
def create_bout_ds(syn_data_type='random', lengths=(6, 2, 4, 7), num=0, nxpe=1, nype=1,
                   xproc=0, yproc=0, guards={}):

    # Set the shape of the data in this dataset
    t_length, x_length, y_length, z_length = lengths
    mxg = guards.get('x', 0)
    myg = guards.get('y', 0)
    x_length += 2*mxg
    y_length += 2*myg
    shape = (t_length, x_length, y_length, z_length)

    # calculate global nx, ny and nz
    nx = nxpe*lengths[1] + 2*mxg
    ny = nype*lengths[2]
    nz = 1*lengths[3]

    # Fill with some kind of synthetic data
    if syn_data_type == 'random':
        # Each dataset contains unique random noise
        np.random.seed(seed=num)
        data = np.random.randn(*shape)
    elif syn_data_type == 'linear':
        # Variables increase linearly across entire domain
        data = DataArray(-np.ones(shape), dims=('t', 'x', 'y', 'z'))

        t_array = DataArray((nx - 2*mxg)*ny*nz*np.arange(t_length, dtype=float),
                            dims='t')
        x_array = DataArray(ny*nz*(xproc*lengths[1] + mxg
                            + np.arange(lengths[1], dtype=float)),
                            dims='x')
        y_array = DataArray(nz*(yproc*lengths[2] + myg
                            + np.arange(lengths[2], dtype=float)),
                            dims='y')
        z_array = DataArray(np.arange(z_length, dtype=float), dims='z')

        data[:, mxg:x_length-mxg, myg:y_length-myg, :] = (
                t_array + x_array + y_array + z_array
                )
    elif syn_data_type == 'stepped':
        # Each dataset contains a different number depending on the filename
        data = np.ones(shape) * num
    elif isinstance(syn_data_type, int):
        data = np.ones(shape) * syn_data_type
    else:
        raise ValueError('Not a recognised choice of type of synthetic bout data.')

    T = DataArray(data, dims=['t', 'x', 'y', 'z'])
    n = DataArray(data, dims=['t', 'x', 'y', 'z'])
    ds = Dataset({'n': n, 'T': T})

    # BOUT_VERSION needed so that we know that the number of points in z is MZ,
    # not MZ-1 (as it was in BOUT++ before v4.0)
    ds['BOUT_VERSION'] = 4.3

    # Include grid data
    ds['NXPE'] = nxpe
    ds['NYPE'] = nype
    ds['NZPE'] = 1
    ds['PE_XIND'] = xproc
    ds['PE_YIND'] = yproc
    ds['MYPE'] = num

    ds['MXG'] = mxg
    ds['MYG'] = myg
    ds['nx'] = nx
    ds['ny'] = ny
    ds['nz'] = nz
    ds['MZ'] = 1*lengths[3]
    ds['MXSUB'] = lengths[1]
    ds['MYSUB'] = lengths[2]
    ds['MZSUB'] = lengths[3]
    ds['ixseps1'] = nx
    ds['ixseps2'] = nx
    ds['jyseps1_1'] = 0
    ds['jyseps1_2'] = ny
    ds['jyseps2_1'] = ny//2 - 1
    ds['jyseps2_2'] = ny//2 - 1
    ds['ny_inner'] = ny//2

    one = DataArray(np.ones((x_length, y_length)), dims=['x', 'y'])
    zero = DataArray(np.zeros((x_length, y_length)), dims=['x', 'y'])

    ds['zperiod'] = 1
    ds['ZMIN'] = 0.
    ds['ZMAX'] = 2.*np.pi
    ds['g11'] = one
    ds['g22'] = one
    ds['g33'] = one
    ds['g12'] = zero
    ds['g13'] = zero
    ds['g23'] = zero
    ds['g_11'] = one
    ds['g_22'] = one
    ds['g_33'] = one
    ds['g_12'] = zero
    ds['g_13'] = zero
    ds['g_23'] = zero
    ds['G1'] = zero
    ds['G2'] = zero
    ds['G3'] = zero
    ds['J'] = one
    ds['Bxy'] = one
    ds['zShift'] = zero

    ds['dx'] = 0.5*one
    ds['dy'] = 2.*one
    ds['dz'] = 0.7

    ds['iteration'] = t_length
    ds['t_array'] = DataArray(np.arange(t_length, dtype=float)*10., dims='t')

    return ds
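A usage sketch: one synthetic per-processor dataset per x tile, mirroring what a BOUT++ run with nxpe=2 would write (the parameter values are illustrative).

datasets = [create_bout_ds(syn_data_type='linear', nxpe=2, nype=1,
                           xproc=xp, yproc=0, num=xp)
            for xp in range(2)]
print(datasets[0]['n'].dims)  # ('t', 'x', 'y', 'z')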
Example #47
class TestCombineAuto:
    def test_combine_by_coords(self):
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0]),
                "a": [0]
            }),
            Dataset({
                "x": ("a", [1]),
                "y": ("a", [1]),
                "a": [1]
            }),
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({
            "x": ("a", [0, 1]),
            "y": ("a", [0, 1]),
            "a": [0, 1]
        })
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1], "y": [0, 1]})
        assert_equal(actual, expected)

        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
        with raises_regex(ValueError,
                          "Could not find any dimension coordinates"):
            combine_by_coords(objs)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
        with raises_regex(ValueError, "Every dimension needs a coordinate"):
            combine_by_coords(objs)

    def test_empty_input(self):
        assert_identical(Dataset(), combine_by_coords([]))

    @pytest.mark.parametrize(
        "join, expected",
        [
            ("outer", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            })),
            ("inner", Dataset({
                "x": [0, 1],
                "y": []
            })),
            ("left", Dataset({
                "x": [0, 1],
                "y": [0]
            })),
            ("right", Dataset({
                "x": [0, 1],
                "y": [1]
            })),
        ],
    )
    def test_combine_coords_join(self, join, expected):
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        actual = combine_nested(objs, concat_dim="x", join=join)
        assert_identical(expected, actual)

    def test_combine_coords_join_exact(self):
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        with raises_regex(ValueError, "indexes along dimension"):
            combine_nested(objs, concat_dim="x", join="exact")

    @pytest.mark.parametrize(
        "combine_attrs, expected",
        [
            ("drop", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            }, attrs={})),
            (
                "no_conflicts",
                Dataset({
                    "x": [0, 1],
                    "y": [0, 1]
                }, attrs={
                    "a": 1,
                    "b": 2
                }),
            ),
            ("override", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            }, attrs={"a": 1})),
        ],
    )
    def test_combine_coords_combine_attrs(self, combine_attrs, expected):
        objs = [
            Dataset({
                "x": [0],
                "y": [0]
            }, attrs={"a": 1}),
            Dataset({
                "x": [1],
                "y": [1]
            }, attrs={
                "a": 1,
                "b": 2
            }),
        ]
        actual = combine_nested(objs,
                                concat_dim="x",
                                join="outer",
                                combine_attrs=combine_attrs)
        assert_identical(expected, actual)

        if combine_attrs == "no_conflicts":
            objs[1].attrs["a"] = 2
            with raises_regex(ValueError, "combine_attrs='no_conflicts'"):
                actual = combine_nested(objs,
                                        concat_dim="x",
                                        join="outer",
                                        combine_attrs=combine_attrs)

    def test_combine_coords_combine_attrs_identical(self):
        objs = [
            Dataset({
                "x": [0],
                "y": [0]
            }, attrs={"a": 1}),
            Dataset({
                "x": [1],
                "y": [1]
            }, attrs={"a": 1}),
        ]
        expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1})
        actual = combine_nested(objs,
                                concat_dim="x",
                                join="outer",
                                combine_attrs="identical")
        assert_identical(expected, actual)

        objs[1].attrs["b"] = 2

        with raises_regex(ValueError, "combine_attrs='identical'"):
            actual = combine_nested(objs,
                                    concat_dim="x",
                                    join="outer",
                                    combine_attrs="identical")

    def test_combine_nested_combine_attrs_drop_conflicts(self):
        objs = [
            Dataset({
                "x": [0],
                "y": [0]
            }, attrs={
                "a": 1,
                "b": 2,
                "c": 3
            }),
            Dataset({
                "x": [1],
                "y": [1]
            }, attrs={
                "a": 1,
                "b": 0,
                "d": 3
            }),
        ]
        expected = Dataset({
            "x": [0, 1],
            "y": [0, 1]
        },
                           attrs={
                               "a": 1,
                               "c": 3,
                               "d": 3
                           })
        actual = combine_nested(objs,
                                concat_dim="x",
                                join="outer",
                                combine_attrs="drop_conflicts")
        assert_identical(expected, actual)

    def test_infer_order_from_coords(self):
        data = create_test_data()
        objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
        actual = combine_by_coords(objs)
        expected = data
        assert expected.broadcast_equals(actual)

    def test_combine_leaving_bystander_dimensions(self):
        # Check non-monotonic bystander dimension coord doesn't raise
        # ValueError on combine (https://github.com/pydata/xarray/issues/3150)
        ycoord = ["a", "c", "b"]

        data = np.random.rand(7, 3)

        ds1 = Dataset(
            data_vars=dict(data=(["x", "y"], data[:3, :])),
            coords=dict(x=[1, 2, 3], y=ycoord),
        )

        ds2 = Dataset(
            data_vars=dict(data=(["x", "y"], data[3:, :])),
            coords=dict(x=[4, 5, 6, 7], y=ycoord),
        )

        expected = Dataset(
            data_vars=dict(data=(["x", "y"], data)),
            coords=dict(x=[1, 2, 3, 4, 5, 6, 7], y=ycoord),
        )

        actual = combine_by_coords((ds1, ds2))
        assert_identical(expected, actual)

    def test_combine_by_coords_previously_failed(self):
        # In the above scenario, one file is missing, containing one year's
        # data for one variable.
        datasets = [
            Dataset({
                "a": ("x", [0]),
                "x": [0]
            }),
            Dataset({
                "b": ("x", [0]),
                "x": [0]
            }),
            Dataset({
                "a": ("x", [1]),
                "x": [1]
            }),
        ]
        expected = Dataset({
            "a": ("x", [0, 1]),
            "b": ("x", [0, np.nan])
        }, {"x": [0, 1]})
        actual = combine_by_coords(datasets)
        assert_identical(expected, actual)

    def test_combine_by_coords_still_fails(self):
        # concat can't handle new variables (yet):
        # https://github.com/pydata/xarray/issues/508
        datasets = [
            Dataset({"x": 0}, {"y": 0}),
            Dataset({"x": 1}, {
                "y": 1,
                "z": 1
            })
        ]
        with pytest.raises(ValueError):
            combine_by_coords(datasets, "y")

    def test_combine_by_coords_no_concat(self):
        objs = [Dataset({"x": 0}), Dataset({"y": 1})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": 0, "y": 1})
        assert_identical(expected, actual)

        objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": 0, "y": 1, "z": 2})
        assert_identical(expected, actual)

    def test_check_for_impossible_ordering(self):
        ds0 = Dataset({"x": [0, 1, 5]})
        ds1 = Dataset({"x": [2, 3]})
        with raises_regex(
                ValueError,
                "does not have monotonic global indexes along dimension x"):
            combine_by_coords([ds1, ds0])

    def test_combine_by_coords_incomplete_hypercube(self):
        # test that this succeeds with default fill_value
        x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
        x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
        x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})
        actual = combine_by_coords([x1, x2, x3])
        expected = Dataset(
            {"a": (("y", "x"), [[1, 1], [1, np.nan]])},
            coords={
                "y": [0, 1],
                "x": [0, 1]
            },
        )
        assert_identical(expected, actual)

        # test that this fails if fill_value is None
        with pytest.raises(ValueError):
            combine_by_coords([x1, x2, x3], fill_value=None)
Example #48
    def postprocess(self, frame: xr.Dataset):
        import arpes.xarray_extensions  # pylint: disable=unused-import, redefined-outer-name

        frame = super().postprocess(frame)
        return frame.assign_attrs(frame.S.spectrum.attrs)
Example #49
 def test_empty_input(self):
     assert_identical(Dataset(), combine_nested([], concat_dim="x"))
Example #50
    def postprocess_final(self, data: xr.Dataset, scan_desc: dict = None):
        # attach the 'spectrum_type'
        # TODO move this logic into xarray extensions and customize here
        # only as necessary
        coord_names = tuple(sorted([c for c in data.dims if c != 'cycle']))

        spectrum_type = None
        if any(d in coord_names for d in {'x', 'y', 'z'}):
            coord_names = tuple(c for c in coord_names
                                if c not in {'x', 'y', 'z'})
            spectrum_types = {
                ('eV', ): 'spem',
                (
                    'eV',
                    'phi',
                ): 'ucut',
            }
            spectrum_type = spectrum_types.get(coord_names)
        else:
            spectrum_types = {
                ('eV', ): 'xps',
                (
                    'eV',
                    'phi',
                    'theta',
                ): 'map',
                (
                    'eV',
                    'phi',
                    'psi',
                ): 'map',
                (
                    'beta',
                    'eV',
                    'phi',
                ): 'map',
                (
                    'eV',
                    'hv',
                    'phi',
                ): 'hv_map',
                ('eV', 'phi'): 'cut',
            }
            spectrum_type = spectrum_types.get(coord_names)

        if 'phi' not in data.coords:
            # XPS
            data.coords['phi'] = 0
            for s in data.S.spectra:
                s.coords['phi'] = 0

        if spectrum_type is not None:
            data.attrs['spectrum_type'] = spectrum_type
            if 'spectrum' in data.data_vars:
                data.spectrum.attrs['spectrum_type'] = spectrum_type

        ls = [data] + data.S.spectra
        for l in ls:
            for k, key_fn in self.ATTR_TRANSFORMS.items():
                if k in l.attrs:
                    transformed = key_fn(l.attrs[k])
                    if isinstance(transformed, dict):
                        l.attrs.update(transformed)
                    else:
                        l.attrs[k] = transformed

        for l in ls:
            for k, v in self.MERGE_ATTRS.items():
                if k not in l.attrs:
                    l.attrs[k] = v

        for l in ls:
            for c in self.ENSURE_COORDS_EXIST:
                if c not in l.coords:
                    if c in l.attrs:
                        l.coords[c] = l.attrs[c]
                    else:
                        warnings.warn(
                            f'Could not assign coordinate {c} from attributes, assigning np.nan instead.'
                        )
                        l.coords[c] = np.nan

        for l in ls:
            if 'chi' in l.coords and 'chi_offset' not in l.attrs:
                l.attrs['chi_offset'] = l.coords['chi'].item()

        return data
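
The spectrum-type lookup above boils down to a small pure function: drop the 'cycle' dimension, sort the remaining names, and match them against a table. A minimal standalone sketch of that idea (the helper name is hypothetical; the tables mirror the method above):

def infer_spectrum_type(dims):
    # dims: iterable of dimension names, e.g. ('phi', 'eV')
    coord_names = tuple(sorted(d for d in dims if d != 'cycle'))
    if any(d in coord_names for d in ('x', 'y', 'z')):
        # spatially resolved scans: classify by the non-spatial dims
        coord_names = tuple(c for c in coord_names if c not in ('x', 'y', 'z'))
        table = {('eV',): 'spem', ('eV', 'phi'): 'ucut'}
    else:
        table = {
            ('eV',): 'xps',
            ('eV', 'phi'): 'cut',
            ('eV', 'phi', 'theta'): 'map',
            ('eV', 'phi', 'psi'): 'map',
            ('beta', 'eV', 'phi'): 'map',
            ('eV', 'hv', 'phi'): 'hv_map',
        }
    return table.get(coord_names)

assert infer_spectrum_type(('phi', 'eV')) == 'cut'
assert infer_spectrum_type(('x', 'y', 'eV')) == 'spem'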
Example no. 51
0
class TestNestedCombine:
    def test_nested_concat(self):
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        expected = Dataset({"x": [0, 1]})
        actual = combine_nested(objs, concat_dim="x")
        assert_identical(expected, actual)
        actual = combine_nested(objs, concat_dim=["x"])
        assert_identical(expected, actual)

        actual = combine_nested([actual], concat_dim=None)
        assert_identical(expected, actual)

        actual = combine_nested([actual], concat_dim="x")
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_nested(objs, concat_dim="x")
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure combine_nested handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0])
            }),
            Dataset({
                "y": ("a", [1]),
                "x": ("a", [1])
            }),
        ]
        actual = combine_nested(objs, concat_dim="a")
        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})]
        actual = combine_nested(objs, concat_dim="x")
        expected = Dataset({"x": [0, 1], "y": [0]})
        assert_identical(expected, actual)

    @pytest.mark.parametrize(
        "join, expected",
        [
            ("outer", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            })),
            ("inner", Dataset({
                "x": [0, 1],
                "y": []
            })),
            ("left", Dataset({
                "x": [0, 1],
                "y": [0]
            })),
            ("right", Dataset({
                "x": [0, 1],
                "y": [1]
            })),
        ],
    )
    def test_combine_nested_join(self, join, expected):
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        actual = combine_nested(objs, concat_dim="x", join=join)
        assert_identical(expected, actual)

    def test_combine_nested_join_exact(self):
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        with raises_regex(ValueError, "indexes along dimension"):
            combine_nested(objs, concat_dim="x", join="exact")

    def test_empty_input(self):
        assert_identical(Dataset(), combine_nested([], concat_dim="x"))

    # Fails because of concat's weird treatment of dimension coords, see #2975
    @pytest.mark.xfail
    def test_nested_concat_too_many_dims_at_once(self):
        objs = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})]
        with pytest.raises(ValueError, match="not equal across datasets"):
            combine_nested(objs, concat_dim="x", coords="minimal")

    def test_nested_concat_along_new_dim(self):
        objs = [
            Dataset({
                "a": ("x", [10]),
                "x": [0]
            }),
            Dataset({
                "a": ("x", [20]),
                "x": [0]
            }),
        ]
        expected = Dataset({"a": (("t", "x"), [[10], [20]]), "x": [0]})
        actual = combine_nested(objs, concat_dim="t")
        assert_identical(expected, actual)

        # Same but with a DataArray as new dim, see GH #1988 and #2647
        dim = DataArray([100, 150], name="baz", dims="baz")
        expected = Dataset({
            "a": (("baz", "x"), [[10], [20]]),
            "x": [0],
            "baz": [100, 150]
        })
        actual = combine_nested(objs, concat_dim=dim)
        assert_identical(expected, actual)

    def test_nested_merge(self):
        data = Dataset({"x": 0})
        actual = combine_nested([data, data, data], concat_dim=None)
        assert_identical(data, actual)

        ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
        ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
        expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})
        actual = combine_nested([ds1, ds2], concat_dim=None)
        assert_identical(expected, actual)
        actual = combine_nested([ds1, ds2], concat_dim=[None])
        assert_identical(expected, actual)

        tmp1 = Dataset({"x": 0})
        tmp2 = Dataset({"x": np.nan})
        actual = combine_nested([tmp1, tmp2], concat_dim=None)
        assert_identical(tmp1, actual)
        actual = combine_nested([tmp1, tmp2], concat_dim=[None])
        assert_identical(tmp1, actual)

        # Single object, with a concat_dim explicitly provided
        # Test the issue reported in GH #1988
        objs = [Dataset({"x": 0, "y": 1})]
        dim = DataArray([100], name="baz", dims="baz")
        actual = combine_nested(objs, concat_dim=[dim])
        expected = Dataset({
            "x": ("baz", [0]),
            "y": ("baz", [1])
        }, {"baz": [100]})
        assert_identical(expected, actual)

        # Just making sure that combine_nested is doing what is
        # expected for non-scalar values, too.
        objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})]
        dim = DataArray([100], name="baz", dims="baz")
        actual = combine_nested(objs, concat_dim=[dim])
        expected = Dataset(
            {
                "x": (("baz", "z"), [[0, 1]]),
                "y": (("baz", "z"), [[1, 2]])
            },
            {"baz": [100]},
        )
        assert_identical(expected, actual)

    def test_concat_multiple_dims(self):
        objs = [
            [
                Dataset({"a": (("x", "y"), [[0]])}),
                Dataset({"a": (("x", "y"), [[1]])})
            ],
            [
                Dataset({"a": (("x", "y"), [[2]])}),
                Dataset({"a": (("x", "y"), [[3]])})
            ],
        ]
        actual = combine_nested(objs, concat_dim=["x", "y"])
        expected = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])})
        assert_identical(expected, actual)

    def test_concat_name_symmetry(self):
        """Inspired by the discussion on GH issue #2777"""

        da1 = DataArray(name="a", data=[[0]], dims=["x", "y"])
        da2 = DataArray(name="b", data=[[1]], dims=["x", "y"])
        da3 = DataArray(name="a", data=[[2]], dims=["x", "y"])
        da4 = DataArray(name="b", data=[[3]], dims=["x", "y"])

        x_first = combine_nested([[da1, da2], [da3, da4]],
                                 concat_dim=["x", "y"])
        y_first = combine_nested([[da1, da3], [da2, da4]],
                                 concat_dim=["y", "x"])

        assert_identical(x_first, y_first)

    def test_concat_one_dim_merge_another(self):
        data = create_test_data()
        data1 = data.copy(deep=True)
        data2 = data.copy(deep=True)

        objs = [
            [
                data1.var1.isel(dim2=slice(4)),
                data2.var1.isel(dim2=slice(4, 9))
            ],
            [
                data1.var2.isel(dim2=slice(4)),
                data2.var2.isel(dim2=slice(4, 9))
            ],
        ]

        expected = data[["var1", "var2"]]
        actual = combine_nested(objs, concat_dim=[None, "dim2"])
        assert_identical(expected, actual)

    def test_auto_combine_2d(self):
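        # alias the factory; each ds(i) call below builds a fresh dataset
        # seeded with i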
        ds = create_test_data

        partway1 = concat([ds(0), ds(3)], dim="dim1")
        partway2 = concat([ds(1), ds(4)], dim="dim1")
        partway3 = concat([ds(2), ds(5)], dim="dim1")
        expected = concat([partway1, partway2, partway3], dim="dim2")

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]
        result = combine_nested(datasets, concat_dim=["dim1", "dim2"])
        assert_equal(result, expected)

    def test_auto_combine_2d_combine_attrs_kwarg(self):
        ds = create_test_data

        partway1 = concat([ds(0), ds(3)], dim="dim1")
        partway2 = concat([ds(1), ds(4)], dim="dim1")
        partway3 = concat([ds(2), ds(5)], dim="dim1")
        expected = concat([partway1, partway2, partway3], dim="dim2")

        expected_dict = {}
        expected_dict["drop"] = expected.copy(deep=True)
        expected_dict["drop"].attrs = {}
        expected_dict["no_conflicts"] = expected.copy(deep=True)
        expected_dict["no_conflicts"].attrs = {
            "a": 1,
            "b": 2,
            "c": 3,
            "d": 4,
            "e": 5,
            "f": 6,
        }
        expected_dict["override"] = expected.copy(deep=True)
        expected_dict["override"].attrs = {"a": 1}

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]

        datasets[0][0].attrs = {"a": 1}
        datasets[0][1].attrs = {"a": 1, "b": 2}
        datasets[0][2].attrs = {"a": 1, "c": 3}
        datasets[1][0].attrs = {"a": 1, "d": 4}
        datasets[1][1].attrs = {"a": 1, "e": 5}
        datasets[1][2].attrs = {"a": 1, "f": 6}

        with raises_regex(ValueError, "combine_attrs='identical'"):
            result = combine_nested(datasets,
                                    concat_dim=["dim1", "dim2"],
                                    combine_attrs="identical")

        for combine_attrs in expected_dict:
            result = combine_nested(datasets,
                                    concat_dim=["dim1", "dim2"],
                                    combine_attrs=combine_attrs)
            assert_identical(result, expected_dict[combine_attrs])

    def test_combine_nested_missing_data_new_dim(self):
        # Your data includes "time" and "station" dimensions, and each year's
        # data has a different set of stations.
        datasets = [
            Dataset({
                "a": ("x", [2, 3]),
                "x": [1, 2]
            }),
            Dataset({
                "a": ("x", [1, 2]),
                "x": [0, 1]
            }),
        ]
        expected = Dataset(
            {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])},
            {"x": [0, 1, 2]})
        actual = combine_nested(datasets, concat_dim="t")
        assert_identical(expected, actual)

    def test_invalid_hypercube_input(self):
        ds = create_test_data

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]]
        with raises_regex(ValueError,
                          "sub-lists do not have consistent lengths"):
            combine_nested(datasets, concat_dim=["dim1", "dim2"])

        datasets = [[ds(0), ds(1)], [[ds(3), ds(4)]]]
        with raises_regex(ValueError,
                          "sub-lists do not have consistent depths"):
            combine_nested(datasets, concat_dim=["dim1", "dim2"])

        datasets = [[ds(0), ds(1)], [ds(3), ds(4)]]
        with raises_regex(ValueError, "concat_dims has length"):
            combine_nested(datasets, concat_dim=["dim1"])

    def test_merge_one_dim_concat_another(self):
        objs = [
            [
                Dataset({"foo": ("x", [0, 1])}),
                Dataset({"bar": ("x", [10, 20])})
            ],
            [
                Dataset({"foo": ("x", [2, 3])}),
                Dataset({"bar": ("x", [30, 40])})
            ],
        ]
        expected = Dataset({
            "foo": ("x", [0, 1, 2, 3]),
            "bar": ("x", [10, 20, 30, 40])
        })

        actual = combine_nested(objs, concat_dim=["x", None], compat="equals")
        assert_identical(expected, actual)

        # Proving it works symmetrically
        objs = [
            [Dataset({"foo": ("x", [0, 1])}),
             Dataset({"foo": ("x", [2, 3])})],
            [
                Dataset({"bar": ("x", [10, 20])}),
                Dataset({"bar": ("x", [30, 40])})
            ],
        ]
        actual = combine_nested(objs, concat_dim=[None, "x"], compat="equals")
        assert_identical(expected, actual)

    def test_combine_concat_over_redundant_nesting(self):
        objs = [[Dataset({"x": [0]}), Dataset({"x": [1]})]]
        actual = combine_nested(objs, concat_dim=[None, "x"])
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({"x": [0]})], [Dataset({"x": [1]})]]
        actual = combine_nested(objs, concat_dim=["x", None])
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({"x": [0]})]]
        actual = combine_nested(objs, concat_dim=[None, None])
        expected = Dataset({"x": [0]})
        assert_identical(expected, actual)

    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {"a": 2, "b": 1}])
    def test_combine_nested_fill_value(self, fill_value):
        datasets = [
            Dataset({
                "a": ("x", [2, 3]),
                "b": ("x", [-2, 1]),
                "x": [1, 2]
            }),
            Dataset({
                "a": ("x", [1, 2]),
                "b": ("x", [3, -1]),
                "x": [0, 1]
            }),
        ]
        if fill_value == dtypes.NA:
            # if we supply the default, we expect the missing value for a
            # float array
            fill_value_a = fill_value_b = np.nan
        elif isinstance(fill_value, dict):
            fill_value_a = fill_value["a"]
            fill_value_b = fill_value["b"]
        else:
            fill_value_a = fill_value_b = fill_value
        expected = Dataset(
            {
                "a": (("t", "x"), [[fill_value_a, 2, 3], [1, 2, fill_value_a]]),
                "b": (("t", "x"), [[fill_value_b, -2, 1], [3, -1, fill_value_b]]),
            },
            {"x": [0, 1, 2]},
        )
        actual = combine_nested(datasets,
                                concat_dim="t",
                                fill_value=fill_value)
        assert_identical(expected, actual)
Example no. 52
0
    def test_convert_units(self, typename, variant):
        if typename == "Variable":
            if variant != "data":
                pytest.skip("Variable doesn't store coordinates")

            data = np.linspace(0, 1, 3) * unit_registry.m
            obj = Variable(dims="x", data=data)
            units = {None: unit_registry.mm}
            expected_units = units
        elif typename == "DataArray":
            unit_variants = {
                "data": (unit_registry.Pa, 1, 1),
                "dims": (1, unit_registry.s, 1),
                "coords": (1, 1, unit_registry.m),
            }
            data_unit, dim_unit, coord_unit = unit_variants.get(variant)

            coords = {
                "data": {},
                "dims": {
                    "x": [0, 1, 2] * dim_unit
                },
                "coords": {
                    "u": ("x", [10, 3, 4] * coord_unit)
                },
            }

            obj = DataArray(
                dims="x",
                data=np.linspace(0, 1, 3) * data_unit,
                coords=coords.get(variant),
            )
            template = {
                **{
                    obj.name: None
                },
                **{name: None
                   for name in obj.coords},
            }
            units = {
                "data": {
                    None: unit_registry.hPa
                },
                "dims": {
                    "x": unit_registry.ms
                },
                "coords": {
                    "u": unit_registry.mm
                },
            }.get(variant)

            expected_units = {**template, **units}
        elif typename == "Dataset":
            unit_variants = {
                "data": ((unit_registry.s, unit_registry.kg), 1, 1),
                "dims": ((1, 1), unit_registry.s, 1),
                "coords": ((1, 1), 1, unit_registry.m),
            }
            (data_unit1,
             data_unit2), dim_unit, coord_unit = unit_variants.get(variant)

            coords = {
                "data": {},
                "dims": {
                    "x": [0, 1, 2] * dim_unit
                },
                "coords": {
                    "u": ("x", [10, 3, 4] * coord_unit)
                },
            }

            obj = Dataset(
                data_vars={
                    "a": ("x", np.linspace(-1, 1, 3) * data_unit1),
                    "b": ("x", np.linspace(1, 2, 3) * data_unit2),
                },
                coords=coords.get(variant),
            )

            template = {
                **{name: None
                   for name in obj.data_vars.keys()},
                **{name: None
                   for name in obj.coords.keys()},
            }
            units = {
                "data": {
                    "a": unit_registry.ms,
                    "b": unit_registry.g
                },
                "dims": {
                    "x": unit_registry.ms
                },
                "coords": {
                    "u": unit_registry.mm
                },
            }.get(variant)
            expected_units = {**template, **units}

        actual = conversion.convert_units(obj, units)

        assert conversion.extract_units(actual) == expected_units
        assert_equal(obj, actual)
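
Condensed, the round trip this test exercises looks like the sketch below (assuming, as elsewhere in these tests, that `conversion` is pint-xarray's conversion module and `unit_registry` a pint UnitRegistry; both imports are assumptions):

import numpy as np
import pint
import xarray as xr
from pint_xarray import conversion  # assumption: the module under test here

unit_registry = pint.UnitRegistry()
obj = xr.DataArray(np.linspace(0, 1, 3) * unit_registry.m, dims="x")
# convert the data (keyed by None for an unnamed DataArray) to millimetres
actual = conversion.convert_units(obj, {None: unit_registry.mm})
assert conversion.extract_units(actual) == {None: unit_registry.mm}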
Example no. 53
0
    def test_no_concatenation_needed(self):
        ds = Dataset({"foo": ("x", [0, 1])})
        expected = {(): ds}
        actual, concat_dims = _infer_concat_order_from_coords([ds])
        assert_combined_tile_ids_equal(expected, actual)
        assert concat_dims == []
Example no. 54
0
hdng = DataArray(hdngens, coords=coords1, dims=dims1)
ptch = DataArray(ptchens, coords=coords1, dims=dims1)
roll = DataArray(rollens, coords=coords1, dims=dims1)
p = DataArray(pens, coords=coords1, dims=dims1)

# data_vars = dict(uwrawnotilt=uwrawnotilt, vwrawnotilt=vwrawnotilt, uwraw=uwraw, vwraw=vwraw, uwnotilt=uwnotilt, vwnotilt=vwnotilt, uw=uw, vw=vw, tke=tke, aniso=aniso, u=u, v=v, w5=w5, urms=urms, vrms=vrms, w5rms=w5rms, uz=uz, vz=vz, hdng=hdng, ptch=ptch, roll=roll, p=p)

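# Collect the ensemble DataArrays built above into a single mapping; the
# resulting Dataset is written to netCDF below.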
data_vars = dict(uwrawnotilt=uwrawnotilt,
                 vwrawnotilt=vwrawnotilt,
                 uwraw=uwraw,
                 vwraw=vwraw,
                 uwnotilt=uwnotilt,
                 vwnotilt=vwnotilt,
                 uw=uw,
                 vw=vw,
                 tke=tke,
                 u=u,
                 v=v,
                 w5=w5,
                 urms=urms,
                 vrms=vrms,
                 w5rms=w5rms,
                 uz=uz,
                 vz=vz,
                 hdng=hdng,
                 ptch=ptch,
                 roll=roll,
                 p=p)

Dataset(data_vars=data_vars, coords=coords).to_netcdf(fname_rs_out)
Example no. 55
0
class TestXarrayFunctions:
    @pytest.mark.parametrize(
        "obj",
        (
            pytest.param(Variable("x", np.linspace(0, 1, 5)), id="Variable"),
            pytest.param(
                DataArray(
                    data=np.linspace(0, 1, 5),
                    dims="x",
                    coords={"u": ("x", np.arange(5))},
                ),
                id="DataArray",
            ),
            pytest.param(
                Dataset(
                    {
                        "a": ("x", np.linspace(-1, 1, 5)),
                        "b": ("x", np.linspace(0, 1, 5)),
                    },
                    coords={"u": ("x", np.arange(5))},
                ),
                id="Dataset",
            ),
        ),
    )
    @pytest.mark.parametrize(
        "units",
        (
            pytest.param({
                None: None,
                "u": None
            }, id="no units"),
            pytest.param({
                None: unit_registry.m,
                "u": None
            }, id="data units"),
            pytest.param({
                None: None,
                "u": unit_registry.s
            }, id="coord units"),
        ),
    )
    def test_attach_units(self, obj, units):
        if isinstance(obj, Variable) and "u" in units:
            pytest.skip(msg="variables don't have coordinates")

        if isinstance(obj, Dataset):
            units = units.copy()
            data_units = units.pop(None)
            units.update({"a": data_units, "b": data_units})

        actual = conversion.attach_units(obj, units)

        assert conversion.extract_units(actual) == units

    @pytest.mark.parametrize(
        ["obj", "units"],
        (
            pytest.param(
                DataArray(dims="x", coords={
                    "x": [],
                    "u": ("x", [])
                }),
                {
                    None: "hPa",
                    "x": "m"
                },
                id="DataArray",
            ),
            pytest.param(
                Dataset(
                    data_vars={
                        "a": ("x", []),
                        "b": ("x", [])
                    },
                    coords={
                        "x": [],
                        "u": ("x", [])
                    },
                ),
                {
                    "a": "K",
                    "b": "hPa",
                    "u": "m"
                },
                id="Dataset",
            ),
            pytest.param(Variable("x", []), {None: "hPa"}, id="Variable"),
        ),
    )
    def test_attach_unit_attributes(self, obj, units):
        actual = conversion.attach_unit_attributes(obj, units)
        assert units == filter_none_values(
            conversion.extract_unit_attributes(actual))

    @pytest.mark.parametrize(
        "variant",
        (
            "data",
            pytest.param(
                "dims",
                marks=pytest.mark.xfail(reason="indexes don't support units")),
            "coords",
        ),
    )
    @pytest.mark.parametrize("typename", ("Variable", "DataArray", "Dataset"))
    def test_convert_units(self, typename, variant):
        if typename == "Variable":
            if variant != "data":
                pytest.skip("Variable doesn't store coordinates")

            data = np.linspace(0, 1, 3) * unit_registry.m
            obj = Variable(dims="x", data=data)
            units = {None: unit_registry.mm}
            expected_units = units
        elif typename == "DataArray":
            unit_variants = {
                "data": (unit_registry.Pa, 1, 1),
                "dims": (1, unit_registry.s, 1),
                "coords": (1, 1, unit_registry.m),
            }
            data_unit, dim_unit, coord_unit = unit_variants.get(variant)

            coords = {
                "data": {},
                "dims": {
                    "x": [0, 1, 2] * dim_unit
                },
                "coords": {
                    "u": ("x", [10, 3, 4] * coord_unit)
                },
            }

            obj = DataArray(
                dims="x",
                data=np.linspace(0, 1, 3) * data_unit,
                coords=coords.get(variant),
            )
            template = {
                **{
                    obj.name: None
                },
                **{name: None
                   for name in obj.coords},
            }
            units = {
                "data": {
                    None: unit_registry.hPa
                },
                "dims": {
                    "x": unit_registry.ms
                },
                "coords": {
                    "u": unit_registry.mm
                },
            }.get(variant)

            expected_units = {**template, **units}
        elif typename == "Dataset":
            unit_variants = {
                "data": ((unit_registry.s, unit_registry.kg), 1, 1),
                "dims": ((1, 1), unit_registry.s, 1),
                "coords": ((1, 1), 1, unit_registry.m),
            }
            (data_unit1,
             data_unit2), dim_unit, coord_unit = unit_variants.get(variant)

            coords = {
                "data": {},
                "dims": {
                    "x": [0, 1, 2] * dim_unit
                },
                "coords": {
                    "u": ("x", [10, 3, 4] * coord_unit)
                },
            }

            obj = Dataset(
                data_vars={
                    "a": ("x", np.linspace(-1, 1, 3) * data_unit1),
                    "b": ("x", np.linspace(1, 2, 3) * data_unit2),
                },
                coords=coords.get(variant),
            )

            template = {
                **{name: None
                   for name in obj.data_vars.keys()},
                **{name: None
                   for name in obj.coords.keys()},
            }
            units = {
                "data": {
                    "a": unit_registry.ms,
                    "b": unit_registry.g
                },
                "dims": {
                    "x": unit_registry.ms
                },
                "coords": {
                    "u": unit_registry.mm
                },
            }.get(variant)
            expected_units = {**template, **units}

        actual = conversion.convert_units(obj, units)

        assert conversion.extract_units(actual) == expected_units
        assert_equal(obj, actual)

    @pytest.mark.parametrize(
        "units",
        (
            pytest.param({
                None: None,
                "u": None
            }, id="no units"),
            pytest.param({
                None: unit_registry.m,
                "u": None
            }, id="data units"),
            pytest.param({
                None: None,
                "u": unit_registry.s
            }, id="coord units"),
            pytest.param({
                None: unit_registry.m,
                "u": unit_registry.s
            },
                         id="data and coord units"),
        ),
    )
    @pytest.mark.parametrize("typename", ("Variable", "DataArray", "Dataset"))
    def test_extract_units(self, typename, units):
        if typename == "Variable":
            data_units = units.get(None) or 1
            data = np.linspace(0, 1, 2) * data_units

            units = units.copy()
            units.pop("u")

            obj = Variable("x", data)
        elif typename == "DataArray":
            data_units = units.get(None) or 1
            data = np.linspace(0, 1, 2) * data_units

            coord_units = units.get("u") or 1
            coords = {"u": ("x", np.arange(2) * coord_units)}

            obj = DataArray(data, dims="x", coords=coords)
        elif typename == "Dataset":
            data_units = units.get(None)
            data1 = np.linspace(-1, 1, 2) * (data_units or 1)
            data2 = np.linspace(0, 1, 2) * (data_units or 1)

            coord_units = units.get("u") or 1
            coords = {"u": ("x", np.arange(2) * coord_units)}

            units = units.copy()
            units.pop(None)
            units.update({"a": data_units, "b": data_units})

            obj = Dataset({
                "a": ("x", data1),
                "b": ("x", data2)
            },
                          coords=coords)

        assert conversion.extract_units(obj) == units

    @pytest.mark.parametrize(
        ["obj", "expected"],
        (
            pytest.param(
                DataArray(
                    coords={
                        "x": ("x", [], {
                            "units": "m"
                        }),
                        "u": ("x", [], {
                            "units": "s"
                        }),
                    },
                    attrs={"units": "hPa"},
                    dims="x",
                ),
                {
                    "x": "m",
                    "u": "s",
                    None: "hPa"
                },
                id="DataArray",
            ),
            pytest.param(
                Dataset(
                    data_vars={
                        "a": ("x", [], {
                            "units": "K"
                        }),
                        "b": ("x", [], {
                            "units": "hPa"
                        }),
                    },
                    coords={
                        "x": ("x", [], {
                            "units": "m"
                        }),
                        "u": ("x", [], {
                            "units": "s"
                        }),
                    },
                ),
                {
                    "a": "K",
                    "b": "hPa",
                    "x": "m",
                    "u": "s"
                },
                id="Dataset",
            ),
            pytest.param(Variable("x", [], {"units": "hPa"}), {None: "hPa"},
                         id="Variable"),
        ),
    )
    def test_extract_unit_attributes(self, obj, expected):
        actual = conversion.extract_unit_attributes(obj)
        assert expected == actual

    @pytest.mark.parametrize(
        "obj",
        (
            pytest.param(Variable("x", [0, 4, 3] * unit_registry.m),
                         id="Variable"),
            pytest.param(
                DataArray(
                    dims="x",
                    data=[0, 4, 3] * unit_registry.m,
                    coords={"u": ("x", [2, 3, 4] * unit_registry.s)},
                ),
                id="DataArray",
            ),
            pytest.param(
                Dataset(
                    data_vars={
                        "a": ("x", [3, 2, 5] * unit_registry.Pa),
                        "b": ("x", [0, 2, -1] * unit_registry.kg),
                    },
                    coords={"u": ("x", [2, 3, 4] * unit_registry.s)},
                ),
                id="Dataset",
            ),
        ),
    )
    def test_strip_units(self, obj):
        if isinstance(obj, Variable):
            expected_units = {None: None}
        elif isinstance(obj, DataArray):
            expected_units = {None: None}
            expected_units.update({name: None for name in obj.coords.keys()})
        elif isinstance(obj, Dataset):
            expected_units = {name: None for name in obj.variables.keys()}

        actual = conversion.strip_units(obj)
        assert conversion.extract_units(actual) == expected_units

    @pytest.mark.parametrize(
        ["obj", "expected"],
        (
            pytest.param(
                DataArray(
                    coords={
                        "x": ("x", [], {
                            "units": "m"
                        }),
                        "u": ("x", [], {
                            "units": "s"
                        }),
                    },
                    attrs={"units": "hPa"},
                    dims="x",
                ),
                {
                    "x": "m",
                    "u": "s",
                    None: "hPa"
                },
                id="DataArray",
            ),
            pytest.param(
                Dataset(
                    data_vars={
                        "a": ("x", [], {
                            "units": "K"
                        }),
                        "b": ("x", [], {
                            "units": "hPa"
                        }),
                    },
                    coords={
                        "x": ("x", [], {
                            "units": "m"
                        }),
                        "u": ("x", [], {
                            "units": "s"
                        }),
                    },
                ),
                {
                    "a": "K",
                    "b": "hPa",
                    "x": "m",
                    "u": "s"
                },
                id="Dataset",
            ),
            pytest.param(Variable("x", [], {"units": "hPa"}), {None: "hPa"},
                         id="Variable"),
        ),
    )
    def test_strip_unit_attributes(self, obj, expected):
        actual = conversion.strip_unit_attributes(obj)
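        # after stripping, no units attributes should remain, whatever the
        # parametrized input was, so the parametrized ``expected`` is overridden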
        expected = {}

        assert (filter_none_values(
            conversion.extract_unit_attributes(actual)) == expected)
Example no. 56
0
    def test_invalid_coordinates(self):
        # regression test for GH308
        original = Dataset({'foo': ('t', [1, 2], {'coordinates': 'invalid'})})
        actual = conventions.decode_cf(original)
        self.assertDatasetIdentical(original, actual)
Example no. 57
0
def update_time_slice(store: Union[str, MutableMapping],
                      insert_index: int,
                      time_slice: xr.Dataset,
                      mode: str,
                      chunk_sizes: Dict[str, int] = None):
    """
    Update an existing zarr dataset with a new time slice.

    :param store: A zarr store.
    :param insert_index: Time index at which to insert or replace the slice.
    :param time_slice: The single-step time slice to write.
    :param mode: Update mode, either 'insert' or 'replace'.
    :param chunk_sizes: Desired chunk sizes, mapping dimension names to sizes.
    """

    if mode not in ('insert', 'replace'):
        raise ValueError(f'illegal mode value: {mode!r}')

    insert_mode = mode == 'insert'

    time_var_names = []
    encoding = {}
    with xr.open_zarr(store) as cube:
        for var_name in cube.variables:
            var = cube[var_name]
            if var.ndim >= 1 and 'time' in var.dims:
                if var.dims[0] != 'time':
                    raise ValueError(
                        f"dimension 'time' of variable {var_name!r} must be first dimension"
                    )
                time_var_names.append(var_name)
                enc = dict(cube[var_name].encoding)
                # xarray 0.17+ exposes a backend's preferred chunks via the
                # 'preferred_chunks' encoding key (the zarr backend does this),
                # but passing that key back when writing to zarr raises an
                # "unsupported encoding" error, so drop it here
                if 'preferred_chunks' in enc:
                    del enc['preferred_chunks']
                encoding[var_name] = enc

    if chunk_sizes:
        time_slice = chunk_dataset(time_slice, chunk_sizes, format_name='zarr')
    temp_dir = tempfile.TemporaryDirectory(prefix='xcube-time-slice-',
                                           suffix='.zarr')
    time_slice.to_zarr(temp_dir.name, encoding=encoding)
    slice_root_group = zarr.open(temp_dir.name, mode='r')
    slice_arrays = dict(slice_root_group.arrays())

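    # open the target cube read/write and splice the new slice into every
    # time-dependent array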
    cube_root_group = zarr.open(store, mode='r+')
    for var_name, var_array in cube_root_group.arrays():
        if var_name in time_var_names:
            slice_array = slice_arrays[var_name]
            if insert_mode:
                # Add one empty time step
                empty = zarr.creation.empty(slice_array.shape,
                                            dtype=var_array.dtype)
                var_array.append(empty, axis=0)
                # Shift contents
                var_array[insert_index + 1:, ...] = var_array[insert_index:-1,
                                                              ...]
            # Replace slice
            var_array[insert_index, ...] = slice_array[0]

    unchunk_dataset(store, coords_only=True)
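
A minimal usage sketch, assuming 'cube.zarr' is an existing cube written by this tool and 'slice.nc' holds a matching single-time-step dataset (both paths are hypothetical):

import xarray as xr

new_slice = xr.open_dataset('slice.nc')  # hypothetical one-step dataset
# overwrite the step already stored at time index 5
update_time_slice('cube.zarr', insert_index=5, time_slice=new_slice,
                  mode='replace', chunk_sizes={'time': 1})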
Example no. 58
0
    def test_combine_coords_join_exact(self):
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        with raises_regex(ValueError, "indexes along dimension"):
            combine_nested(objs, concat_dim="x", join="exact")
Example no. 59
0
    def test_auto_combine(self, combine):
        objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
        actual = combine(objs)
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        actual = combine([actual])
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
        actual = combine(objs)
        expected = Dataset({'x': [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])),
            Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))
        ]
        actual = combine(objs)
        expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1])})
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
        with raises_regex(ValueError, 'too many .* dimensions'):
            combine(objs)

        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
        with raises_regex(ValueError, 'cannot infer dimension'):
            combine(objs)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
        with pytest.raises(KeyError):
            combine(objs)
Example no. 60
0
    def apply(self, data):
        # scale each configured band to an 8-bit value and collect the
        # results into a new Dataset that keeps the input's dimensions
        imgdata = {}
        for band in self.components:
            imgdata[band] = (data.dims, self.get_8bit_value(data, band))
        imgdataset = Dataset(imgdata)
        return imgdataset