Example No. 1
# Context: excerpted from xarray/testing.py, which provides the remaining
# names via its module-level imports, e.g.:
import functools

from xarray.core import formatting, utils
from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset
from xarray.core.variable import Variable

# _data_allclose_or_equiv is a private helper defined in the same module.
def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
    """Like :py:func:`numpy.testing.assert_allclose`, but for xarray objects.

    Raises an AssertionError if two objects are not equal up to desired
    tolerance.

    Parameters
    ----------
    a : xarray.Dataset, xarray.DataArray or xarray.Variable
        The first object to compare.
    b : xarray.Dataset, xarray.DataArray or xarray.Variable
        The second object to compare.
    rtol : float, optional
        Relative tolerance.
    atol : float, optional
        Absolute tolerance.
    decode_bytes : bool, optional
        Whether byte dtypes should be decoded to strings as UTF-8 or not.
        This is useful for testing serialization methods on Python 3 that
        return saved strings as bytes.

    See also
    --------
    assert_identical, assert_equal, numpy.testing.assert_allclose
    """
    __tracebackhide__ = True
    assert type(a) == type(b)

    equiv = functools.partial(_data_allclose_or_equiv,
                              rtol=rtol,
                              atol=atol,
                              decode_bytes=decode_bytes)
    equiv.__name__ = "allclose"

    def compat_variable(a, b):
        a = getattr(a, "variable", a)
        b = getattr(b, "variable", b)

        return a.dims == b.dims and (a._data is b._data
                                     or equiv(a.data, b.data))

    if isinstance(a, Variable):
        allclose = compat_variable(a, b)
        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
    elif isinstance(a, DataArray):
        allclose = utils.dict_equiv(
            a.coords, b.coords, compat=compat_variable) and compat_variable(
                a.variable, b.variable)
        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
    elif isinstance(a, Dataset):
        allclose = a._coord_names == b._coord_names and utils.dict_equiv(
            a.variables, b.variables, compat=compat_variable)
        assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv)
    else:
        raise TypeError("{} not supported by assertion comparison".format(
            type(a)))
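A usage sketch for the function above, with invented DataArrays (only public xarray and pytest API is assumed):

import pytest
import xarray as xr
from xarray.testing import assert_allclose

a = xr.DataArray([1.0, 2.0, 3.0], dims="x", coords={"x": [0, 1, 2]})
b = xr.DataArray([1.0, 2.0, 3.0 + 1e-9], dims="x", coords={"x": [0, 1, 2]})

assert_allclose(a, b)  # passes: the difference is inside the default tolerances

with pytest.raises(AssertionError):
    # tightening the tolerances makes the same comparison fail with a diff repr
    assert_allclose(a, b, rtol=0, atol=1e-12)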
Example No. 2
    def test_to_dataset(self, tmpdir_factory, bout_xyt_example_files):
        path = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
        ds = open_boutdataset(datapath=path,
                              inputfilepath=None,
                              keep_xboundaries=False)
        da = ds['n']

        new_ds = da.bout.to_dataset()

        assert dict_equiv(ds.attrs, new_ds.attrs)
        assert dict_equiv(ds.metadata, new_ds.metadata)
Example No. 3
    def test_to_dataset(self, bout_xyt_example_files):
        dataset_list = bout_xyt_example_files(None, nxpe=3, nype=4, nt=1)
        with pytest.warns(UserWarning):
            ds = open_boutdataset(
                datapath=dataset_list, inputfilepath=None, keep_xboundaries=False
            )
        da = ds["n"]

        new_ds = da.bout.to_dataset()

        assert dict_equiv(ds.attrs, new_ds.attrs)
        assert dict_equiv(ds.metadata, new_ds.metadata)
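Without BOUT++ output files to open, the attrs check in these two tests can be sketched with plain xarray; the Dataset below is invented, and in the real tests it is xBOUT's .bout accessor that copies attrs and metadata across:

import xarray as xr
from xarray.core.utils import dict_equiv

ds = xr.Dataset({"n": ("t", [1.0, 2.0, 3.0])}, attrs={"source": "sketch"})

new_ds = ds["n"].to_dataset()  # named DataArray back to a one-variable Dataset
new_ds.attrs = dict(ds.attrs)  # stand-in for what the accessor does

assert dict_equiv(ds.attrs, new_ds.attrs)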
Example No. 4
 def test_dict_equiv(self):
     x = OrderedDict()
     x['a'] = 3
     x['b'] = np.array([1, 2, 3])
     y = OrderedDict()
     y['b'] = np.array([1.0, 2.0, 3.0])
     y['a'] = 3
     self.assertTrue(utils.dict_equiv(x, y))  # two nparrays are equal
     y['b'] = [1, 2, 3]  # np.array not the same as a list
     self.assertTrue(utils.dict_equiv(x, y))  # nparray == list
     x['b'] = [1.0, 2.0, 3.0]
     self.assertTrue(utils.dict_equiv(x, y))  # list vs. list
     x['c'] = None
     self.assertFalse(utils.dict_equiv(x, y))  # new key in x
     x['c'] = np.nan
     y['c'] = np.nan
     self.assertTrue(utils.dict_equiv(x, y))  # as intended, nan is nan
     x['c'] = np.inf
     y['c'] = np.inf
     self.assertTrue(utils.dict_equiv(x, y))  # inf == inf
     y = dict(y)
     self.assertTrue(utils.dict_equiv(x, y))  # different dictionary types are fine
     y['b'] = 3 * np.arange(3)
     self.assertFalse(utils.dict_equiv(x, y))  # not equal when arrays differ
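The behaviour this test pins down can be summarised by a small re-implementation (a sketch only; the real code is xarray.core.utils.dict_equiv, which delegates array comparison to xarray's duck array ops):

import numpy as np


def dict_equiv_sketch(first, second):
    """Minimal re-implementation of the semantics exercised above."""
    if set(first) != set(second):
        return False  # e.g. a key present in only one dict
    for k in first:
        a, b = first[k], second[k]
        if isinstance(a, (np.ndarray, list)) or isinstance(b, (np.ndarray, list)):
            a, b = np.asarray(a), np.asarray(b)
            if a.shape != b.shape:
                return False
            # NaN != NaN, so (a != a) marks NaN positions; co-located NaNs
            # count as equal, matching the nan/inf cases in the test above
            if not bool(((a == b) | ((a != a) & (b != b))).all()):
                return False
        elif not (a is b or a == b):
            return False
    return True


assert dict_equiv_sketch({"a": np.nan}, {"a": np.nan})      # NaN matches NaN
assert not dict_equiv_sketch({"a": [1, 2]}, {"b": [1, 2]})  # key sets differ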
Example No. 5
 def assert_variable_attrs_equal(self, ds_a, ds_b):
     # Does not test whether ds_a and ds_b have the same set of variables
     for var_name in ds_a.variables.keys():
         attrs_a = ds_a[var_name].attrs
         attrs_b = ds_b[var_name].attrs
         assert dict_equiv(attrs_a, attrs_b), diff_attrs_repr(
             attrs_a, attrs_b, "identical")
Example No. 6
 def assert_global_attrs_close(self, attrs_a, attrs_b):
     attrs_a = attrs_a.copy()
     attrs_b = attrs_b.copy()
     self._assert_time_attrs_close(attrs_a, attrs_b)
     self._assert_numerical_attrs_close(attrs_a, attrs_b)
     assert dict_equiv(attrs_a, attrs_b), diff_attrs_repr(
         attrs_a, attrs_b, "identical")
Example No. 7
 def test_dict_equiv(self):
     x = OrderedDict()
     x["a"] = 3
     x["b"] = np.array([1, 2, 3])
     y = OrderedDict()
     y["b"] = np.array([1.0, 2.0, 3.0])
     y["a"] = 3
     assert utils.dict_equiv(x, y)  # two nparrays are equal
     y["b"] = [1, 2, 3]  # np.array not the same as a list
     assert utils.dict_equiv(x, y)  # nparray == list
     x["b"] = [1.0, 2.0, 3.0]
     assert utils.dict_equiv(x, y)  # list vs. list
     x["c"] = None
     assert not utils.dict_equiv(x, y)  # new key in x
     x["c"] = np.nan
     y["c"] = np.nan
     assert utils.dict_equiv(x, y)  # as intended, nan is nan
     x["c"] = np.inf
     y["c"] = np.inf
     assert utils.dict_equiv(x, y)  # inf == inf
     y = dict(y)
     assert utils.dict_equiv(x, y)  # different dictionary types are fine
     y["b"] = 3 * np.arange(3)
     assert not utils.dict_equiv(x, y)  # not equal when arrays differ
Example No. 8
 def test_dict_equiv(self):
     x = OrderedDict()
     x['a'] = 3
     x['b'] = np.array([1, 2, 3])
     y = OrderedDict()
     y['b'] = np.array([1.0, 2.0, 3.0])
     y['a'] = 3
     assert utils.dict_equiv(x, y)  # two nparrays are equal
     y['b'] = [1, 2, 3]  # np.array not the same as a list
     assert utils.dict_equiv(x, y)  # nparray == list
     x['b'] = [1.0, 2.0, 3.0]
     assert utils.dict_equiv(x, y)  # list vs. list
     x['c'] = None
     assert not utils.dict_equiv(x, y)  # new key in x
     x['c'] = np.nan
     y['c'] = np.nan
     assert utils.dict_equiv(x, y)  # as intended, nan is nan
     x['c'] = np.inf
     y['c'] = np.inf
     assert utils.dict_equiv(x, y)  # inf == inf
     y = dict(y)
     assert utils.dict_equiv(x, y)  # different dictionary types are fine
     y['b'] = 3 * np.arange(3)
     assert not utils.dict_equiv(x, y)  # not equal when arrays differ
Example No. 9
 def test_dict_equiv(self):
     x = OrderedDict()
     x["a"] = 3
     x["b"] = np.array([1, 2, 3])
     y = OrderedDict()
     y["b"] = np.array([1.0, 2.0, 3.0])
     y["a"] = 3
     self.assertTrue(utils.dict_equiv(x, y))  # two nparrays are equal
     y["b"] = [1, 2, 3]  # np.array not the same as a list
     self.assertTrue(utils.dict_equiv(x, y))  # nparray == list
     x["b"] = [1.0, 2.0, 3.0]
     self.assertTrue(utils.dict_equiv(x, y))  # list vs. list
     x["c"] = None
     self.assertFalse(utils.dict_equiv(x, y))  # new key in x
     x["c"] = np.nan
     y["c"] = np.nan
     self.assertTrue(utils.dict_equiv(x, y))  # as intended, nan is nan
     x["c"] = np.inf
     y["c"] = np.inf
     self.assertTrue(utils.dict_equiv(x, y))  # inf == inf
     y = dict(y)
     self.assertTrue(utils.dict_equiv(x, y))  # different dictionary types are fine
     y["b"] = 3 * np.arange(3)
     self.assertFalse(utils.dict_equiv(x, y))  # not equal when arrays differ
Example No. 10
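# Context (added for readability, not in the original source): Dataset,
# Variable, align, concat_vars, unique_variable, _calc_concat_over and
# _parse_datasets mirror the imports of xarray.core.concat in xarray 0.15,
# alongside pandas as pd, collections.OrderedDict and xarray's utils module.
# _find_concat_dims, _calc_concat_dims_coords and _get_concat_dim are
# companion helpers of the adapting project and are not shown here.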
def _dataset_multi_concat(
    datasets,
    dim,
    data_vars,
    coords,
    compat,
    positions,
    join="outer",
):
    """
    Concatenate a sequence of datasets along a dimension, trying concatenation along alternate dimensions when the 
    chosen dimension is not present. This function is based on _dataset_concat from xarray.core.concat.py in xarray 
    0.15. It includes a modification to drop mismatched coordinates from datasets instead of throwing a ValueError. 
    This drop removes the variable from coordinates, but it remains a variable in the dataset.
    """
    # Make sure we're working on a copy (we'll be loading variables)
    datasets = [ds.copy() for ds in datasets]

    # determine what dimensions we will be concatenating over, including the preferred dim and any alternatives when
    # the preferred dim is absent
    dims = _find_concat_dims(datasets, dim)
    dims, coordinates = _calc_concat_dims_coords(dims)

    datasets = align(*datasets, join=join, copy=False, exclude=dims)

    dim_coords, dims_sizes, coord_names, data_names = _parse_datasets(datasets)
    dim_names = set(dim_coords)
    unlabeled_dims = dim_names - coord_names
    both_data_and_coords = coord_names & data_names
    if both_data_and_coords:
        # Instead of throwing a ValueError, make the coordinates match by removing the mismatched coordinate
        for ds in datasets:
            for variable in both_data_and_coords:
                if variable in ds.coords:
                    # This makes the variable no longer a coordinate, but does not remove it from the dataset entirely
                    ds._coord_names.remove(variable)
                    coord_names.discard(variable)

    # we don't want the concat dimensions in the result dataset yet
    for dim in dims:
        dim_coords.pop(dim, None)
        dims_sizes.pop(dim, None)

        # case where concat dimension is a coordinate or data_var but not a dimension
        if (dim in coord_names or dim in data_names) and dim not in dim_names:
            datasets = [ds.expand_dims(dim) for ds in datasets]

    # determine which variables to concatenate
    concat_over, equals, concat_dim_lengths = _calc_concat_over(
        datasets, dims, dim_names, data_vars, coords, compat)

    # determine which variables to merge, and then merge them according to compat
    variables_to_merge = (coord_names | data_names) - concat_over - dim_names

    result_vars = {}
    if variables_to_merge:
        to_merge = {var: [] for var in variables_to_merge}

        for ds in datasets:
            for var in variables_to_merge:
                if var in ds:
                    to_merge[var].append(ds.variables[var])

        for var in variables_to_merge:
            result_vars[var] = unique_variable(var,
                                               to_merge[var],
                                               compat=compat,
                                               equals=equals.get(var, None))

    result_vars.update(dim_coords)

    # assign attrs and encoding from first dataset
    result_attrs = datasets[0].attrs
    result_encoding = datasets[0].encoding

    # check that global attributes are fixed across all datasets if necessary
    for ds in datasets[1:]:
        if compat == "identical" and not utils.dict_equiv(
                ds.attrs, result_attrs):
            raise ValueError("Dataset global attributes not equal.")

    # we've already verified everything is consistent; now, calculate
    # shared dimension sizes so we can expand the necessary variables
    def ensure_common_dims(vars):
        # ensure each variable with the given name shares the same
        # dimensions and the same shape for all of them except along the
        # concat dimension
        common_dims = tuple(pd.unique([d for v in vars for d in v.dims]))

        # find the first concat dimension available in vars
        concat_dims = [x for x in dims if x in common_dims]
        if not concat_dims:
            # none of the concat dims are present - add the first one
            dim = dims[0]
            common_dims = (dim,) + common_dims
            concat_dim = dim
        else:
            concat_dim = concat_dims[0]

        for var, dim_len in zip(vars, concat_dim_lengths[concat_dim]):
            if var.dims != common_dims:
                common_shape = tuple(
                    dims_sizes.get(d, dim_len) for d in common_dims)
                var = var.expand_dims(common_dims, common_shape)
            yield var

    # stack up each variable to fill-out the dataset (in order)
    # n.b. this loop preserves variable order, needed for groupby.
    for k in datasets[0].variables:
        if k in concat_over:
            try:
                vars = ensure_common_dims([ds.variables[k] for ds in datasets])
            except KeyError:
                raise ValueError("%r is not present in all datasets." % k)
            # get the dimension to concatenate this variable on - choose first applicable dim from dims
            dim = _get_concat_dim(dims, [ds.variables[k] for ds in datasets])
            combined = concat_vars(vars, dim, positions)
            assert isinstance(combined, Variable)
            result_vars[k] = combined

    result = Dataset(result_vars, attrs=result_attrs)
    absent_coord_names = coord_names - set(result.variables)
    if absent_coord_names:
        raise ValueError(
            "Variables %r are coordinates in some datasets but not others." %
            absent_coord_names)
    # current versions of Dataset.set_coords and Dataset.drop force an _assert_all_in_dataset check that we don't
    # want; xarray 0.15 can disable it via errors='ignore', but for now just replicate the underlying logic
    # result = result.set_coords(coord_names, errors='ignore')
    result._coord_names.update(coord_names)
    result.encoding = result_encoding

    # result = result.drop(unlabeled_dims, errors='ignore')
    drop = set(unlabeled_dims)
    variables = OrderedDict(
        (k, v) for k, v in result._variables.items() if k not in drop)
    coord_names = set(k for k in result._coord_names if k in variables)
    result._replace_vars_and_dims(variables, coord_names)

    for coord in coordinates:
        if coord:
            # add the concat dimension last to ensure that it's in the final Dataset
            result[coord.name] = coord

    return result
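A short sketch of the situation the modified both_data_and_coords branch handles, using only public xarray API (the datasets below are invented): plain xr.concat refuses variables that are coordinates in one dataset but data variables in another, while the function above demotes the coordinate and proceeds.

import numpy as np
import xarray as xr

ds1 = xr.Dataset(
    {"t": ("time", np.zeros(3))},
    coords={"time": [0, 1, 2], "flag": ("time", [1, 1, 1])},
)
ds2 = xr.Dataset(
    {"t": ("time", np.zeros(3)), "flag": ("time", [1, 1, 1])},
    coords={"time": [3, 4, 5]},
)

# xr.concat([ds1, ds2], dim="time") raises: 'flag' is a coordinate in some
# datasets but not others. Demoting it first, as _dataset_multi_concat does
# internally, lets concatenation succeed:
combined = xr.concat([ds1.reset_coords("flag"), ds2], dim="time")
print(combined)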