Exemplo n.º 1
0
class TestCombineAuto:
    """Tests for automatic, coordinate-based combination of datasets."""

    def test_combine_by_coords(self):
        """combine_by_coords orders datasets by their dimension coords."""
        objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        # A single, already-combined dataset passes through unchanged.
        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset({
                'x': ('a', [0]),
                'y': ('a', [0]),
                'a': [0]
            }),
            Dataset({
                'x': ('a', [1]),
                'y': ('a', [1]),
                'a': [1]
            })
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({
            'x': ('a', [0, 1]),
            'y': ('a', [0, 1]),
            'a': [0, 1]
        })
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1], 'y': [0, 1]})
        assert_equal(actual, expected)

        # Scalar coordinates give no ordering information, so this must fail.
        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
        with raises_regex(ValueError, 'Could not find any dimension '
                          'coordinates'):
            combine_by_coords(objs)

        # A dimension without a coordinate cannot be ordered either.
        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
        with raises_regex(ValueError, 'Every dimension needs a coordinate'):
            combine_by_coords(objs)

    def test_empty_input(self):
        # Fixed: this test was previously nested inside
        # test_combine_by_coords, so pytest never collected or ran it.
        assert_identical(Dataset(), combine_by_coords([]))

    @pytest.mark.parametrize("join, expected", [
        ('outer', Dataset({
            'x': [0, 1],
            'y': [0, 1]
        })),
        ('inner', Dataset({
            'x': [0, 1],
            'y': []
        })),
        ('left', Dataset({
            'x': [0, 1],
            'y': [0]
        })),
        ('right', Dataset({
            'x': [0, 1],
            'y': [1]
        })),
    ])
    def test_combine_coords_join(self, join, expected):
        """combine_nested honours each alignment `join` option."""
        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})]
        actual = combine_nested(objs, concat_dim='x', join=join)
        assert_identical(expected, actual)

    def test_combine_coords_join_exact(self):
        """join='exact' refuses to align differing non-concat indexes."""
        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})]
        with raises_regex(ValueError, 'indexes along dimension'):
            combine_nested(objs, concat_dim='x', join='exact')

    def test_infer_order_from_coords(self):
        """Datasets supplied out of order are re-ordered via their coords."""
        data = create_test_data()
        objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
        actual = combine_by_coords(objs)
        expected = data
        assert expected.broadcast_equals(actual)

    def test_combine_leaving_bystander_dimensions(self):
        # Check non-monotonic bystander dimension coord doesn't raise
        # ValueError on combine (https://github.com/pydata/xarray/issues/3150)
        ycoord = ['a', 'c', 'b']

        data = np.random.rand(7, 3)

        ds1 = Dataset(data_vars=dict(data=(['x', 'y'], data[:3, :])),
                      coords=dict(x=[1, 2, 3], y=ycoord))

        ds2 = Dataset(data_vars=dict(data=(['x', 'y'], data[3:, :])),
                      coords=dict(x=[4, 5, 6, 7], y=ycoord))

        expected = Dataset(data_vars=dict(data=(['x', 'y'], data)),
                           coords=dict(x=[1, 2, 3, 4, 5, 6, 7], y=ycoord))

        actual = combine_by_coords((ds1, ds2))
        assert_identical(expected, actual)

    def test_combine_by_coords_previously_failed(self):
        # In the above scenario, one file is missing, containing the data for
        # one year's data for one variable.
        datasets = [
            Dataset({
                'a': ('x', [0]),
                'x': [0]
            }),
            Dataset({
                'b': ('x', [0]),
                'x': [0]
            }),
            Dataset({
                'a': ('x', [1]),
                'x': [1]
            })
        ]
        # Missing entries are filled with NaN.
        expected = Dataset({
            'a': ('x', [0, 1]),
            'b': ('x', [0, np.nan])
        }, {'x': [0, 1]})
        actual = combine_by_coords(datasets)
        assert_identical(expected, actual)

    def test_combine_by_coords_still_fails(self):
        # concat can't handle new variables (yet):
        # https://github.com/pydata/xarray/issues/508
        datasets = [
            Dataset({'x': 0}, {'y': 0}),
            Dataset({'x': 1}, {
                'y': 1,
                'z': 1
            })
        ]
        with pytest.raises(ValueError):
            combine_by_coords(datasets, 'y')

    def test_combine_by_coords_no_concat(self):
        """Variables without shared dims are merged, not concatenated."""
        objs = [Dataset({'x': 0}), Dataset({'y': 1})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': 0, 'y': 1})
        assert_identical(expected, actual)

        objs = [Dataset({'x': 0, 'y': 1}), Dataset({'y': np.nan, 'z': 2})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': 0, 'y': 1, 'z': 2})
        assert_identical(expected, actual)

    def test_check_for_impossible_ordering(self):
        """Overlapping, non-monotonic global indexes are rejected."""
        ds0 = Dataset({'x': [0, 1, 5]})
        ds1 = Dataset({'x': [2, 3]})
        with raises_regex(
                ValueError, "does not have monotonic global indexes"
                " along dimension x"):
            combine_by_coords([ds1, ds0])
Exemplo n.º 2
0
 def test_invalid_time_units_raises_eagerly(self):
     """Malformed time units fail at decode time, not lazily on access."""
     bad_units = {'units': 'foobar since 123'}
     dataset = Dataset({'time': ('time', [0, 1], bad_units)})
     with raises_regex(ValueError, 'unable to decode time'):
         decode_cf(dataset)
Exemplo n.º 3
0
 def test_lazy_dataset(self):
     """Building a Dataset from dask-backed data keeps a dask backend."""
     wrapped = Dataset({'foo': (('x', 'y'), self.data)})
     backend = wrapped.foo.variable.data
     assert isinstance(backend, da.Array)
Exemplo n.º 4
0
 def test_combine_coords_join(self, join, expected):
     """combine_nested honours the requested alignment `join` method."""
     first = Dataset({"x": [0], "y": [0]})
     second = Dataset({"x": [1], "y": [1]})
     actual = combine_nested([first, second], concat_dim="x", join=join)
     assert_identical(expected, actual)
Exemplo n.º 5
0
def merge_datasets(
    datasets: Iterable[Dataset],
    bounds: Union[Tuple, None] = None,
    res: Union[Tuple, None] = None,
    nodata: Union[float, None] = None,
    precision: Union[float, None] = None,
    method: Union[str, Callable, None] = None,
) -> Dataset:
    """
    Merge datasets geospatially.

    Uses rasterio.merge.merge:
        https://rasterio.readthedocs.io/en/stable/api/rasterio.merge.html#rasterio.merge.merge

    Parameters
    ----------
    datasets: list
        List of xarray.Dataset's with all geo attributes.
        The first one is assumed to have the same
        CRS, dtype, dimensions, and data_vars as the others in the array.
    bounds: tuple, optional
        Bounds of the output image (left, bottom, right, top).
        If not set, bounds are determined from bounds of input Dataset.
    res: tuple, optional
        Output resolution in units of coordinate reference system.
        If not set, the resolution of the first Dataset is used.
        If a single value is passed, output pixels will be square.
    nodata: float, optional
        nodata value to use in output file.
        If not set, uses the nodata value in the first input Dataset.
    precision: float, optional
        Number of decimal points of precision when computing inverse transform.
    method: str or callable, optional
        See rasterio docs.

    Returns
    -------
    :obj:`xarray.Dataset`:
        The geospatially merged data.
    """
    # Materialize once: the input is typed Iterable but is indexed below and
    # re-iterated for every data variable, which would exhaust a generator.
    datasets = list(datasets)
    representative_ds = datasets[0]
    merged_data = {}
    # Merge each variable independently; all variables share the same grid.
    for data_var in representative_ds.data_vars:
        merged_data[data_var] = merge_arrays(
            [dataset[data_var] for dataset in datasets],
            bounds=bounds,
            res=res,
            nodata=nodata,
            precision=precision,
            method=method,
            parse_coordinates=False,
        )
    # Any merged variable can supply the output grid; take the first one.
    representative_array = next(iter(merged_data.values()))
    xds = Dataset(
        merged_data,
        coords=_make_coords(
            representative_array,
            representative_array.rio.transform(),
            representative_array.shape[-1],
            representative_array.shape[-2],
        ),
        attrs=representative_ds.attrs,
    )
    # Stamp the CRS of the first input onto the merged result.
    xds.rio.write_crs(representative_ds.rio.crs, inplace=True)
    return xds
Exemplo n.º 6
0
 def test_nested_concat_too_many_dims_at_once(self):
     """Concatenating along a single dim cannot reconcile two varying dims."""
     mixed = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})]
     with pytest.raises(ValueError, match="not equal across datasets"):
         combine_nested(mixed, concat_dim="x", coords="minimal")
Exemplo n.º 7
0
    def test_combine_by_coords(self):
        """combine_by_coords orders datasets by their dimension coords."""
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        # A single, already-combined dataset passes through unchanged.
        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0]),
                "a": [0]
            }),
            Dataset({
                "x": ("a", [1]),
                "y": ("a", [1]),
                "a": [1]
            }),
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({
            "x": ("a", [0, 1]),
            "y": ("a", [0, 1]),
            "a": [0, 1]
        })
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1], "y": [0, 1]})
        assert_equal(actual, expected)

        # Scalar coordinates give no ordering information.
        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
        with raises_regex(ValueError,
                          "Could not find any dimension coordinates"):
            combine_by_coords(objs)

        # A dimension without a coordinate cannot be ordered either.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
        with raises_regex(ValueError, "Every dimension needs a coordinate"):
            combine_by_coords(objs)

    def test_empty_input(self):
        # Fixed: this test was previously nested inside
        # test_combine_by_coords, so pytest never collected or ran it.
        assert_identical(Dataset(), combine_by_coords([]))
Exemplo n.º 8
0
 def test_auto_combine_without_coords(self):
     """auto_combine warns when inputs carry no global dimension coords."""
     coordless = [Dataset({'foo': ('x', [0])}), Dataset({'foo': ('x', [1])})]
     with pytest.warns(FutureWarning, match="supplied do not have global"):
         auto_combine(coordless)
Exemplo n.º 9
0
def test_min_count_dataset(func):
    """Reducing via the Dataset matches reducing the DataArray directly."""
    da = construct_dataarray(2, dtype=float, contains_nan=True, dask=False)
    ds = Dataset({"var1": da}, coords={"scalar": 0})
    reduce_kwargs = dict(dim="x", skipna=True, min_count=3)
    actual = getattr(ds, func)(**reduce_kwargs)["var1"]
    expected = getattr(ds["var1"], func)(**reduce_kwargs)
    assert_allclose(actual, expected)
Exemplo n.º 10
0
    def test_auto_combine_no_concat(self):
        """auto_combine merges variables when nothing needs concatenating."""
        merged = auto_combine([Dataset({'x': 0}), Dataset({'y': 1})])
        assert_identical(Dataset({'x': 0, 'y': 1}), merged)

        merged = auto_combine(
            [Dataset({'x': 0, 'y': 1}), Dataset({'y': np.nan, 'z': 2})])
        assert_identical(Dataset({'x': 0, 'y': 1, 'z': 2}), merged)

        # Identical scalar datasets collapse to a single copy.
        scalar_ds = Dataset({'x': 0})
        merged = auto_combine([scalar_ds, scalar_ds, scalar_ds],
                              concat_dim=None)
        assert_identical(scalar_ds, merged)

        # Single object, with a concat_dim explicitly provided
        # Test the issue reported in GH #1988
        new_dim = DataArray([100], name='baz', dims='baz')
        merged = auto_combine([Dataset({'x': 0, 'y': 1})], concat_dim=new_dim)
        assert_identical(
            Dataset({'x': ('baz', [0]), 'y': ('baz', [1])}, {'baz': [100]}),
            merged)

        # Just making sure that auto_combine is doing what is
        # expected for non-scalar values, too.
        new_dim = DataArray([100], name='baz', dims='baz')
        merged = auto_combine(
            [Dataset({'x': ('z', [0, 1]), 'y': ('z', [1, 2])})],
            concat_dim=new_dim)
        assert_identical(
            Dataset({'x': (('baz', 'z'), [[0, 1]]),
                     'y': (('baz', 'z'), [[1, 2]])}, {'baz': [100]}),
            merged)
Exemplo n.º 11
0
 def test_auto_combine_with_concat_dim(self):
     """Passing concat_dim to auto_combine triggers the deprecation warning."""
     pair = [Dataset({'x': [0]}), Dataset({'x': [1]})]
     with pytest.warns(FutureWarning, match="`concat_dim`"):
         auto_combine(pair, concat_dim='x')
Exemplo n.º 12
0
    def test_auto_combine(self):
        """auto_combine infers the concat dimension from 1-D coordinates."""
        result = auto_combine([Dataset({'x': [0]}), Dataset({'x': [1]})])
        assert_identical(Dataset({'x': [0, 1]}), result)

        # Idempotent on an already-combined dataset.
        assert_identical(Dataset({'x': [0, 1]}), auto_combine([result]))

        result = auto_combine([Dataset({'x': [0, 1]}), Dataset({'x': [2]})])
        assert_identical(Dataset({'x': [0, 1, 2]}), result)

        # ensure auto_combine handles non-sorted variables
        unsorted = [
            Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])),
            Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))
        ]
        result = auto_combine(unsorted)
        assert_identical(Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1])}),
                         result)

        # Two candidate concat dimensions is ambiguous.
        with raises_regex(ValueError, 'too many .* dimensions'):
            auto_combine([Dataset({'x': [0], 'y': [0]}),
                          Dataset({'y': [1], 'x': [1]})])

        # Scalar coordinates give no dimension to concatenate along.
        with raises_regex(ValueError, 'cannot infer dimension'):
            auto_combine([Dataset({'x': 0}), Dataset({'x': 1})])

        # Mismatched variables between the datasets.
        with pytest.raises(KeyError):
            auto_combine([Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})])
Exemplo n.º 13
0
 def test_combine_coords_join_exact(self):
     """join='exact' raises when the non-concat indexes differ."""
     left = Dataset({'x': [0], 'y': [0]})
     right = Dataset({'x': [1], 'y': [1]})
     with raises_regex(ValueError, 'indexes along dimension'):
         combine_nested([left, right], concat_dim='x', join='exact')
Exemplo n.º 14
0
    def test_combine_by_coords(self):
        """combine_by_coords orders datasets by their dimension coords."""
        objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        # A single, already-combined dataset passes through unchanged.
        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset({
                'x': ('a', [0]),
                'y': ('a', [0]),
                'a': [0]
            }),
            Dataset({
                'x': ('a', [1]),
                'y': ('a', [1]),
                'a': [1]
            })
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({
            'x': ('a', [0, 1]),
            'y': ('a', [0, 1]),
            'a': [0, 1]
        })
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1], 'y': [0, 1]})
        assert_equal(actual, expected)

        # Scalar coordinates give no ordering information.
        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
        with raises_regex(ValueError, 'Could not find any dimension '
                          'coordinates'):
            combine_by_coords(objs)

        # A dimension without a coordinate cannot be ordered either.
        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
        with raises_regex(ValueError, 'Every dimension needs a coordinate'):
            combine_by_coords(objs)

    def test_empty_input(self):
        # Fixed: this test was previously nested inside
        # test_combine_by_coords, so pytest never collected or ran it.
        assert_identical(Dataset(), combine_by_coords([]))
Exemplo n.º 15
0
    def test_nested_concat(self):
        """combine_nested concatenates along an explicitly named dimension."""
        pair = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        combined = Dataset({"x": [0, 1]})
        # Both a bare string and a one-element list may name the concat dim.
        assert_identical(combined, combine_nested(pair, concat_dim="x"))
        assert_identical(combined, combine_nested(pair, concat_dim=["x"]))

        # A single dataset is returned unchanged, whatever concat_dim says.
        assert_identical(combined,
                         combine_nested([combined], concat_dim=None))
        assert_identical(combined,
                         combine_nested([combined], concat_dim="x"))

        uneven = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        assert_identical(Dataset({"x": [0, 1, 2]}),
                         combine_nested(uneven, concat_dim="x"))

        # ensure combine_nested handles non-sorted variables
        unsorted = [
            Dataset({"x": ("a", [0]), "y": ("a", [0])}),
            Dataset({"y": ("a", [1]), "x": ("a", [1])}),
        ]
        assert_identical(Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])}),
                         combine_nested(unsorted, concat_dim="a"))

        # A variable present in only one dataset is kept as-is.
        partial = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})]
        assert_identical(Dataset({"x": [0, 1], "y": [0]}),
                         combine_nested(partial, concat_dim="x"))
Exemplo n.º 16
0
 def test_invalid_coordinates(self):
     """decode_cf leaves an unparseable 'coordinates' attribute intact."""
     # regression test for GH308
     with_bad_attr = Dataset(
         {"foo": ("t", [1, 2], {"coordinates": "invalid"})})
     roundtripped = conventions.decode_cf(with_bad_attr)
     assert_identical(with_bad_attr, roundtripped)
Exemplo n.º 17
0
 def test_empty_input(self):
     """Nested-combining an empty list yields an empty Dataset."""
     result = combine_nested([], concat_dim="x")
     assert_identical(Dataset(), result)
Exemplo n.º 18
0
 def test_invalid_time_units_raises_eagerly(self):
     """Bad time units fail at decode time, not lazily on access."""
     attrs = {"units": "foobar since 123"}
     bad_ds = Dataset({"time": ("time", [0, 1], attrs)})
     with raises_regex(ValueError, "unable to decode time"):
         decode_cf(bad_ds)
Exemplo n.º 19
0
class TestCombineAuto:
    """Tests for automatic, coordinate-based combination of datasets."""

    def test_combine_by_coords(self):
        """combine_by_coords orders datasets by their dimension coords."""
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        # A single, already-combined dataset passes through unchanged.
        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0]),
                "a": [0]
            }),
            Dataset({
                "x": ("a", [1]),
                "y": ("a", [1]),
                "a": [1]
            }),
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({
            "x": ("a", [0, 1]),
            "y": ("a", [0, 1]),
            "a": [0, 1]
        })
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1], "y": [0, 1]})
        assert_equal(actual, expected)

        # Scalar coordinates give no ordering information.
        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
        with raises_regex(ValueError,
                          "Could not find any dimension coordinates"):
            combine_by_coords(objs)

        # A dimension without a coordinate cannot be ordered either.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
        with raises_regex(ValueError, "Every dimension needs a coordinate"):
            combine_by_coords(objs)

    def test_empty_input(self):
        # Fixed: this test was previously nested inside
        # test_combine_by_coords, so pytest never collected or ran it.
        assert_identical(Dataset(), combine_by_coords([]))

    @pytest.mark.parametrize(
        "join, expected",
        [
            ("outer", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            })),
            ("inner", Dataset({
                "x": [0, 1],
                "y": []
            })),
            ("left", Dataset({
                "x": [0, 1],
                "y": [0]
            })),
            ("right", Dataset({
                "x": [0, 1],
                "y": [1]
            })),
        ],
    )
    def test_combine_coords_join(self, join, expected):
        """combine_nested honours each alignment `join` option."""
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        actual = combine_nested(objs, concat_dim="x", join=join)
        assert_identical(expected, actual)

    def test_combine_coords_join_exact(self):
        """join='exact' refuses to align differing non-concat indexes."""
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        with raises_regex(ValueError, "indexes along dimension"):
            combine_nested(objs, concat_dim="x", join="exact")

    @pytest.mark.parametrize(
        "combine_attrs, expected",
        [
            ("drop", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            }, attrs={})),
            (
                "no_conflicts",
                Dataset({
                    "x": [0, 1],
                    "y": [0, 1]
                }, attrs={
                    "a": 1,
                    "b": 2
                }),
            ),
            ("override", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            }, attrs={"a": 1})),
        ],
    )
    def test_combine_coords_combine_attrs(self, combine_attrs, expected):
        """Each combine_attrs policy produces the documented attrs."""
        objs = [
            Dataset({
                "x": [0],
                "y": [0]
            }, attrs={"a": 1}),
            Dataset({
                "x": [1],
                "y": [1]
            }, attrs={
                "a": 1,
                "b": 2
            }),
        ]
        actual = combine_nested(objs,
                                concat_dim="x",
                                join="outer",
                                combine_attrs=combine_attrs)
        assert_identical(expected, actual)

        if combine_attrs == "no_conflicts":
            # Introduce a conflicting attribute value; combining must fail.
            objs[1].attrs["a"] = 2
            with raises_regex(ValueError, "combine_attrs='no_conflicts'"):
                actual = combine_nested(objs,
                                        concat_dim="x",
                                        join="outer",
                                        combine_attrs=combine_attrs)

    def test_combine_coords_combine_attrs_identical(self):
        """combine_attrs='identical' requires attrs to match exactly."""
        objs = [
            Dataset({
                "x": [0],
                "y": [0]
            }, attrs={"a": 1}),
            Dataset({
                "x": [1],
                "y": [1]
            }, attrs={"a": 1}),
        ]
        expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1})
        actual = combine_nested(objs,
                                concat_dim="x",
                                join="outer",
                                combine_attrs="identical")
        assert_identical(expected, actual)

        # Any extra attribute breaks strict identity.
        objs[1].attrs["b"] = 2

        with raises_regex(ValueError, "combine_attrs='identical'"):
            actual = combine_nested(objs,
                                    concat_dim="x",
                                    join="outer",
                                    combine_attrs="identical")

    def test_infer_order_from_coords(self):
        """Datasets supplied out of order are re-ordered via their coords."""
        data = create_test_data()
        objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
        actual = combine_by_coords(objs)
        expected = data
        assert expected.broadcast_equals(actual)

    def test_combine_leaving_bystander_dimensions(self):
        # Check non-monotonic bystander dimension coord doesn't raise
        # ValueError on combine (https://github.com/pydata/xarray/issues/3150)
        ycoord = ["a", "c", "b"]

        data = np.random.rand(7, 3)

        ds1 = Dataset(
            data_vars=dict(data=(["x", "y"], data[:3, :])),
            coords=dict(x=[1, 2, 3], y=ycoord),
        )

        ds2 = Dataset(
            data_vars=dict(data=(["x", "y"], data[3:, :])),
            coords=dict(x=[4, 5, 6, 7], y=ycoord),
        )

        expected = Dataset(
            data_vars=dict(data=(["x", "y"], data)),
            coords=dict(x=[1, 2, 3, 4, 5, 6, 7], y=ycoord),
        )

        actual = combine_by_coords((ds1, ds2))
        assert_identical(expected, actual)

    def test_combine_by_coords_previously_failed(self):
        # In the above scenario, one file is missing, containing the data for
        # one year's data for one variable.
        datasets = [
            Dataset({
                "a": ("x", [0]),
                "x": [0]
            }),
            Dataset({
                "b": ("x", [0]),
                "x": [0]
            }),
            Dataset({
                "a": ("x", [1]),
                "x": [1]
            }),
        ]
        # Missing entries are filled with NaN.
        expected = Dataset({
            "a": ("x", [0, 1]),
            "b": ("x", [0, np.nan])
        }, {"x": [0, 1]})
        actual = combine_by_coords(datasets)
        assert_identical(expected, actual)

    def test_combine_by_coords_still_fails(self):
        # concat can't handle new variables (yet):
        # https://github.com/pydata/xarray/issues/508
        datasets = [
            Dataset({"x": 0}, {"y": 0}),
            Dataset({"x": 1}, {
                "y": 1,
                "z": 1
            })
        ]
        with pytest.raises(ValueError):
            combine_by_coords(datasets, "y")

    def test_combine_by_coords_no_concat(self):
        """Variables without shared dims are merged, not concatenated."""
        objs = [Dataset({"x": 0}), Dataset({"y": 1})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": 0, "y": 1})
        assert_identical(expected, actual)

        objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": 0, "y": 1, "z": 2})
        assert_identical(expected, actual)

    def test_check_for_impossible_ordering(self):
        """Overlapping, non-monotonic global indexes are rejected."""
        ds0 = Dataset({"x": [0, 1, 5]})
        ds1 = Dataset({"x": [2, 3]})
        with raises_regex(
                ValueError, "does not have monotonic global indexes"
                " along dimension x"):
            combine_by_coords([ds1, ds0])

    def test_combine_by_coords_incomplete_hypercube(self):
        # test that this succeeds with default fill_value
        x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
        x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
        x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})
        actual = combine_by_coords([x1, x2, x3])
        expected = Dataset(
            {"a": (("y", "x"), [[1, 1], [1, np.nan]])},
            coords={
                "y": [0, 1],
                "x": [0, 1]
            },
        )
        assert_identical(expected, actual)

        # test that this fails if fill_value is None
        with pytest.raises(ValueError):
            combine_by_coords([x1, x2, x3], fill_value=None)
Exemplo n.º 20
0
 def test_lazy_dataset(self):
     """A Dataset built from dask data keeps a dask array backend."""
     ds = Dataset({"foo": (("x", "y"), self.data)})
     backend = ds.foo.variable.data
     assert isinstance(backend, da.Array)
Exemplo n.º 21
0
 def test_empty_input(self):
     """Combining an empty list yields an empty Dataset."""
     result = combine_by_coords([])
     assert_identical(Dataset(), result)
Exemplo n.º 22
0
    def _write_netcdf(self, ):
        '''Assemble the cloud-object fields and statistics held on this
        instance into an xarray Dataset and write it to
        ``self._output_file`` in NETCDF4 format with per-variable dtype
        and fill-value encodings.
        '''

        # Data variables: the sparse object field plus per-object statistics.
        # NOTE(review): float32/int32/array appear to come from a numpy
        # import elsewhere in this module — confirm.
        data_dict = {
            'sparce_objects': (('sparce_1d_indx', ), self.csr_data),
            'single_layer_flag':
            (('allObjects_unq', ), self.single_layer_clouds),
            'extent': (
                ('allObjects_unq', 'stats'),
                self.objStatistics.extent_stats,
            ),
            # Heights are divided by 1000, presumably converting m -> km —
            # TODO confirm units.
            'top': (
                ('allObjects_unq', 'stats'),
                self.objStatistics.topHeight[self.unq_msk].astype(float32) /
                1000.,
            ),
            'base': (
                ('allObjects_unq', 'stats'),
                self.objStatistics.baseHeight[self.unq_msk].astype(float32) /
                1000.,
            ),
            'thickness': (
                ('allObjects_unq', 'stats'),
                self.objStatistics.thickness[self.unq_msk].astype(float32) /
                1000.,
            ),
            'lat_bounds': (('allObjects_unq', 'geo_bounds'), self.lat_bounds),
            'lon_bounds': (('allObjects_unq', 'geo_bounds'), self.lon_bounds),
        }

        # Coordinates: flattened sparse indices, the unique object ids, the
        # original 2-D field shape, and the four statistic columns
        # (documented below as min/mean/median/max).
        coords_dict = {
            'sparce_1d_indx': self.sparce_flat_indx.astype(int32),
            'allObjects_unq': self.unq_clds,
            'cloudSat_shape': array(self.cloudObjects.shape),
            'stats': [1, 2, 3, 4],
        }

        _outData = Dataset(data_vars=data_dict, coords=coords_dict)

        # Scale/offset packing was disabled; retained for reference.
        #_extent_offset,_extent_scale = self._add_scale_and_offset(_outData.extent.values)
        #_top_offset,_top_scale       = self._add_scale_and_offset(_outData.top.values)
        #_base_offset,_base_scale     = self._add_scale_and_offset(_outData.base.values)
        #_thick_offset,_thick_scale   = self._add_scale_and_offset(_outData.thickness.values)

        # Attach human-readable metadata to every variable/coordinate.
        _outData.cloudSat_shape.attrs = self.set_var_attributes(
            _outData.cloudSat_shape,
            description='Shape of cloudsat orbit',
        )
        _outData.sparce_1d_indx.attrs = self.set_var_attributes(
            _outData.sparce_1d_indx,
            description='flattend indices of sparce_objects variable',
        )
        _outData.sparce_objects.attrs = self.set_var_attributes(
            _outData.sparce_objects,
            description=
            'cloud objects corresponding to 1d coordinates from 2d cloudsat field',
        )
        _outData.allObjects_unq.attrs = self.set_var_attributes(
            _outData.allObjects_unq,
            description='unique list of cloud objects',
        )
        _outData.stats.attrs = self.set_var_attributes(
            _outData.stats,
            col_1='min',
            col_2='mean',
            col_3='median',
            col_4='max',
        )
        _outData.extent.attrs = self.set_var_attributes(
            _outData.extent,
            description='cloud object along-track extent',
        )
        _outData.top.attrs = self.set_var_attributes(
            _outData.top,
            description='cloud object top height',
        )
        _outData.base.attrs = self.set_var_attributes(
            _outData.base,
            description='cloud object base height',
        )
        _outData.thickness.attrs = self.set_var_attributes(
            _outData.thickness,
            description='cloud object thickness',
        )
        _outData.lat_bounds.attrs = self.set_var_attributes(
            _outData.lat_bounds,
            long_name='cloud object latitude bounds',
            units='degrees north',
        )
        _outData.lon_bounds.attrs = self.set_var_attributes(
            _outData.lon_bounds,
            long_name='cloud object longitude bounds',
            units='degrees east',
        )

        # Write out, narrowing on-disk dtypes and setting -999. as the
        # fill value for the float fields.
        _outData.to_netcdf(
            self._output_file,
            format='NETCDF4',
            encoding={
                'sparce_objects': {
                    'dtype': 'int32'
                },
                'single_layer_flag': {
                    'dtype': 'uint8'
                },
                'stats': {
                    'dtype': 'uint8'
                },
                'cloudSat_shape': {
                    'dtype': 'uint16'
                },
                'extent': {
                    'dtype': 'float32',
                    '_FillValue': -999.,
                    #'scale_factor' : _extent_scale,
                    #'add_offset'   : _extent_offset,
                },
                'top': {
                    'dtype': 'float32',
                    '_FillValue': -999.,
                    #'scale_factor' : _top_scale,
                    #'add_offset'   : _top_offset,
                },
                'base': {
                    'dtype': 'float32',
                    '_FillValue': -999.,
                    #'scale_factor' : _base_scale,
                    #'add_offset'   : _base_offset,
                },
                'thickness': {
                    'dtype': 'float32',
                    '_FillValue': -999.,
                    #'scale_factor' : _thick_scale,
                    #'add_offset'   : _thick_offset,
                },
                'lon_bounds': {
                    'dtype': 'float32',
                    '_FillValue': -999.,
                    #'scale_factor' : _thick_scale,
                    #'add_offset'   : _thick_offset,
                },
                'lat_bounds': {
                    'dtype': 'float32',
                    '_FillValue': -999.,
                    #'scale_factor' : _thick_scale,
                    #'add_offset'   : _thick_offset,
                },
            })
# Exemplo n.º 23
 def test_combine_coords_join_exact(self):
     """join="exact" must refuse to align differing "y" indexes."""
     datasets = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
     with raises_regex(ValueError, "indexes along dimension"):
         combine_nested(datasets, concat_dim="x", join="exact")
# Exemplo n.º 24
 def test_no_dimension_coords(self):
     """Without dimension coordinates the concat order cannot be inferred."""
     first = Dataset({"foo": ("x", [0, 1])})
     second = Dataset({"foo": ("x", [2, 3])})
     with raises_regex(ValueError, "Could not find any dimension"):
         _infer_concat_order_from_coords([second, first])
# Exemplo n.º 25
 def test_invalid_coordinates(self):
     """Regression test for GH308: a bogus 'coordinates' attribute must
     pass through decode_cf unchanged rather than raising."""
     original = Dataset({'foo': ('t', [1, 2], {'coordinates': 'invalid'})})
     decoded = conventions.decode_cf(original)
     assert_identical(original, decoded)
# Exemplo n.º 26
 def test_no_concatenation_needed(self):
     """A single dataset maps to the empty tile id with no concat dims."""
     ds = Dataset({"foo": ("x", [0, 1])})
     actual, concat_dims = _infer_concat_order_from_coords([ds])
     assert_combined_tile_ids_equal({(): ds}, actual)
     assert concat_dims == []
# Exemplo n.º 27
    def test_remap_label_indexers(self):
        """Check remap_label_indexers over scalar, tuple, slice, and dict
        labels, on both a plain index and a pandas MultiIndex."""

        def test_indexer(data, x, expected_pos, expected_idx=None):
            # Helper: remap the label ``x`` along dim "x" and compare the
            # resulting positional indexer (and, optionally, the new index).
            pos, idx = indexing.remap_label_indexers(data, {"x": x})
            assert_array_equal(pos.get("x"), expected_pos)
            assert_array_equal(idx.get("x"), expected_idx)

        data = Dataset({"x": ("x", [1, 2, 3])})
        mindex = pd.MultiIndex.from_product([["a", "b"], [1, 2], [-1, -2]],
                                            names=("one", "two", "three"))
        mdata = DataArray(range(8), [("x", mindex)])

        # Scalar labels (plain int, numpy int, 0-d Variable) and a full
        # MultiIndex tuple each resolve to a single position.
        test_indexer(data, 1, 0)
        test_indexer(data, np.int32(1), 0)
        test_indexer(data, Variable([], 1), 0)
        test_indexer(mdata, ("a", 1, -1), 0)
        # Partial tuples yield a boolean mask plus a reduced index.
        test_indexer(
            mdata,
            ("a", 1),
            [True, True, False, False, False, False, False, False],
            [-1, -2],
        )
        test_indexer(
            mdata,
            "a",
            slice(0, 4, None),
            pd.MultiIndex.from_product([[1, 2], [-1, -2]]),
        )
        test_indexer(
            mdata,
            ("a", ),
            [True, True, True, True, False, False, False, False],
            pd.MultiIndex.from_product([[1, 2], [-1, -2]]),
        )
        # Lists of full tuples and label-based slices.
        test_indexer(mdata, [("a", 1, -1), ("b", 2, -2)], [0, 7])
        test_indexer(mdata, slice("a", "b"), slice(0, 8, None))
        test_indexer(mdata, slice(("a", 1), ("b", 1)), slice(0, 6, None))
        # Dict labels select by named MultiIndex level.
        test_indexer(mdata, {"one": "a", "two": 1, "three": -1}, 0)
        test_indexer(
            mdata,
            {
                "one": "a",
                "two": 1
            },
            [True, True, False, False, False, False, False, False],
            [-1, -2],
        )
        test_indexer(
            mdata,
            {
                "one": "a",
                "three": -1
            },
            [True, False, True, False, False, False, False, False],
            [1, 2],
        )
        test_indexer(
            mdata,
            {"one": "a"},
            [True, True, True, True, False, False, False, False],
            pd.MultiIndex.from_product([[1, 2], [-1, -2]]),
        )
# Exemplo n.º 28
class TestNestedCombine:
    """Tests for ``combine_nested``: concatenating/merging datasets arranged
    in (possibly nested) lists along explicitly named dimensions."""

    def test_nested_concat(self):
        # Basic 1-D concatenation; concat_dim may be given as a string
        # or as a single-element list.
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        expected = Dataset({"x": [0, 1]})
        actual = combine_nested(objs, concat_dim="x")
        assert_identical(expected, actual)
        actual = combine_nested(objs, concat_dim=["x"])
        assert_identical(expected, actual)

        # A single-element list passes through unchanged...
        actual = combine_nested([actual], concat_dim=None)
        assert_identical(expected, actual)

        # ...whether or not a concat_dim is supplied.
        actual = combine_nested([actual], concat_dim="x")
        assert_identical(expected, actual)

        # Inputs of unequal length along the concat dimension.
        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_nested(objs, concat_dim="x")
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure combine_nested handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0])
            }),
            Dataset({
                "y": ("a", [1]),
                "x": ("a", [1])
            }),
        ]
        actual = combine_nested(objs, concat_dim="a")
        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
        assert_identical(expected, actual)

        # A variable present in only one input is carried through.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})]
        actual = combine_nested(objs, concat_dim="x")
        expected = Dataset({"x": [0, 1], "y": [0]})
        assert_identical(expected, actual)

    @pytest.mark.parametrize(
        "join, expected",
        [
            ("outer", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            })),
            ("inner", Dataset({
                "x": [0, 1],
                "y": []
            })),
            ("left", Dataset({
                "x": [0, 1],
                "y": [0]
            })),
            ("right", Dataset({
                "x": [0, 1],
                "y": [1]
            })),
        ],
    )
    def test_combine_nested_join(self, join, expected):
        # Each ``join`` option aligns the non-concatenated "y" dim differently.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        actual = combine_nested(objs, concat_dim="x", join=join)
        assert_identical(expected, actual)

    def test_combine_nested_join_exact(self):
        # join="exact" must raise when non-concatenated indexes differ.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        with raises_regex(ValueError, "indexes along dimension"):
            combine_nested(objs, concat_dim="x", join="exact")

    def test_empty_input(self):
        # An empty input list combines to an empty Dataset.
        assert_identical(Dataset(), combine_nested([], concat_dim="x"))

    # Fails because of concat's weird treatment of dimension coords, see #2975
    @pytest.mark.xfail
    def test_nested_concat_too_many_dims_at_once(self):
        objs = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})]
        with pytest.raises(ValueError, match="not equal across datasets"):
            combine_nested(objs, concat_dim="x", coords="minimal")

    def test_nested_concat_along_new_dim(self):
        # Concatenating along a dim absent from the inputs creates it.
        objs = [
            Dataset({
                "a": ("x", [10]),
                "x": [0]
            }),
            Dataset({
                "a": ("x", [20]),
                "x": [0]
            }),
        ]
        expected = Dataset({"a": (("t", "x"), [[10], [20]]), "x": [0]})
        actual = combine_nested(objs, concat_dim="t")
        assert_identical(expected, actual)

        # Same but with a DataArray as new dim, see GH #1988 and #2647
        dim = DataArray([100, 150], name="baz", dims="baz")
        expected = Dataset({
            "a": (("baz", "x"), [[10], [20]]),
            "x": [0],
            "baz": [100, 150]
        })
        actual = combine_nested(objs, concat_dim=dim)
        assert_identical(expected, actual)

    def test_nested_merge(self):
        # concat_dim=None requests a merge instead of a concatenation.
        data = Dataset({"x": 0})
        actual = combine_nested([data, data, data], concat_dim=None)
        assert_identical(data, actual)

        ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
        ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
        expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})
        actual = combine_nested([ds1, ds2], concat_dim=None)
        assert_identical(expected, actual)
        actual = combine_nested([ds1, ds2], concat_dim=[None])
        assert_identical(expected, actual)

        # NaN in one input must not clobber the value from the other.
        tmp1 = Dataset({"x": 0})
        tmp2 = Dataset({"x": np.nan})
        actual = combine_nested([tmp1, tmp2], concat_dim=None)
        assert_identical(tmp1, actual)
        actual = combine_nested([tmp1, tmp2], concat_dim=[None])
        assert_identical(tmp1, actual)

        # Single object, with a concat_dim explicitly provided
        # Test the issue reported in GH #1988
        objs = [Dataset({"x": 0, "y": 1})]
        dim = DataArray([100], name="baz", dims="baz")
        actual = combine_nested(objs, concat_dim=[dim])
        expected = Dataset({
            "x": ("baz", [0]),
            "y": ("baz", [1])
        }, {"baz": [100]})
        assert_identical(expected, actual)

        # Just making sure that combine_nested is doing what is
        # expected for non-scalar values, too.
        objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})]
        dim = DataArray([100], name="baz", dims="baz")
        actual = combine_nested(objs, concat_dim=[dim])
        expected = Dataset(
            {
                "x": (("baz", "z"), [[0, 1]]),
                "y": (("baz", "z"), [[1, 2]])
            },
            {"baz": [100]},
        )
        assert_identical(expected, actual)

    def test_concat_multiple_dims(self):
        # A 2x2 nested list concatenated along two dims yields a 2-D grid.
        objs = [
            [
                Dataset({"a": (("x", "y"), [[0]])}),
                Dataset({"a": (("x", "y"), [[1]])})
            ],
            [
                Dataset({"a": (("x", "y"), [[2]])}),
                Dataset({"a": (("x", "y"), [[3]])})
            ],
        ]
        actual = combine_nested(objs, concat_dim=["x", "y"])
        expected = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])})
        assert_identical(expected, actual)

    def test_concat_name_symmetry(self):
        """Inspired by the discussion on GH issue #2777"""

        da1 = DataArray(name="a", data=[[0]], dims=["x", "y"])
        da2 = DataArray(name="b", data=[[1]], dims=["x", "y"])
        da3 = DataArray(name="a", data=[[2]], dims=["x", "y"])
        da4 = DataArray(name="b", data=[[3]], dims=["x", "y"])

        # The result should not depend on which dim is concatenated first.
        x_first = combine_nested([[da1, da2], [da3, da4]],
                                 concat_dim=["x", "y"])
        y_first = combine_nested([[da1, da3], [da2, da4]],
                                 concat_dim=["y", "x"])

        assert_identical(x_first, y_first)

    def test_concat_one_dim_merge_another(self):
        data = create_test_data()
        data1 = data.copy(deep=True)
        data2 = data.copy(deep=True)

        objs = [
            [
                data1.var1.isel(dim2=slice(4)),
                data2.var1.isel(dim2=slice(4, 9))
            ],
            [
                data1.var2.isel(dim2=slice(4)),
                data2.var2.isel(dim2=slice(4, 9))
            ],
        ]

        # Rows are merged (concat_dim None); columns concatenate along dim2.
        expected = data[["var1", "var2"]]
        actual = combine_nested(objs, concat_dim=[None, "dim2"])
        assert expected.identical(actual)

    def test_auto_combine_2d(self):
        ds = create_test_data

        # Build the expected 2-D result by manual pairwise concatenation.
        partway1 = concat([ds(0), ds(3)], dim="dim1")
        partway2 = concat([ds(1), ds(4)], dim="dim1")
        partway3 = concat([ds(2), ds(5)], dim="dim1")
        expected = concat([partway1, partway2, partway3], dim="dim2")

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]
        result = combine_nested(datasets, concat_dim=["dim1", "dim2"])
        assert_equal(result, expected)

    def test_auto_combine_2d_combine_attrs_kwarg(self):
        ds = create_test_data

        partway1 = concat([ds(0), ds(3)], dim="dim1")
        partway2 = concat([ds(1), ds(4)], dim="dim1")
        partway3 = concat([ds(2), ds(5)], dim="dim1")
        expected = concat([partway1, partway2, partway3], dim="dim2")

        # Expected attrs for each supported combine_attrs option.
        expected_dict = {}
        expected_dict["drop"] = expected.copy(deep=True)
        expected_dict["drop"].attrs = {}
        expected_dict["no_conflicts"] = expected.copy(deep=True)
        expected_dict["no_conflicts"].attrs = {
            "a": 1,
            "b": 2,
            "c": 3,
            "d": 4,
            "e": 5,
            "f": 6,
        }
        expected_dict["override"] = expected.copy(deep=True)
        expected_dict["override"].attrs = {"a": 1}

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]

        # Every dataset shares "a" but each adds one unique attribute.
        datasets[0][0].attrs = {"a": 1}
        datasets[0][1].attrs = {"a": 1, "b": 2}
        datasets[0][2].attrs = {"a": 1, "c": 3}
        datasets[1][0].attrs = {"a": 1, "d": 4}
        datasets[1][1].attrs = {"a": 1, "e": 5}
        datasets[1][2].attrs = {"a": 1, "f": 6}

        # Differing attrs must raise under combine_attrs="identical".
        with raises_regex(ValueError, "combine_attrs='identical'"):
            result = combine_nested(datasets,
                                    concat_dim=["dim1", "dim2"],
                                    combine_attrs="identical")

        for combine_attrs in expected_dict:
            result = combine_nested(datasets,
                                    concat_dim=["dim1", "dim2"],
                                    combine_attrs=combine_attrs)
            assert_identical(result, expected_dict[combine_attrs])

    def test_combine_nested_missing_data_new_dim(self):
        # Concatenating along new dim "t" where each dataset covers a
        # different "x" range: missing positions are filled with NaN.
        datasets = [
            Dataset({
                "a": ("x", [2, 3]),
                "x": [1, 2]
            }),
            Dataset({
                "a": ("x", [1, 2]),
                "x": [0, 1]
            }),
        ]
        expected = Dataset(
            {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])},
            {"x": [0, 1, 2]})
        actual = combine_nested(datasets, concat_dim="t")
        assert_identical(expected, actual)

    def test_invalid_hypercube_input(self):
        ds = create_test_data

        # Ragged sub-list lengths are rejected.
        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]]
        with raises_regex(ValueError, "sub-lists do not have "
                          "consistent lengths"):
            combine_nested(datasets, concat_dim=["dim1", "dim2"])

        # Inconsistent nesting depths are rejected.
        datasets = [[ds(0), ds(1)], [[ds(3), ds(4)]]]
        with raises_regex(ValueError, "sub-lists do not have "
                          "consistent depths"):
            combine_nested(datasets, concat_dim=["dim1", "dim2"])

        # concat_dim length must match the nesting depth.
        datasets = [[ds(0), ds(1)], [ds(3), ds(4)]]
        with raises_regex(ValueError, "concat_dims has length"):
            combine_nested(datasets, concat_dim=["dim1"])

    def test_merge_one_dim_concat_another(self):
        # Outer level merges (None); inner level concatenates along "x".
        objs = [
            [
                Dataset({"foo": ("x", [0, 1])}),
                Dataset({"bar": ("x", [10, 20])})
            ],
            [
                Dataset({"foo": ("x", [2, 3])}),
                Dataset({"bar": ("x", [30, 40])})
            ],
        ]
        expected = Dataset({
            "foo": ("x", [0, 1, 2, 3]),
            "bar": ("x", [10, 20, 30, 40])
        })

        actual = combine_nested(objs, concat_dim=["x", None], compat="equals")
        assert_identical(expected, actual)

        # Proving it works symmetrically
        objs = [
            [Dataset({"foo": ("x", [0, 1])}),
             Dataset({"foo": ("x", [2, 3])})],
            [
                Dataset({"bar": ("x", [10, 20])}),
                Dataset({"bar": ("x", [30, 40])})
            ],
        ]
        actual = combine_nested(objs, concat_dim=[None, "x"], compat="equals")
        assert_identical(expected, actual)

    def test_combine_concat_over_redundant_nesting(self):
        # None entries in concat_dim cover single-element nesting levels.
        objs = [[Dataset({"x": [0]}), Dataset({"x": [1]})]]
        actual = combine_nested(objs, concat_dim=[None, "x"])
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({"x": [0]})], [Dataset({"x": [1]})]]
        actual = combine_nested(objs, concat_dim=["x", None])
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({"x": [0]})]]
        actual = combine_nested(objs, concat_dim=[None, None])
        expected = Dataset({"x": [0]})
        assert_identical(expected, actual)

    @pytest.mark.parametrize("fill_value",
                             [dtypes.NA, 2, 2.0, {
                                 "a": 2,
                                 "b": 1
                             }])
    def test_combine_nested_fill_value(self, fill_value):
        # fill_value may be a scalar or a per-variable dict.
        datasets = [
            Dataset({
                "a": ("x", [2, 3]),
                "b": ("x", [-2, 1]),
                "x": [1, 2]
            }),
            Dataset({
                "a": ("x", [1, 2]),
                "b": ("x", [3, -1]),
                "x": [0, 1]
            }),
        ]
        if fill_value == dtypes.NA:
            # if we supply the default, we expect the missing value for a
            # float array
            fill_value_a = fill_value_b = np.nan
        elif isinstance(fill_value, dict):
            fill_value_a = fill_value["a"]
            fill_value_b = fill_value["b"]
        else:
            fill_value_a = fill_value_b = fill_value
        expected = Dataset(
            {
                "a":
                (("t", "x"), [[fill_value_a, 2, 3], [1, 2, fill_value_a]]),
                "b":
                (("t", "x"), [[fill_value_b, -2, 1], [3, -1, fill_value_b]]),
            },
            {"x": [0, 1, 2]},
        )
        actual = combine_nested(datasets,
                                concat_dim="t",
                                fill_value=fill_value)
        assert_identical(expected, actual)
# Exemplo n.º 29
    def test_concat_loads_variables(self):
        """Verify how many dask kernel invocations each concat mode costs,
        and that inputs are left unaltered."""
        # Test that concat() computes not-in-memory variables at most once
        # and loads them in the output, while leaving the input unaltered.
        d1 = build_dask_array('d1')
        c1 = build_dask_array('c1')
        d2 = build_dask_array('d2')
        c2 = build_dask_array('c2')
        d3 = build_dask_array('d3')
        c3 = build_dask_array('c3')
        # Note: c is a non-index coord.
        # Index coords are loaded by IndexVariable.__init__.
        ds1 = Dataset(data_vars={'d': ('x', d1)}, coords={'c': ('x', c1)})
        ds2 = Dataset(data_vars={'d': ('x', d2)}, coords={'c': ('x', c2)})
        ds3 = Dataset(data_vars={'d': ('x', d3)}, coords={'c': ('x', c3)})

        assert kernel_call_count == 0
        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different')
        # each kernel is computed exactly once
        assert kernel_call_count == 6
        # variables are loaded in the output
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='all',
                        coords='all')
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars=['d'],
                        coords=['c'])
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3], dim='n', data_vars=[], coords=[])
        # variables are loaded once as we are validating that they're identical
        assert kernel_call_count == 12
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different',
                        compat='identical')
        # compat=identical doesn't do any more kernel calls than compat=equals
        assert kernel_call_count == 18
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        # When the test for different turns true halfway through,
        # stop computing variables as it would not have any benefit
        ds4 = Dataset(data_vars={'d': ('x', [2.0])},
                      coords={'c': ('x', [2.0])})
        out = xr.concat([ds1, ds2, ds4, ds3],
                        dim='n',
                        data_vars='different',
                        coords='different')
        # the variables of ds1 and ds2 were computed, but those of ds3 didn't
        assert kernel_call_count == 22
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)
        # the data of ds1 and ds2 was loaded into numpy and then
        # concatenated to the data of ds3. Thus, only ds3 is computed now.
        out.compute()
        assert kernel_call_count == 24

        # Finally, test that originals are unaltered
        assert ds1['d'].data is d1
        assert ds1['c'].data is c1
        assert ds2['d'].data is d2
        assert ds2['c'].data is c2
        assert ds3['d'].data is d3
        assert ds3['c'].data is c3
# Exemplo n.º 30
 def test_combine_nested_but_need_auto_combine(self):
     """Concatenating along 'x' alone cannot reconcile the extra 'wall'
     variable, so combine_nested must raise."""
     datasets = [Dataset({'x': [0, 1]}), Dataset({'x': [2], 'wall': [0]})]
     with raises_regex(ValueError, 'cannot be combined'):
         combine_nested(datasets, concat_dim='x')