def test_proper_unstack_other_mi():
    """Unstack the ``row`` MultiIndex while ``col`` keeps its own MultiIndex.

    The unstacked levels ``x`` and ``w`` are expected to land to the right
    of the untouched ``col`` dimension.
    """
    row_index = pandas.MultiIndex.from_tuples(
        [("x0", "w0"), ("x0", "w1"), ("x1", "w0"), ("x1", "w1")],
        names=["x", "w"],
    )
    col_labels = [("y0", "z0"), ("y0", "z1"), ("y1", "z0"), ("y1", "z1")]
    stacked = xarray.DataArray(
        [[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, 3, 4], [5, 6, 7, 8]],
        dims=["row", "col"],
        coords={
            "row": row_index,
            "col": pandas.MultiIndex.from_tuples(col_labels, names=["y", "z"]),
        },
    )

    actual = proper_unstack(stacked, "row")

    expected = xarray.DataArray(
        [[[1, 5], [1, 5]], [[2, 6], [2, 6]], [[3, 7], [3, 7]], [[4, 8], [4, 8]]],
        dims=["col", "x", "w"],
        coords={
            "col": pandas.MultiIndex.from_tuples(col_labels, names=["y", "z"]),
            "x": ["x0", "x1"],
            "w": ["w0", "w1"],
        },
    )
    xarray.testing.assert_equal(actual, expected)
def test_proper_unstack_order():
    """proper_unstack keeps labels in first-seen order; plain unstack differs."""
    # Note: using MultiIndex.from_tuples is NOT the same thing as
    # round-tripping DataArray.stack().unstack(), as the latter is not
    # affected by the re-ordering issue
    x_labels = ["x1", "x0"]
    count_labels = ["first", "second", "third", "fourth"]
    index = pandas.MultiIndex.from_tuples(
        [[x, count] for x in x_labels for count in count_labels],
        names=["x", "count"],
    )
    stacked = xarray.DataArray(
        numpy.arange(8),
        dims=["dim_0"],
        coords={"dim_0": index},
    )

    actual = proper_unstack(stacked, "dim_0")

    expected = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]],
        dims=["x", "count"],
        coords={"x": x_labels, "count": count_labels},
    )
    xarray.testing.assert_equal(actual, expected)

    # Order is different
    with pytest.raises(AssertionError):
        xarray.testing.assert_equal(actual, stacked.unstack("dim_0"))
def test_proper_unstack_other_mi():
    """proper_unstack on 'row' must not disturb the MultiIndex held by 'col'"""
    def col_index():
        # Fresh MultiIndex for the dimension that stays stacked
        return pandas.MultiIndex.from_tuples(
            [('y0', 'z0'), ('y0', 'z1'), ('y1', 'z0'), ('y1', 'z1')],
            names=['y', 'z'])

    source = xarray.DataArray(
        [[1, 2, 3, 4],
         [5, 6, 7, 8],
         [1, 2, 3, 4],
         [5, 6, 7, 8]],
        dims=['row', 'col'],
        coords={
            'row': pandas.MultiIndex.from_tuples(
                [('x0', 'w0'), ('x0', 'w1'), ('x1', 'w0'), ('x1', 'w1')],
                names=['x', 'w']),
            'col': col_index()})
    result = proper_unstack(source, 'row')

    # The levels of 'row' become the trailing dims 'x' and 'w'
    wanted = xarray.DataArray(
        [[[1, 5], [1, 5]],
         [[2, 6], [2, 6]],
         [[3, 7], [3, 7]],
         [[4, 8], [4, 8]]],
        dims=['col', 'x', 'w'],
        coords={'col': col_index(),
                'x': ['x0', 'x1'],
                'w': ['w0', 'w1']})
    xarray.testing.assert_equal(result, wanted)
def test_proper_unstack_order():
    """Labels must come out in first-seen order ('x1' before 'x0')"""
    # Note: using MultiIndex.from_tuples is NOT the same thing as
    # round-tripping DataArray.stack().unstack(), as the latter is not
    # affected by the re-ordering issue
    counts = ['first', 'second', 'third', 'fourth']
    pairs = [['x1', c] for c in counts] + [['x0', c] for c in counts]
    mindex = pandas.MultiIndex.from_tuples(pairs, names=['x', 'count'])
    flat = xarray.DataArray(
        numpy.arange(8), dims=['dim_0'], coords={'dim_0': mindex})

    got = proper_unstack(flat, 'dim_0')
    want = xarray.DataArray(
        [[0, 1, 2, 3],
         [4, 5, 6, 7]],
        dims=['x', 'count'],
        coords={'x': ['x1', 'x0'], 'count': counts})
    xarray.testing.assert_equal(got, want)

    builtin = pytest.raises(AssertionError)
    with builtin:
        # Order is different
        xarray.testing.assert_equal(got, flat.unstack('dim_0'))
def test_proper_unstack_mixed_coords():
    """Round-trip stack/proper_unstack with labels of mixed Python types"""
    original = xarray.DataArray(
        [[0, 1, 2, 3],
         [4, 5, 6, 7]],
        dims=['r', 'c'],
        coords={'r': [1, 'x0'],
                'c': [1, 2.2, '3', 'fourth']})
    roundtrip = proper_unstack(original.stack(s=['r', 'c']), 's')
    xarray.testing.assert_equal(original, roundtrip)
def test_proper_unstack_mixed_coords():
    """Stacking then proper_unstack-ing must reproduce mixed-type coords."""
    mixed_coords = {"r": [1, "x0"], "c": [1, 2.2, "3", "fourth"]}
    expected = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]],
        dims=["r", "c"],
        coords=mixed_coords,
    )

    stacked = expected.stack(s=["r", "c"])
    actual = proper_unstack(stacked, "s")

    xarray.testing.assert_equal(expected, actual)
def test_proper_unstack_dtype():
    """Test that we don't accidentally end up with dtype=O for the coords

    The datetime and integer coords are stacked and then unstacked again;
    besides round-tripping the values, neither coord may come back as an
    object array.
    """
    a = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]],
        dims=['r', 'c'],
        coords={'r': pandas.to_datetime(['2000/01/01', '2000/01/02']),
                'c': [1, 2, 3, 4]})
    b = a.stack(s=['r', 'c'])
    c = proper_unstack(b, 's')
    xarray.testing.assert_equal(a, c)
    # assert_equal only compares values, so a coord holding the same labels
    # as Python objects would slip through; check the dtype explicitly.
    for name in ('r', 'c'):
        assert c.coords[name].dtype.kind != 'O', name
def test_proper_unstack_dtype():
    """Test that we don't accidentally end up with dtype=O for the coords.

    The datetime and integer coords are stacked and then unstacked again;
    besides round-tripping the values, neither coord may come back as an
    object array.
    """
    a = xarray.DataArray(
        [[0, 1, 2, 3], [4, 5, 6, 7]],
        dims=["r", "c"],
        coords={
            "r": pandas.to_datetime(["2000/01/01", "2000/01/02"]),
            "c": [1, 2, 3, 4],
        },
    )
    b = a.stack(s=["r", "c"])
    c = proper_unstack(b, "s")
    xarray.testing.assert_equal(a, c)
    # assert_equal only compares values, so a coord holding the same labels
    # as Python objects would slip through; check the dtype explicitly.
    for name in ("r", "c"):
        assert c.coords[name].dtype.kind != "O", name
def _write_csv_dataarray(array: xarray.DataArray, buf: IO) -> None:
    """Write :class:`xarray.DataArray` to buffer

    A 0-dimensional array is written as its bare value followed by a newline.
    Any other array is reshaped so that it has at most two dimensions and so
    that its non-index coords are part of the indexes, then handed over to
    ``_write_csv_pandas``.

    :param array:
        array to be written
    :param buf:
        writeable buffer
    :raises ValueError:
        if the array has a coord spanning more than one dimension
    """
    # 0D (scalar) array
    if array.ndim == 0:
        buf.write(f"{array.values}\n")
        return

    # Keep track of non-index coordinates, labelling them as `name (dim)`.
    # Note that scalar (a-dimensional) coords are silently discarded
    nonindex_names = {}
    for name, coord in array.coords.items():
        coord_ndim = len(coord.dims)
        if coord_ndim > 1:
            raise ValueError(
                f"Multi-dimensional coord '{name}' is not supported by the NDCSV format"
            )
        if coord_ndim == 1 and coord.dims[0] != name:
            nonindex_names[name] = f"{name} ({coord.dims[0]})"
    array = array.rename(nonindex_names)

    if array.ndim > 2:
        # Automatically stack dims beyond the first.
        # In the case where there's already a MultiIndex on a dim beyond
        # the first, first unstack them and then stack them again back all
        # together.
        for column_dim in array.dims[1:]:
            if isinstance(array.get_index(column_dim), pandas.MultiIndex):
                # Note: unstacked dims end up on the right
                array = proper_unstack(array, column_dim)
        # The __columns__ label is completely arbitrary and we're going
        # to lose it in a few moments when dumping to CSV.
        array = array.stack(__columns__=array.dims[1:])

    # non-index coords are lost when converting to pandas.
    # Incorporate them into the MultiIndex
    for dim in array.dims:
        from_mindex = isinstance(array.coords[dim].to_index(), pandas.MultiIndex)
        if from_mindex:
            array = array.reset_index(dim)
        elif dim not in array.coords:
            # Force default RangeIndex
            array.coords[dim] = array.coords[dim]

        dim_coord_names = list(array[dim].coords)
        if dim_coord_names == [dim]:
            # Only the plain index coord is attached: nothing to merge
            continue
        if from_mindex:
            index_name = dim
        else:
            index_name = f"{dim}_mindex"
        array = array.set_index({index_name: dim_coord_names})  # type: ignore

    _write_csv_pandas(array.to_pandas(), buf)
def test_proper_unstack_dataset():
    """proper_unstack on a Dataset where only 'foo' carries the stacked dim"""
    col_index = pandas.MultiIndex.from_tuples(
        [('u0', 'v0'), ('u0', 'v1'), ('u1', 'v0'), ('u1', 'v1')],
        names=['u', 'v'])
    stacked = xarray.DataArray(
        [[1, 2, 3, 4],
         [5, 6, 7, 8]],
        dims=['x', 'col'],
        coords={'x': ['x0', 'x1'], 'col': col_index})
    ds_in = xarray.Dataset(
        {'foo': stacked, 'bar': ('w', [1, 2]), 'baz': numpy.pi})

    ds_out = proper_unstack(ds_in, 'col')

    unstacked = xarray.DataArray(
        [[[1, 2], [3, 4]],
         [[5, 6], [7, 8]]],
        dims=['x', 'u', 'v'],
        coords={'x': ['x0', 'x1'],
                'u': ['u0', 'u1'],
                'v': ['v0', 'v1']})
    ds_expected = xarray.Dataset(
        {'foo': unstacked, 'bar': ('w', [1, 2]), 'baz': numpy.pi})
    xarray.testing.assert_equal(ds_out, ds_expected)

    # Every coord of the result must be a unicode string array
    for name in ds_out.coords:
        assert ds_out.coords[name].dtype.kind == 'U'
def test_proper_unstack_dataset():
    """Unstack ``col`` on a Dataset and check values and coord dtypes."""
    x_labels = ["x0", "x1"]
    foo_stacked = xarray.DataArray(
        [[1, 2, 3, 4], [5, 6, 7, 8]],
        dims=["x", "col"],
        coords={
            "x": x_labels,
            "col": pandas.MultiIndex.from_tuples(
                [("u0", "v0"), ("u0", "v1"), ("u1", "v0"), ("u1", "v1")],
                names=["u", "v"],
            ),
        },
    )
    dataset = xarray.Dataset(
        {"foo": foo_stacked, "bar": ("w", [1, 2]), "baz": numpy.pi}
    )

    actual = proper_unstack(dataset, "col")

    foo_unstacked = xarray.DataArray(
        [[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
        dims=["x", "u", "v"],
        coords={"x": x_labels, "u": ["u0", "u1"], "v": ["v0", "v1"]},
    )
    expected = xarray.Dataset(
        {"foo": foo_unstacked, "bar": ("w", [1, 2]), "baz": numpy.pi}
    )
    xarray.testing.assert_equal(actual, expected)

    # The coords must all be unicode strings
    for coord_name in actual.coords:
        coord = actual.coords[coord_name]
        assert coord.dtype.kind == "U"
def _unpack(xa: DataArray, dim: Hashable, unstack: bool = True) -> DataArray:
    """Deal with MultiIndex and non-index coords

    Coords attached to ``dim`` are split in two groups: those whose name
    looks like ``name (other_dim)`` are non-index coords, all the others are
    levels of a stacked index.

    :param DataArray xa:
        array where all MultiIndex'es have been reset
    :param str dim:
        dim to unstack (dim_0 or dim_1). This function does nothing if the
        dim is not present at all
    :param bool unstack:
        If True, unstack all index dims using first-seen order
    :returns:
        array where ``dim`` has been replaced by the dimension(s) named by its
        coords and non-index coords have been renamed from ``name (dim)`` to
        ``name``
    :raises ValueError:
        if, after unstacking, a non-index coord has different values for the
        same value of its dimension
    """
    if dim not in xa.dims:
        # Honour the documented no-op. Without this guard no coord matches,
        # ``dims`` stays empty and the fallback branch at the bottom trips on
        # its ``len(dims) == 1`` assertion.
        return xa

    rename_map = {}  # `name (dim)` -> `name`
    dims = []  # Target dimension names, in first-seen order
    index_coords = []  # Coords that are levels of the stacked index
    nonindex_coords = []  # (coord name, dimension it belongs to)

    for k, v in xa.coords.items():
        assert len(v.dims) == 1
        if v.dims[0] == dim:
            # Non-index coords are formatted as `name (dim)`
            m = re.match(r"(.+) \((.+)\)$", k)
            if m:
                coord_name, coord_dim = m.group(1), m.group(2)
                # Non-index coordinate
                rename_map[k] = coord_name
                nonindex_coords.append((k, coord_dim))
                if coord_dim not in dims:
                    dims.append(coord_dim)
            else:
                # Stacked dimension
                index_coords.append(k)
                if k not in dims:
                    dims.append(k)

    # If multiple index coordinates, set a MultiIndex for them
    # Leave non-index coordinates out
    if len(dims) > 1:
        # Unstack MultiIndex, using a first-seen order
        xa = xa.set_index({dim: index_coords})  # type: ignore
        if unstack:
            xa = proper_unstack(xa, dim)

            # Now non-index coords will have become multi-dimensional
            # Drop extra dims if there is no ambiguity, otherwise raise error
            for coord, coord_dim in nonindex_coords:
                cvalue = xa.coords[coord]
                # First slice along every dimension other than the coord's own
                slice0 = cvalue.isel(
                    {
                        other_dim: 0
                        for other_dim in cvalue.dims
                        if other_dim != coord_dim
                    },
                    drop=True,
                )
                if (cvalue == slice0).all():
                    xa.coords[coord] = slice0
                else:
                    raise ValueError(
                        f"Non-index coord {coord} has different values for the same "
                        f"value of its dimension {coord_dim}")

        # Finally rename non-index coords
        # NOTE(review): applied whether or not ``unstack`` is set, like in the
        # branches below - confirm this is the intended nesting.
        xa = xa.rename(rename_map)
    elif len(nonindex_coords) == 1 and not index_coords:
        # Special case where the dim will be y (x)
        assert len(dims) == 1
        assert len(rename_map) == 1
        new_dim = nonindex_coords[0][1]
        old_dim, coord_name = next(iter(rename_map.items()))
        # Keep the values aside before the coord is renamed and dropped
        coord_value = xa.coords[xa.dims[0]]
        xa = xa.rename({old_dim: new_dim})
        del xa.coords[xa.dims[0]]
        # Re-attach the values as a non-index coord along the renamed dim
        xa.coords[coord_name] = (xa.dims[0], coord_value.data)
    else:
        assert len(dims) == 1
        # Rename dim_0, dim_1 as index coord (if necessary)
        xa = xa.rename({dim: dims[0]})
        # Finally rename non-index coords
        xa = xa.rename(rename_map)
    return xa