def test_dataset_2D_aggregate_partial_hm(self):
    # Aggregating a gridded 2D dataset over 'x' alone mean-reduces the
    # value array along axis 0 (the y axis of the (y, x) array).
    array = np.random.rand(11, 11)
    dataset = Dataset({'x': self.xs, 'y': self.y_ints, 'z': array},
                      kdims=['x', 'y'], vdims=['z'])
    self.assertEqual(dataset.aggregate(['x'], np.mean),
                     Dataset({'x': self.xs, 'z': np.mean(array, axis=0)},
                             kdims=['x'], vdims=['z']))
def test_dimension_values_vdim(self):
    # Gridded (flat=False) values of the 'unknown' vdim: expected array is
    # the transposed lookup table flipped upside down — presumably because
    # this fixture stores the latitude axis inverted (TODO confirm against
    # the cube fixture).
    cube = Dataset(self.cube, kdims=['longitude', 'latitude'])
    self.assertEqual(cube.dimension_values('unknown', flat=False),
                     np.flipud(np.array([[0, 4, 8], [1, 5, 9],
                                         [2, 6, 10], [3, 7, 11]],
                                        dtype=np.int32).T))
def test_dataset_groupby_drop_dims_with_vdim(self):
    # Converting with groupby='y' drops the x/z key dims; the last group
    # should hold the flattened (transposed) slice at the final y value.
    array = np.random.rand(3, 20, 10)
    ds = Dataset({'x': range(10), 'y': range(20), 'z': range(3),
                  'Val': array, 'Val2': array*2},
                 kdims=['x', 'y', 'z'], vdims=['Val', 'Val2'])
    with DatatypeContext([self.datatype, 'dictionary', 'dataframe'],
                         (ds, Dataset)):
        partial = ds.to(Dataset, kdims=['Val'], vdims=['Val2'], groupby='y')
    self.assertEqual(partial.last['Val'], array[:, -1, :].T.flatten())
def test_dataset_groupby_drop_dims_dynamic_with_vdim(self):
    # Dynamic variant of the drop-dims groupby: the y=19 group is only
    # materialized on indexing, so the assertion stays inside the datatype
    # context; the dask-backed expectation must be computed for comparison.
    array = da.from_array(np.random.rand(3, 20, 10), 3)
    ds = Dataset({'x': range(10), 'y': range(20), 'z': range(3),
                  'Val': array, 'Val2': array*2},
                 kdims=['x', 'y', 'z'], vdims=['Val', 'Val2'])
    with DatatypeContext([self.datatype, 'dictionary', 'dataframe'],
                         (ds, Dataset)):
        partial = ds.to(Dataset, kdims=['Val'], vdims=['Val2'],
                        groupby='y', dynamic=True)
        self.assertEqual(partial[19]['Val'],
                         array[:, -1, :].T.flatten().compute())
def test_dataset_2D_aggregate_partial_hm_alias(self):
    # Aggregation requested via the dimension alias 'X' must resolve to
    # 'x'; the dask-backed mean is computed for the expected dataset.
    array = da.from_array(np.random.rand(11, 11), 3)
    dataset = Dataset({'x': self.xs, 'y': self.y_ints, 'z': array},
                      kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
    self.assertEqual(dataset.aggregate(['X'], np.mean),
                     Dataset({'x': self.xs,
                              'z': np.mean(array, axis=0).compute()},
                             kdims=[('x', 'X')], vdims=[('z', 'Z')]))
def test_dataset_2D_reduce_hm_alias(self):
    # Reducing over both key dims collapses to the scalar mean; both the
    # original names and their aliases must be accepted.
    array = np.random.rand(11, 11)
    dataset = Dataset({'x': self.xs, 'y': self.y_ints, 'z': array},
                      kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
    self.assertEqual(np.array(dataset.reduce(['x', 'y'], np.mean)),
                     np.mean(array))
    self.assertEqual(np.array(dataset.reduce(['X', 'Y'], np.mean)),
                     np.mean(array))
def test_irregular_grid_data_values_inverted_y(self):
    # Irregular (curvilinear) grid with descending y coordinates: values
    # must come back in original array order and the raw coordinate
    # arrays must be preserved untouched.
    nx, ny = 20, 5
    xs, ys = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)*-1+0.5)
    zs = np.arange(100).reshape(5, 20)
    ds = Dataset((xs, ys, zs), ['x', 'y'], 'z')
    self.assertEqual(ds.dimension_values(2, flat=False), zs)
    self.assertEqual(ds.interface.coords(ds, 'x'), xs)
    self.assertEqual(ds.interface.coords(ds, 'y'), ys)
def test_multi_dimension_groupby(self):
    # Groupby over two key dims: keys appear in the interleaved order
    # asserted below, and the final group holds the z values for
    # x=='B', y==2 (indices 5, 11, 17).
    x, y, z = list('AB'*10), np.arange(20) % 3, np.arange(20)
    ds = Dataset((x, y, z), kdims=['x', 'y'], vdims=['z'],
                 datatype=[self.datatype])
    keys = [('A', 0), ('B', 1), ('A', 2), ('B', 0), ('A', 1), ('B', 2)]
    grouped = ds.groupby(['x', 'y'])
    self.assertEqual(grouped.keys(), keys)
    group = Dataset({'z': [5, 11, 17]}, vdims=['z'])
    self.assertEqual(grouped.last, group)
def test_select_dropped_dimensions_restoration(self):
    """Selecting a scalar 'chain' keeps that dim with length 1 in the data."""
    # Renamed the local DataArray (previously `da`) to avoid shadowing the
    # module-level dask.array alias `da` used by sibling tests.
    values = np.random.randn(3, 8)
    xrarr = xr.DataArray(values, name='stuff', dims=['chain', 'value'],
                         coords=dict(chain=range(values.shape[0]),
                                     value=range(values.shape[1])))
    ds = Dataset(xrarr)
    t = ds.select(chain=0)
    self.assertEqual(t.data.dims, dict(chain=1, value=8))
    self.assertEqual(t.data.stuff.shape, (1, 8))
def test_dataset_groupby_dynamic(self):
    # Dynamic groupby: indexing the resulting dynamic object at the first
    # x value yields the corresponding column of the value array.
    array = np.random.rand(11, 11)
    dataset = Dataset({'x': self.xs, 'y': self.y_ints, 'z': array},
                      kdims=['x', 'y'], vdims=['z'])
    with DatatypeContext([self.datatype, 'dictionary', 'dataframe'],
                         dataset):
        grouped = dataset.groupby('x', dynamic=True)
        first = Dataset({'y': self.y_ints, 'z': array[:, 0]},
                        kdims=['y'], vdims=['z'])
        self.assertEqual(grouped[0], first)
def test_dataset_groupby_dynamic_alias(self):
    # Same as the non-alias dynamic groupby but grouping via alias 'X' on
    # a dask-backed array; the expected column must be computed.
    array = da.from_array(np.random.rand(11, 11), 3)
    dataset = Dataset({'x': self.xs, 'y': self.y_ints, 'z': array},
                      kdims=[('x', 'X'), ('y', 'Y')], vdims=[('z', 'Z')])
    with DatatypeContext([self.datatype, 'dictionary', 'dataframe'],
                         dataset):
        grouped = dataset.groupby('X', dynamic=True)
        first = Dataset({'y': self.y_ints, 'z': array[:, 0].compute()},
                        kdims=[('y', 'Y')], vdims=[('z', 'Z')])
        self.assertEqual(grouped[0], first)
def test_dataset_groupby_multiple_dims(self):
    # Grouping by two of four kdims yields the full cartesian product of
    # keys; each group must equal the explicit select + reindex.
    dataset = Dataset((range(8), range(8), range(8), range(8),
                       da.from_array(np.random.rand(8, 8, 8, 8), 4)),
                      kdims=['a', 'b', 'c', 'd'], vdims=['Value'])
    grouped = dataset.groupby(['c', 'd'])
    keys = list(product(range(8), range(8)))
    self.assertEqual(list(grouped.keys()), keys)
    for c, d in keys:
        self.assertEqual(grouped[c, d],
                         dataset.select(c=c, d=d).reindex(['a', 'b']))
def test_zero_sized_coordinates_range(self):
    """Ranges over an empty coordinate/value array are (nan, nan)."""
    # Renamed the local DataArray (previously `da`) to avoid shadowing the
    # module-level dask.array alias `da` used by sibling tests.
    xrarr = xr.DataArray(np.empty((2, 0)), dims=('y', 'x'),
                         coords={'x': [], 'y': [0, 1]}, name='A')
    ds = Dataset(xrarr)
    x0, x1 = ds.range('x')
    self.assertTrue(np.isnan(x0))
    self.assertTrue(np.isnan(x1))
    z0, z1 = ds.range('A')
    self.assertTrue(np.isnan(z0))
    self.assertTrue(np.isnan(z1))
def test_xarray_dataset_with_scalar_dim_canonicalize(self):
    # A leading length-1 't' dimension that is not declared as a kdim
    # must be dropped during canonicalization, leaving a 2D array.
    xs = [0, 1]
    ys = [0.1, 0.2, 0.3]
    zs = np.array([[[0, 1], [2, 3], [4, 5]]])
    xrarr = xr.DataArray(zs, coords={'x': xs, 'y': ys, 't': [1]},
                         dims=['t', 'y', 'x'])
    xrds = xr.Dataset({'v': xrarr})
    ds = Dataset(xrds, kdims=['x', 'y'], vdims=['v'], datatype=['xarray'])
    canonical = ds.dimension_values(2, flat=False)
    self.assertEqual(canonical.ndim, 2)
    expected = np.array([[0, 1], [2, 3], [4, 5]])
    self.assertEqual(canonical, expected)
def test_xarray_dataset_names_and_units(self):
    """Dimension label and unit metadata is read from DataArray attrs."""
    # Renamed the local DataArray (previously `da`) to avoid shadowing the
    # module-level dask.array alias `da` used by sibling tests.
    xs = [0.1, 0.2, 0.3]
    ys = [0, 1]
    zs = np.array([[0, 1], [2, 3], [4, 5]])
    darr = xr.DataArray(zs, coords=[('x_dim', xs), ('y_dim', ys)],
                        name="data_name", dims=['y_dim', 'x_dim'])
    darr.attrs['long_name'] = "data long name"
    darr.attrs['units'] = "array_unit"
    darr.x_dim.attrs['units'] = "x_unit"
    darr.y_dim.attrs['long_name'] = "y axis long name"
    dataset = Dataset(darr)
    # 'units' maps to Dimension.unit, 'long_name' to Dimension.label
    self.assertEqual(dataset.get_dimension("x_dim"),
                     Dimension("x_dim", unit="x_unit"))
    self.assertEqual(dataset.get_dimension("y_dim"),
                     Dimension("y_dim", label="y axis long name"))
    self.assertEqual(dataset.get_dimension("data_name"),
                     Dimension("data_name", label="data long name",
                               unit="array_unit"))
def test_construct_3d_from_xarray(self):
    """Construct a 3D Dataset from a DataArray with 2D (curvilinear)
    lat/lon coordinates plus a regular z coordinate."""
    try:
        import xarray as xr
    except ImportError:
        # Was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt; only a missing import should skip the test.
        raise SkipTest("Test requires xarray")
    zs = np.arange(48).reshape(2, 4, 6)
    darr = xr.DataArray(zs, dims=['z', 'y', 'x'],
                        coords={'lat': (('y', 'x'), self.ys),
                                'lon': (('y', 'x'), self.xs),
                                'z': [0, 1]}, name='A')
    dataset = Dataset(darr, ['lon', 'lat', 'z'], 'A')
    self.assertEqual(dataset.dimension_values('lon'), self.xs.T.flatten())
    self.assertEqual(dataset.dimension_values('lat'), self.ys.T.flatten())
    self.assertEqual(dataset.dimension_values('z', expanded=False),
                     np.array([0, 1]))
    self.assertEqual(dataset.dimension_values('A'), zs.T.flatten())
def test_3d_xarray_with_constant_dim_canonicalized_to_2d(self):
    """A constant (length-1) extra dimension is dropped on canonicalization."""
    try:
        import xarray as xr
    except ImportError:
        # Was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt; only a missing import should skip the test.
        raise SkipTest("Test requires xarray")
    zs = np.arange(24).reshape(1, 4, 6)
    # Construct DataArray with additional constant dimension
    darr = xr.DataArray(zs, dims=['z', 'y', 'x'],
                        coords={'lat': (('y', 'x'), self.ys),
                                'lon': (('y', 'x'), self.xs),
                                'z': [0]}, name='A')
    # Declare Dataset without declaring constant dimension
    dataset = Dataset(darr, ['lon', 'lat'], 'A')
    # Ensure that canonicalization drops the constant dimension
    self.assertEqual(dataset.dimension_values('A', flat=False), zs[0])
def test_construct_from_xarray(self):
    """Key/value dims are inferred from a DataArray with 2D coordinates."""
    try:
        import xarray as xr
    except ImportError:
        # Was a bare `except:`, which would also swallow SystemExit and
        # KeyboardInterrupt; only a missing import should skip the test.
        raise SkipTest("Test requires xarray")
    coords = OrderedDict([('lat', (('y', 'x'), self.ys)),
                          ('lon', (('y', 'x'), self.xs))])
    darr = xr.DataArray(self.zs, dims=['y', 'x'], coords=coords, name='z')
    dataset = Dataset(darr)
    # Ensure that dimensions are inferred correctly
    self.assertEqual(dataset.kdims, [Dimension('lat'), Dimension('lon')])
    self.assertEqual(dataset.vdims, [Dimension('z')])
    # Ensure that canonicalization works on multi-dimensional coordinates
    self.assertEqual(dataset.dimension_values('lon', flat=False), self.xs)
    self.assertEqual(dataset.dimension_values('lat', flat=False), self.ys)
    self.assertEqual(dataset.dimension_values('z'), self.zs.T.flatten())
def test_dataset_empty_combined_dimension(self):
    # A combined-key dict spec with no rows is equivalent to separate
    # empty columns per dimension.
    combined = Dataset({('x', 'y'): []}, kdims=['x', 'y'])
    separate = Dataset({'x': [], 'y': []}, kdims=['x', 'y'])
    self.assertEqual(combined, separate)
def test_graph_node_info_merge_on_index(self):
    # Node info sharing the 'index' dimension is merged onto the graph's
    # nodes, exposing 'label' as the fourth node dimension.
    node_info = Dataset((np.arange(8), np.arange(1, 9)), 'index', 'label')
    graph = Graph(((self.source, self.target), node_info))
    self.assertEqual(graph.nodes.dimension_values(3),
                     node_info.dimension_values(1))
def test_initialize_cube_with_vdims(self):
    # Explicitly passed vdims should be respected when wrapping the
    # cube fixture.
    ds = Dataset(self.cube, vdims=['Quantity'])
    self.assertEqual(ds.dimensions('value', True), ['Quantity'])
def test_select_multi_index(self):
    # Selecting scalars on both key dimensions returns a single value.
    ds = Dataset(self.cube)
    self.assertEqual(ds.select(longitude=0, latitude=0), 5)
def test_select_index(self):
    # Scalar selection on one key dimension slices out a single column.
    ds = Dataset(self.cube)
    expected = np.array([[1, 5, 9]], dtype=np.int32)
    self.assertEqual(ds.select(longitude=0).data.data, expected)
def test_getitem_scalar(self):
    # Indexing with scalar coordinates yields the underlying value.
    ds = Dataset(self.cube)
    self.assertEqual(ds[0, 0], 5)
def test_select_multi_slice2(self):
    # Set-based selection on both dimensions keeps the matching cells.
    ds = Dataset(self.cube)
    selected = ds.select(longitude={0, 2}, latitude={0, 2})
    self.assertEqual(selected.data.data,
                     np.array([[5, 7]], dtype=np.int32))
def test_multi_array_groupby_non_scalar(self):
    # Grouping a multi-path dataset on a dimension that is not constant
    # within each path is ambiguous and must raise.
    arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)]
    mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular'])
    with self.assertRaises(ValueError):
        mds.groupby('x')
def test_multi_array_groupby(self):
    # 'y' is constant within each path array, so grouping by it yields
    # one group per input array, keyed by that constant value.
    arrays = [np.array([(1+i, i), (2+i, i), (3+i, i)]) for i in range(2)]
    mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular'])
    for i, (k, ds) in enumerate(mds.groupby('y').items()):
        self.assertEqual(k, arrays[i][0, 1])
        self.assertEqual(ds, Dataset([arrays[i]], kdims=['x']))
def test_xarray_irregular_dataset_values(self):
    # Unexpanded values of the 'z' dimension are its coordinate samples.
    ds = Dataset(self.get_multi_dim_irregular_dataset())
    zvals = ds.dimension_values('z', expanded=False)
    self.assertEqual(zvals, np.array([0, 1, 2, 3]))
def test_dataset_simple_dict_sorted(self):
    # A scalar key->value dict is sorted by key on construction.
    ds = Dataset({2: 2, 1: 1, 3: 3}, kdims=['x'], vdims=['y'])
    expected = Dataset([(v, v) for v in range(1, 4)],
                       kdims=['x'], vdims=['y'])
    self.assertEqual(ds, expected)
def test_xarray_dataset_irregular_shape(self):
    # Irregular (2D-coordinate) dimensions report NaN sizes in the
    # gridded shape, while regular dimensions keep their lengths.
    ds = Dataset(self.get_multi_dim_irregular_dataset())
    shape = ds.interface.shape(ds, gridded=True)
    self.assertEqual(shape, (np.nan, np.nan, 3, 4))
def test_irregular_and_regular_coordinate_explicit_irregular_coords_inverted(
        self):
    # Explicitly declaring the irregular coords as kdims on the inverted
    # fixture still canonicalizes values to match the first data slice.
    data = self.get_irregular_dataarray(False)
    ds = Dataset(data, ['xc', 'yc'], vdims='Value')
    self.assertEqual(ds.kdims, [Dimension('xc'), Dimension('yc')])
    self.assertEqual(ds.dimension_values(2, flat=False), data.values[0])
def test_graph_node_info_merge_on_index_partial(self):
    """Node info covering only some indices is NaN-padded on merge."""
    node_info = Dataset((np.arange(5), np.arange(1, 6)), 'index', 'label')
    graph = Graph(((self.source, self.target), node_info))
    # np.NaN was removed in NumPy 2.0; use the canonical np.nan alias.
    expected = np.array([1., 2., 3., 4., 5., np.nan, np.nan, np.nan])
    self.assertEqual(graph.nodes.dimension_values(3), expected)
def test_multi_dict_groupby_non_scalar(self):
    # 'x' varies within each path dict, so grouping by it must raise.
    arrays = [{'x': np.arange(i, i+2), 'y': i} for i in range(2)]
    mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular'])
    with self.assertRaises(ValueError):
        mds.groupby('x')
def test_range_vdim(self):
    # Range of the value dimension spans the full data extent.
    ds = Dataset(self.cube, kdims=['longitude', 'latitude'])
    self.assertEqual(ds.range('unknown'), (0, 11))
def test_multi_dict_groupby(self):
    # 'y' is a scalar per path dict, so grouping by it yields one group
    # per input dict, keyed by that scalar.
    arrays = [{'x': np.arange(i, i+2), 'y': i} for i in range(2)]
    mds = Dataset(arrays, kdims=['x', 'y'], datatype=['multitabular'])
    for i, (k, ds) in enumerate(mds.groupby('y').items()):
        self.assertEqual(k, arrays[i]['y'])
        self.assertEqual(ds, Dataset([arrays[i]], kdims=['x']))
def test_select_set(self):
    # Set selection keeps exactly the listed longitude columns.
    ds = Dataset(self.cube)
    result = ds.select(longitude={0, 1}).data.data
    self.assertEqual(result,
                     np.array([[1, 2], [5, 6], [9, 10]], dtype=np.int32))
def test_getitem_index(self):
    # Scalar getitem on the first key dimension slices out one column.
    ds = Dataset(self.cube)
    result = ds[0].data.data
    self.assertEqual(result, np.array([[1, 5, 9]], dtype=np.int32))
def test_graph_node_info_no_index(self):
    # Node info without an explicit index is joined positionally with
    # the graph's nodes.
    node_info = Dataset(np.arange(8), vdims=['Label'])
    graph = Graph(((self.source, self.target), node_info))
    self.assertEqual(graph.nodes.dimension_values(3),
                     node_info.dimension_values(0))
def test_select_multi_slice1(self):
    # Range selection on both key dimensions keeps the 2x2 sub-grid.
    ds = Dataset(self.cube)
    selected = ds.select(longitude=(0, 1.01), latitude=(0, 1.01))
    self.assertEqual(selected.data.data,
                     np.array([[5, 6], [9, 10]], dtype=np.int32))
def test_select_set(self):
    # Set selection over longitude retains the two matching columns.
    ds = Dataset(self.cube)
    expected = np.array([[1, 2], [5, 6], [9, 10]], dtype=np.int32)
    self.assertEqual(ds.select(longitude={0, 1}).data.data, expected)
def test_initialize_cube(self):
    # Without explicit dims, all dimensions are taken from the cube
    # fixture in its native order.
    ds = Dataset(self.cube)
    self.assertEqual(ds.dimensions(label=True),
                     ['longitude', 'latitude', 'unknown'])
def test_select_multi_slice1(self):
    # Slice both key dimensions; epsilon nudges the upper bound so the
    # value 1 is included in the range.
    ds = Dataset(self.cube)
    upper = 1 + self.epsilon
    selected = ds.select(longitude=(0, upper), latitude=(0, upper))
    self.assertEqual(selected.data.data,
                     np.array([[5, 6], [9, 10]], dtype=np.int32))
def test_graph_node_info_no_index_mismatch(self):
    # Node info without an index must match the node count; 6 labels for
    # the fixture's 8 nodes should raise a ValueError.
    node_info = Dataset(np.arange(6), vdims=['Label'])
    with self.assertRaises(ValueError):
        Graph(((self.source, self.target), node_info))
class TestDimTransforms(ComparisonTestCase):
    """Tests for dim() expression transforms applied to Datasets backed by
    pandas, dask and xarray (including dask-backed xarray) data."""

    def setUp(self):
        # Column fixtures: ints 1..10, floats 0.1..1.0, their negation,
        # a repeating A/B/C category column and a boolean mask.
        self.linear_ints = pd.Series(np.arange(1, 11))
        self.linear_floats = pd.Series(np.arange(1, 11) / 10.)
        self.negative = pd.Series(-self.linear_floats)
        self.repeating = pd.Series(
            ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A'])
        self.booleans = self.repeating == 'A'
        self.dataset = Dataset(
            (self.linear_ints, self.linear_floats, self.negative,
             self.repeating, self.booleans),
            ['int', 'float', 'negative', 'categories', 'booleans'])

        if dd is not None:
            # Same data backed by a partitioned dask DataFrame
            ddf = dd.from_pandas(self.dataset.data, npartitions=2)
            self.dataset_dask = self.dataset.clone(data=ddf)

        if xr is None:
            return
        # Gridded fixture backed by xarray (and optionally dask)
        x = np.arange(2, 62, 3)
        y = np.arange(2, 12, 2)
        array = np.arange(100).reshape(5, 20)
        darray = xr.DataArray(data=array,
                              coords=OrderedDict([('x', x), ('y', y)]),
                              dims=['y', 'x'])
        self.dataset_xarray = Dataset(darray, vdims=['z'])
        if da is not None:
            dask_array = da.from_array(array)
            dask_da = xr.DataArray(data=dask_array,
                                   coords=OrderedDict([('x', x), ('y', y)]),
                                   dims=['y', 'x'])
            self.dataset_xarray_dask = Dataset(dask_da, vdims=['z'])

    # Assertion helpers

    def assert_apply(self, expr, expected, skip_dask=False,
                     skip_no_index=False):
        """Apply `expr` to the pandas- and dask-backed datasets and compare
        against `expected` (a scalar or a pandas Series) across the
        keep_index/compute option combinations.

        skip_dask skips the dask-backed checks (e.g. partition-dependent
        results); skip_no_index skips the keep_index=False comparisons.
        """
        if np.isscalar(expected):
            # Pandas input
            self.assertEqual(
                expr.apply(self.dataset, keep_index=False), expected)
            self.assertEqual(
                expr.apply(self.dataset, keep_index=True), expected)
            if dd is None:
                return
            # Dask input
            self.assertEqual(
                expr.apply(self.dataset_dask, keep_index=False), expected)
            self.assertEqual(
                expr.apply(self.dataset_dask, keep_index=True), expected)
            return

        # Make sure expected is a pandas Series
        self.assertIsInstance(expected, pd.Series)

        # Check using dataset backed by pandas DataFrame
        # keep_index=False
        if not skip_no_index:
            np.testing.assert_equal(expr.apply(self.dataset),
                                    expected.values)
        # keep_index=True
        pd.testing.assert_series_equal(
            expr.apply(self.dataset, keep_index=True),
            expected, check_names=False)

        if skip_dask or dd is None:
            return

        # Check using dataset backed by Dask DataFrame
        expected_dask = dd.from_pandas(expected, npartitions=2)

        # keep_index=False, compute=False
        if not skip_no_index:
            da.assert_eq(
                expr.apply(self.dataset_dask, compute=False).compute(),
                expected_dask.values.compute())
        # keep_index=True, compute=False
        dd.assert_eq(
            expr.apply(self.dataset_dask, keep_index=True, compute=False),
            expected_dask, check_names=False)
        # keep_index=False, compute=True
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_dask, compute=True),
                expected_dask.values.compute())
        # keep_index=True, compute=True
        pd.testing.assert_series_equal(
            expr.apply(self.dataset_dask, keep_index=True, compute=True),
            expected_dask.compute(), check_names=False)

    def assert_apply_xarray(self, expr, expected, skip_dask=False,
                            skip_no_index=False):
        """Like assert_apply but for the xarray-backed datasets; `expected`
        is a scalar or an xarray DataArray."""
        import xarray as xr
        if np.isscalar(expected):
            # xarray input
            self.assertEqual(
                expr.apply(self.dataset_xarray, keep_index=False), expected)
            self.assertEqual(
                expr.apply(self.dataset_xarray, keep_index=True), expected)
            return

        # Make sure expected is an xarray DataArray
        self.assertIsInstance(expected, xr.DataArray)

        # Check using dataset backed by xarray
        # keep_index=False
        if not skip_no_index:
            np.testing.assert_equal(expr.apply(self.dataset_xarray),
                                    expected.values)
        # keep_index=True
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray, keep_index=True), expected)

        if skip_dask or da is None:
            return

        # Check using dataset backed by dask-backed xarray
        expected_da = da.from_array(expected.values)
        expected_dask = expected.copy()
        expected_dask.data = expected_da

        # keep_index=False, compute=False
        if not skip_no_index:
            da.assert_eq(expr.apply(self.dataset_xarray_dask, compute=False),
                         expected_dask.data)
        # keep_index=True, compute=False
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask, keep_index=True,
                       compute=False),
            expected_dask,
        )
        # keep_index=False, compute=True
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_xarray_dask, compute=True),
                expected_dask.data.compute())
        # keep_index=True, compute=True
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask, keep_index=True,
                       compute=True),
            expected_dask.compute(),
        )

    # Unary operators

    def test_abs_transform(self):
        expr = abs(dim('negative'))
        self.assert_apply(expr, self.linear_floats)

    def test_neg_transform(self):
        expr = -dim('negative')
        self.assert_apply(expr, self.linear_floats)

    def test_inv_transform(self):
        expr = ~dim('booleans')
        self.assert_apply(expr, ~self.booleans)

    # Binary operators

    def test_add_transform(self):
        expr = dim('float') + 1
        self.assert_apply(expr, self.linear_floats + 1)

    def test_div_transform(self):
        expr = dim('int') / 10.
        self.assert_apply(expr, self.linear_floats)

    def test_floor_div_transform(self):
        expr = dim('int') // 2
        self.assert_apply(expr, self.linear_ints // 2)

    def test_mod_transform(self):
        expr = dim('int') % 2
        self.assert_apply(expr, self.linear_ints % 2)

    def test_mul_transform(self):
        expr = dim('float') * 10.
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_pow_transform(self):
        expr = dim('int')**2
        self.assert_apply(expr, self.linear_ints**2)

    def test_sub_transform(self):
        expr = dim('int') - 10
        self.assert_apply(expr, self.linear_ints - 10)

    # Reverse binary operators

    def test_radd_transform(self):
        expr = 1 + dim('float')
        self.assert_apply(expr, 1 + self.linear_floats)

    def test_rdiv_transform(self):
        expr = 10. / dim('int')
        self.assert_apply(expr, 10. / self.linear_ints)

    def test_rfloor_div_transform(self):
        expr = 2 // dim('int')
        self.assert_apply(expr, 2 // self.linear_ints)

    def test_rmod_transform(self):
        expr = 2 % dim('int')
        self.assert_apply(expr, 2 % self.linear_ints)

    def test_rmul_transform(self):
        expr = 10. * dim('float')
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_rsub_transform(self):
        expr = 10 - dim('int')
        self.assert_apply(expr, 10 - self.linear_ints)

    # NumPy operations

    def test_ufunc_transform(self):
        expr = np.sin(dim('float'))
        self.assert_apply(expr, np.sin(self.linear_floats))

    def test_astype_transform(self):
        expr = dim('int').astype('float64')
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_cumsum_transform(self):
        expr = dim('float').cumsum()
        self.assert_apply(expr, self.linear_floats.cumsum())

    def test_max_transform(self):
        expr = dim('float').max()
        self.assert_apply(expr, self.linear_floats.max())

    def test_min_transform(self):
        expr = dim('float').min()
        self.assert_apply(expr, self.linear_floats.min())

    def test_round_transform(self):
        expr = dim('float').round()
        self.assert_apply(expr, self.linear_floats.round())

    def test_sum_transform(self):
        expr = dim('float').sum()
        self.assert_apply(expr, self.linear_floats.sum())

    def test_std_transform(self):
        expr = dim('float').std(ddof=0)
        self.assert_apply(expr, self.linear_floats.std(ddof=0))

    def test_var_transform(self):
        expr = dim('float').var(ddof=0)
        self.assert_apply(expr, self.linear_floats.var(ddof=0))

    def test_log_transform(self):
        expr = dim('float').log()
        self.assert_apply(expr, np.log(self.linear_floats))

    def test_log10_transform(self):
        expr = dim('float').log10()
        self.assert_apply(expr, np.log10(self.linear_floats))

    # Custom functions

    def test_str_astype(self):
        expr = dim('int').str()
        self.assert_apply(expr, self.linear_ints.astype(str),
                          skip_dask=True)

    def test_norm_transform(self):
        expr = dim('int').norm()
        self.assert_apply(expr, (self.linear_ints - 1) / 9.)

    def test_iloc_transform_int(self):
        expr = dim('int').iloc[1]
        self.assert_apply(expr, self.linear_ints[1])

    def test_iloc_transform_slice(self):
        expr = dim('int').iloc[1:3]
        self.assert_apply(expr, self.linear_ints[1:3], skip_dask=True)

    def test_iloc_transform_list(self):
        expr = dim('int').iloc[[1, 3, 5]]
        self.assert_apply(expr, self.linear_ints[[1, 3, 5]],
                          skip_dask=True)

    def test_bin_transform(self):
        expr = dim('int').bin([0, 5, 10])
        expected = pd.Series(
            [2.5, 2.5, 2.5, 2.5, 2.5, 7.5, 7.5, 7.5, 7.5, 7.5])
        self.assert_apply(expr, expected)

    def test_bin_transform_with_labels(self):
        expr = dim('int').bin([0, 5, 10], ['A', 'B'])
        expected = pd.Series(
            ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
        self.assert_apply(expr, expected)

    def test_categorize_transform_list(self):
        expr = dim('categories').categorize(
            ['circle', 'square', 'triangle'])
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We skip dask because results will depend on partition structure
        self.assert_apply(expr, expected, skip_dask=True)

    def test_categorize_transform_dict(self):
        expr = dim('categories').categorize(
            {'A': 'circle', 'B': 'square', 'C': 'triangle'})
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We don't skip dask because results are now stable across partitions
        self.assert_apply(expr, expected)

    def test_categorize_transform_dict_with_default(self):
        expr = dim('categories').categorize(
            {'A': 'circle', 'B': 'square'}, default='triangle')
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We don't skip dask because results are stable across partitions
        self.assert_apply(expr, expected)

    # Numpy functions

    def test_digitize(self):
        expr = dim('int').digitize([1, 5, 10])
        expected = pd.Series(
            np.array([1, 1, 1, 1, 2, 2, 2, 2, 2, 3])).astype('int64')
        self.assert_apply(expr, expected)

    def test_isin(self):
        expr = dim('int').digitize([1, 5, 10]).isin([1, 3])
        expected = pd.Series(
            np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 1], dtype='bool'))
        self.assert_apply(expr, expected)

    # Complex expressions

    def test_multi_operator_expression(self):
        expr = (((dim('float') - 2) * 3)**2)
        self.assert_apply(expr, ((self.linear_floats - 2) * 3)**2)

    def test_multi_dim_expression(self):
        expr = dim('int') - dim('float')
        self.assert_apply(expr, self.linear_ints - self.linear_floats)

    # Repr method

    def test_dim_repr(self):
        self.assertEqual(repr(dim('float')), "dim('float')")

    def test_unary_op_repr(self):
        self.assertEqual(repr(-dim('float')), "-dim('float')")

    def test_binary_op_repr(self):
        self.assertEqual(repr(dim('float') * 2), "dim('float')*2")

    def test_reverse_binary_op_repr(self):
        self.assertEqual(repr(1 + dim('float')), "1+dim('float')")

    def test_ufunc_expression_repr(self):
        self.assertEqual(repr(np.log(dim('float'))), "dim('float').log()")

    def test_custom_func_repr(self):
        self.assertEqual(repr(dim('float').norm()), "dim('float').norm()")

    def test_multi_operator_expression_repr(self):
        self.assertEqual(repr(((dim('float') - 2) * 3)**2),
                         "((dim('float')-2)*3)**2")

    # Applies method

    def test_multi_dim_expression_applies(self):
        self.assertEqual(
            (dim('int') - dim('float')).applies(self.dataset), True)

    def test_multi_dim_expression_not_applies(self):
        self.assertEqual(
            (dim('foo') - dim('bar')).applies(self.dataset), False)

    def test_multi_dim_expression_partial_applies(self):
        self.assertEqual(
            (dim('int') - dim('bar')).applies(self.dataset), False)

    # Check namespaced expressions

    def test_pandas_namespace_accessor_repr(self):
        self.assertEqual(repr(dim('date').df.dt.year),
                         "dim('date').pd.dt.year")

    def test_pandas_str_accessor(self):
        expr = dim('categories').df.str.lower()
        self.assert_apply(expr, self.repeating.str.lower())

    def test_pandas_chained_methods(self):
        expr = dim('int').df.rolling(1).mean()
        self.assert_apply(expr, self.linear_ints.rolling(1).mean())

    @xr_skip
    def test_xarray_namespace_method_repr(self):
        self.assertEqual(repr(dim('date').xr.quantile(0.95)),
                         "dim('date').xr.quantile(0.95)")

    @xr_skip
    def test_xarray_quantile_method(self):
        expr = dim('z').xr.quantile(0.95)
        self.assert_apply_xarray(
            expr, self.dataset_xarray.data.z.quantile(0.95), skip_dask=True)

    @xr_skip
    def test_xarray_roll_method(self):
        expr = dim('z').xr.roll({'x': 1}, roll_coords=False)
        self.assert_apply_xarray(
            expr, self.dataset_xarray.data.z.roll({'x': 1},
                                                  roll_coords=False))

    @xr_skip
    def test_xarray_coarsen_method(self):
        expr = dim('z').xr.coarsen({'x': 4}).mean()
        self.assert_apply_xarray(
            expr, self.dataset_xarray.data.z.coarsen({'x': 4}).mean())

    # Dynamic arguments

    def test_dynamic_mul(self):
        # Parameter references re-evaluate the expression when updated
        p = Params(a=1)
        expr = dim('float') * p.param.a
        self.assertEqual(list(expr.params.values()), [p.param.a])
        self.assert_apply(expr, self.linear_floats)
        p.a = 2
        self.assert_apply(expr, self.linear_floats * 2)

    def test_dynamic_arg(self):
        p = Params(a=1)
        expr = dim('float').round(p.param.a)
        self.assertEqual(list(expr.params.values()), [p.param.a])
        self.assert_apply(expr, np.round(self.linear_floats, 1))
        p.a = 2
        self.assert_apply(expr, np.round(self.linear_floats, 2))

    def test_dynamic_kwarg(self):
        p = Params(a=1)
        expr = dim('float').round(decimals=p.param.a)
        self.assertEqual(list(expr.params.values()), [p.param.a])
        self.assert_apply(expr, np.round(self.linear_floats, 1))
        p.a = 2
        self.assert_apply(expr, np.round(self.linear_floats, 2))

    def test_pickle(self):
        # dim expressions must round-trip through pickle unchanged
        expr = (((dim('float') - 2) * 3)**2)
        expr2 = pickle.loads(pickle.dumps(expr))
        self.assertEqual(expr, expr2)
def test_dimension_values_kdim(self):
    # Unexpanded key-dimension values are the coordinate samples.
    ds = Dataset(self.cube, kdims=['longitude', 'latitude'])
    lons = ds.dimension_values('longitude', expanded=False)
    self.assertEqual(lons, np.array([-1, 0, 1, 2], dtype=np.int32))
def test_dataset_range_categorical_dimension_empty(self):
    # An emptied categorical (string) column should yield a NaN range
    # rather than raising.
    ddf = dd.from_pandas(pd.DataFrame({'a': ['1', '2', '3']}), 1)
    ds = Dataset(ddf).iloc[:0]
    ds_range = ds.range(0)
    self.assertTrue(np.isnan(ds_range[0]))
    self.assertTrue(np.isnan(ds_range[1]))
def test_dimension_values_vdim(self):
    # Gridded (flat=False) values of the 'unknown' vdim: the expected
    # array is the transposed lookup table.
    cube = Dataset(self.cube, kdims=['longitude', 'latitude'])
    self.assertEqual(
        cube.dimension_values('unknown', flat=False),
        np.array([[0, 4, 8], [1, 5, 9], [2, 6, 10], [3, 7, 11]],
                 dtype=np.int32).T)
def test_initialize_cube_with_kdims(self):
    # Explicitly passed kdims are respected, in the given order.
    ds = Dataset(self.cube, kdims=['longitude', 'latitude'])
    self.assertEqual(ds.dimensions('key', True),
                     ['longitude', 'latitude'])
def test_range_kdim(self):
    # Key-dimension range spans the longitude coordinate extent.
    ds = Dataset(self.cube, kdims=['longitude', 'latitude'])
    self.assertEqual(ds.range('longitude'), (-1, 2))
def test_dataset_empty_list_init_dtypes(self):
    # Empty columns default to float64 for both key and value dims.
    ds = Dataset([], kdims=['x'], vdims=['y'])
    for dim_name in 'xy':
        self.assertEqual(ds.dimension_values(dim_name).dtype, np.float64)
def test_dataset_mixed_type_range(self):
    # A column mixing strings with None cannot produce a numeric range,
    # so both bounds come back as NaN.
    ds = Dataset((['A', 'B', 'C', None], ), 'A')
    vmin, vmax = ds.range(0)
    self.assertTrue(np.isnan(vmin))
    self.assertTrue(np.isnan(vmax))
def test_select_index(self):
    # Scalar longitude selection keeps a single column of the data.
    ds = Dataset(self.cube)
    column = ds.select(longitude=0).data.data
    self.assertEqual(column, np.array([[1, 5, 9]], dtype=np.int32))
def test_select_multi_slice2(self):
    # Set selection on both axes reduces to the matching pair of values.
    ds = Dataset(self.cube)
    result = ds.select(longitude={0, 2}, latitude={0, 2}).data.data
    self.assertEqual(result, np.array([[5, 7]], dtype=np.int32))