def test_mask_2d_array(self):
    """Apply a 2D boolean mask through the interface and expect NaN where the mask is True."""
    values = np.random.rand(4, 3)
    x_coords = [0, 1, 2]
    y_coords = [1, 2, 3, 4]
    ds = Dataset((x_coords, y_coords, values), ['x', 'y'], 'z')

    mask = np.array(
        [[1, 1, 0],
         [1, 0, 1],
         [0, 1, 1],
         [1, 0, 1]],
        dtype='bool')

    masked = ds.clone(ds.interface.mask(ds, mask))
    result = masked.dimension_values(2, flat=False)

    # Reference result: the input values with every masked cell set to NaN.
    reference = values.copy()
    reference[mask] = np.nan
    self.assertEqual(result, reference)
def test_mask_2d_array_transposed(self):
    """Same masking check, but the data is stored transposed (dims ordered x, y) in a DataArray."""
    values = np.random.rand(4, 3)
    coords = {'x': [0, 1, 2], 'y': [0, 1, 2, 3]}
    data_array = xr.DataArray(values.T, coords=coords, dims=['x', 'y'])
    ds = Dataset(data_array, ['x', 'y'], 'z')

    mask = np.array(
        [[1, 1, 0],
         [1, 0, 1],
         [0, 1, 1],
         [1, 0, 1]],
        dtype='bool')

    masked = ds.clone(ds.interface.mask(ds, mask))
    result = masked.dimension_values(2, flat=False)

    # Reference result is built from the un-transposed array.
    reference = values.copy()
    reference[mask] = np.nan
    self.assertEqual(result, reference)
class TestDimTransforms(ComparisonTestCase):
    """Tests for ``dim`` expressions applied to pandas- and dask-backed Datasets."""

    def setUp(self):
        """Build the reference Series and the Datasets the expressions are applied to."""
        self.linear_ints = pd.Series(np.arange(1, 11))
        self.linear_floats = pd.Series(np.arange(1, 11) / 10.)
        self.negative = pd.Series(-self.linear_floats)
        self.repeating = pd.Series(
            ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A'])

        self.dataset = Dataset(
            (self.linear_ints, self.linear_floats,
             self.negative, self.repeating),
            ['int', 'float', 'negative', 'categories'])

        # The dask-backed dataset only exists when dask.dataframe is importable.
        if dd is None:
            return

        ddf = dd.from_pandas(self.dataset.data, npartitions=2)
        self.dataset_dask = self.dataset.clone(data=ddf)

    # Assertion helpers

    def check_apply(self, expr, expected, skip_dask=False):
        """Apply ``expr`` to the fixtures and compare against ``expected``.

        ``expected`` is either a scalar or a ``pd.Series``.  The pandas-backed
        dataset is always checked; the dask-backed dataset is checked unless
        ``skip_dask`` is true or dask is unavailable.  The flag is honoured
        for scalar results as well as for Series results.
        """
        if np.isscalar(expected):
            # Pandas input
            for keep_index in (False, True):
                self.assertEqual(
                    expr.apply(self.dataset, keep_index=keep_index),
                    expected)

            if skip_dask or dd is None:
                return

            # Dask input
            for keep_index in (False, True):
                self.assertEqual(
                    expr.apply(self.dataset_dask, keep_index=keep_index),
                    expected)
            return

        # Make sure expected is a pandas Series
        self.assertIsInstance(expected, pd.Series)

        # Check using dataset backed by pandas DataFrame
        # keep_index=False
        np.testing.assert_equal(expr.apply(self.dataset), expected.values)
        # keep_index=True
        pd.testing.assert_series_equal(
            expr.apply(self.dataset, keep_index=True),
            expected,
            check_names=False)

        if skip_dask or dd is None:
            return

        # Check using dataset backed by Dask DataFrame
        expected_dask = dd.from_pandas(expected, npartitions=2)

        # keep_index=False, compute=False
        da.assert_eq(
            expr.apply(self.dataset_dask, compute=False),
            expected_dask.values)
        # keep_index=True, compute=False
        dd.assert_eq(
            expr.apply(self.dataset_dask, keep_index=True, compute=False),
            expected_dask,
            check_names=False)
        # keep_index=False, compute=True
        np.testing.assert_equal(
            expr.apply(self.dataset_dask, compute=True),
            expected_dask.values.compute())
        # keep_index=True, compute=True
        pd.testing.assert_series_equal(
            expr.apply(self.dataset_dask, keep_index=True, compute=True),
            expected_dask.compute(),
            check_names=False)

    # Unary operators

    def test_abs_transform(self):
        expr = abs(dim('negative'))
        self.check_apply(expr, self.linear_floats)

    def test_neg_transform(self):
        expr = -dim('negative')
        self.check_apply(expr, self.linear_floats)

    # Binary operators

    def test_add_transform(self):
        expr = dim('float') + 1
        self.check_apply(expr, self.linear_floats + 1)

    def test_div_transform(self):
        expr = dim('int') / 10.
        self.check_apply(expr, self.linear_floats)

    def test_floor_div_transform(self):
        expr = dim('int') // 2
        self.check_apply(expr, self.linear_ints // 2)

    def test_mod_transform(self):
        expr = dim('int') % 2
        self.check_apply(expr, self.linear_ints % 2)

    def test_mul_transform(self):
        expr = dim('float') * 10.
        self.check_apply(expr, self.linear_ints.astype('float64'))

    def test_pow_transform(self):
        expr = dim('int')**2
        self.check_apply(expr, self.linear_ints**2)

    def test_sub_transform(self):
        expr = dim('int') - 10
        self.check_apply(expr, self.linear_ints - 10)

    # Reverse binary operators

    def test_radd_transform(self):
        expr = 1 + dim('float')
        self.check_apply(expr, 1 + self.linear_floats)

    def test_rdiv_transform(self):
        expr = 10. / dim('int')
        self.check_apply(expr, 10. / self.linear_ints)

    def test_rfloor_div_transform(self):
        expr = 2 // dim('int')
        self.check_apply(expr, 2 // self.linear_ints)

    def test_rmod_transform(self):
        expr = 2 % dim('int')
        self.check_apply(expr, 2 % self.linear_ints)

    def test_rmul_transform(self):
        expr = 10. * dim('float')
        self.check_apply(expr, self.linear_ints.astype('float64'))

    def test_rsub_transform(self):
        expr = 10 - dim('int')
        self.check_apply(expr, 10 - self.linear_ints)

    # NumPy operations

    def test_ufunc_transform(self):
        expr = np.sin(dim('float'))
        self.check_apply(expr, np.sin(self.linear_floats))

    def test_astype_transform(self):
        expr = dim('int').astype('float64')
        self.check_apply(expr, self.linear_ints.astype('float64'))

    def test_cumsum_transform(self):
        expr = dim('float').cumsum()
        self.check_apply(expr, self.linear_floats.cumsum())

    def test_max_transform(self):
        expr = dim('float').max()
        self.check_apply(expr, self.linear_floats.max())

    def test_min_transform(self):
        expr = dim('float').min()
        self.check_apply(expr, self.linear_floats.min())

    def test_round_transform(self):
        expr = dim('float').round()
        self.check_apply(expr, self.linear_floats.round())

    def test_sum_transform(self):
        expr = dim('float').sum()
        self.check_apply(expr, self.linear_floats.sum())

    def test_std_transform(self):
        expr = dim('float').std()
        # The pandas reference is computed with ddof=0.
        self.check_apply(expr, self.linear_floats.std(ddof=0))

    def test_var_transform(self):
        expr = dim('float').var()
        # The pandas reference is computed with ddof=0.
        self.check_apply(expr, self.linear_floats.var(ddof=0))

    def test_log_transform(self):
        expr = dim('float').log()
        self.check_apply(expr, np.log(self.linear_floats))

    def test_log10_transform(self):
        expr = dim('float').log10()
        self.check_apply(expr, np.log10(self.linear_floats))

    # Custom functions

    def test_norm_transform(self):
        expr = dim('int').norm()
        self.check_apply(expr, (self.linear_ints - 1) / 9.)

    def test_bin_transform(self):
        expr = dim('int').bin([0, 5, 10])
        expected = pd.Series(
            [2.5, 2.5, 2.5, 2.5, 2.5, 7.5, 7.5, 7.5, 7.5, 7.5])
        self.check_apply(expr, expected)

    def test_bin_transform_with_labels(self):
        expr = dim('int').bin([0, 5, 10], ['A', 'B'])
        expected = pd.Series(
            ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
        self.check_apply(expr, expected)

    def test_categorize_transform_list(self):
        expr = dim('categories').categorize(['circle', 'square', 'triangle'])
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We skip dask because results will depend on partition structure
        self.check_apply(expr, expected, skip_dask=True)

    def test_categorize_transform_dict(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square',
            'C': 'triangle',
        })
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We don't skip dask because results are now stable across partitions
        self.check_apply(expr, expected)

    def test_categorize_transform_dict_with_default(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square',
        }, default='triangle')
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We don't skip dask because results are stable across partitions
        self.check_apply(expr, expected)

    # Numpy functions

    def test_digitize(self):
        expr = dim('int').digitize([1, 5, 10])
        expected = pd.Series(np.array([1, 1, 1, 1, 2, 2, 2, 2, 2, 3]))
        self.check_apply(expr, expected)

    def test_isin(self):
        expr = dim('int').digitize([1, 5, 10]).isin([1, 3])
        expected = pd.Series(
            np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 1], dtype='bool'))
        self.check_apply(expr, expected)

    # Complex expressions

    def test_multi_operator_expression(self):
        expr = (((dim('float') - 2) * 3)**2)
        self.check_apply(expr, ((self.linear_floats - 2) * 3)**2)

    def test_multi_dim_expression(self):
        expr = dim('int') - dim('float')
        self.check_apply(expr, self.linear_ints - self.linear_floats)

    # Repr method

    def test_dim_repr(self):
        self.assertEqual(repr(dim('float')), "'float'")

    def test_unary_op_repr(self):
        self.assertEqual(repr(-dim('float')), "-dim('float')")

    def test_binary_op_repr(self):
        self.assertEqual(repr(dim('float') * 2), "dim('float')*2")

    def test_reverse_binary_op_repr(self):
        self.assertEqual(repr(1 + dim('float')), "1+dim('float')")

    def test_ufunc_expression_repr(self):
        self.assertEqual(repr(np.log(dim('float'))), "dim('float').log()")

    def test_custom_func_repr(self):
        self.assertEqual(repr(dim('float').norm()), "dim('float').norm()")

    def test_multi_operator_expression_repr(self):
        self.assertEqual(
            repr(((dim('float') - 2) * 3)**2),
            "((dim('float')-2)*3)**2")

    # Applies method

    def test_multi_dim_expression_applies(self):
        self.assertEqual(
            (dim('int') - dim('float')).applies(self.dataset), True)

    def test_multi_dim_expression_not_applies(self):
        self.assertEqual(
            (dim('foo') - dim('bar')).applies(self.dataset), False)

    def test_multi_dim_expression_partial_applies(self):
        self.assertEqual(
            (dim('int') - dim('bar')).applies(self.dataset), False)
class TestDimTransforms(ComparisonTestCase):
    """Tests for ``dim`` expressions on pandas, dask and xarray backed Datasets."""

    def setUp(self):
        """Build the reference Series and the Datasets the expressions are applied to."""
        self.linear_ints = pd.Series(np.arange(1, 11))
        self.linear_floats = pd.Series(np.arange(1, 11) / 10.)
        self.negative = pd.Series(-self.linear_floats)
        self.repeating = pd.Series(
            ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A'])
        self.booleans = self.repeating == 'A'

        self.dataset = Dataset(
            (self.linear_ints, self.linear_floats,
             self.negative, self.repeating, self.booleans),
            ['int', 'float', 'negative', 'categories', 'booleans'])

        # The dask-backed dataset only exists when dask.dataframe is importable.
        if dd is not None:
            ddf = dd.from_pandas(self.dataset.data, npartitions=2)
            self.dataset_dask = self.dataset.clone(data=ddf)

        # The xarray fixtures below require xarray.
        if xr is None:
            return

        x = np.arange(2, 62, 3)
        y = np.arange(2, 12, 2)
        array = np.arange(100).reshape(5, 20)
        darray = xr.DataArray(
            data=array,
            coords=OrderedDict([('x', x), ('y', y)]),
            dims=['y', 'x'])
        self.dataset_xarray = Dataset(darray, vdims=['z'])

        # Same data again, wrapped in a dask array, when dask.array is available.
        if da is not None:
            dask_array = da.from_array(array)
            dask_da = xr.DataArray(
                data=dask_array,
                coords=OrderedDict([('x', x), ('y', y)]),
                dims=['y', 'x'])
            self.dataset_xarray_dask = Dataset(dask_da, vdims=['z'])

    # Assertion helpers

    def assert_apply(self, expr, expected, skip_dask=False,
                     skip_no_index=False):
        """Apply ``expr`` to the tabular fixtures and compare against ``expected``.

        ``expected`` is either a scalar or a ``pd.Series``.

        skip_dask: do not check the dask-backed dataset.
        skip_no_index: do not run the checks that use ``keep_index=False``.

        Both flags are honoured for scalar results as well as Series results.
        """
        if np.isscalar(expected):
            # Pandas input
            if not skip_no_index:
                self.assertEqual(
                    expr.apply(self.dataset, keep_index=False), expected)
            self.assertEqual(
                expr.apply(self.dataset, keep_index=True), expected)

            if skip_dask or dd is None:
                return

            # Dask input
            if not skip_no_index:
                self.assertEqual(
                    expr.apply(self.dataset_dask, keep_index=False), expected)
            self.assertEqual(
                expr.apply(self.dataset_dask, keep_index=True), expected)
            return

        # Make sure expected is a pandas Series
        self.assertIsInstance(expected, pd.Series)

        # Check using dataset backed by pandas DataFrame
        # keep_index=False
        if not skip_no_index:
            np.testing.assert_equal(expr.apply(self.dataset), expected.values)
        # keep_index=True
        pd.testing.assert_series_equal(
            expr.apply(self.dataset, keep_index=True),
            expected,
            check_names=False)

        if skip_dask or dd is None:
            return

        # Check using dataset backed by Dask DataFrame
        expected_dask = dd.from_pandas(expected, npartitions=2)

        # keep_index=False, compute=False
        if not skip_no_index:
            da.assert_eq(
                expr.apply(self.dataset_dask, compute=False).compute(),
                expected_dask.values.compute())
        # keep_index=True, compute=False
        dd.assert_eq(
            expr.apply(self.dataset_dask, keep_index=True, compute=False),
            expected_dask,
            check_names=False)
        # keep_index=False, compute=True
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_dask, compute=True),
                expected_dask.values.compute())
        # keep_index=True, compute=True
        pd.testing.assert_series_equal(
            expr.apply(self.dataset_dask, keep_index=True, compute=True),
            expected_dask.compute(),
            check_names=False)

    def assert_apply_xarray(self, expr, expected, skip_dask=False,
                            skip_no_index=False):
        """Apply ``expr`` to the xarray fixtures and compare against ``expected``.

        ``expected`` is either a scalar or an ``xr.DataArray``.

        skip_dask: do not check the dask-array-backed xarray dataset.
        skip_no_index: do not run the checks that use ``keep_index=False``.

        Scalar results are only checked against the in-memory xarray dataset.
        """
        import xarray as xr

        if np.isscalar(expected):
            # In-memory xarray input
            if not skip_no_index:
                self.assertEqual(
                    expr.apply(self.dataset_xarray, keep_index=False),
                    expected)
            self.assertEqual(
                expr.apply(self.dataset_xarray, keep_index=True), expected)
            return

        # Make sure expected is an xarray DataArray
        self.assertIsInstance(expected, xr.DataArray)

        # Check using dataset backed by an in-memory xarray object
        # keep_index=False
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_xarray), expected.values)
        # keep_index=True
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray, keep_index=True), expected)

        if skip_dask or da is None:
            return

        # Check using dataset backed by a dask array: build the expected
        # DataArray by swapping a dask array into a copy of ``expected``.
        expected_da = da.from_array(expected.values)
        expected_dask = expected.copy()
        expected_dask.data = expected_da

        # keep_index=False, compute=False
        if not skip_no_index:
            da.assert_eq(
                expr.apply(self.dataset_xarray_dask, compute=False),
                expected_dask.data)
        # keep_index=True, compute=False
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask, keep_index=True,
                       compute=False),
            expected_dask,
        )
        # keep_index=False, compute=True
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_xarray_dask, compute=True),
                expected_dask.data.compute())
        # keep_index=True, compute=True
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask, keep_index=True,
                       compute=True),
            expected_dask.compute(),
        )

    # Unary operators

    def test_abs_transform(self):
        expr = abs(dim('negative'))
        self.assert_apply(expr, self.linear_floats)

    def test_neg_transform(self):
        expr = -dim('negative')
        self.assert_apply(expr, self.linear_floats)

    def test_inv_transform(self):
        expr = ~dim('booleans')
        self.assert_apply(expr, ~self.booleans)

    # Binary operators

    def test_add_transform(self):
        expr = dim('float') + 1
        self.assert_apply(expr, self.linear_floats + 1)

    def test_div_transform(self):
        expr = dim('int') / 10.
        self.assert_apply(expr, self.linear_floats)

    def test_floor_div_transform(self):
        expr = dim('int') // 2
        self.assert_apply(expr, self.linear_ints // 2)

    def test_mod_transform(self):
        expr = dim('int') % 2
        self.assert_apply(expr, self.linear_ints % 2)

    def test_mul_transform(self):
        expr = dim('float') * 10.
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_pow_transform(self):
        expr = dim('int')**2
        self.assert_apply(expr, self.linear_ints**2)

    def test_sub_transform(self):
        expr = dim('int') - 10
        self.assert_apply(expr, self.linear_ints - 10)

    # Reverse binary operators

    def test_radd_transform(self):
        expr = 1 + dim('float')
        self.assert_apply(expr, 1 + self.linear_floats)

    def test_rdiv_transform(self):
        expr = 10. / dim('int')
        self.assert_apply(expr, 10. / self.linear_ints)

    def test_rfloor_div_transform(self):
        expr = 2 // dim('int')
        self.assert_apply(expr, 2 // self.linear_ints)

    def test_rmod_transform(self):
        expr = 2 % dim('int')
        self.assert_apply(expr, 2 % self.linear_ints)

    def test_rmul_transform(self):
        expr = 10. * dim('float')
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_rsub_transform(self):
        expr = 10 - dim('int')
        self.assert_apply(expr, 10 - self.linear_ints)

    # NumPy operations

    def test_ufunc_transform(self):
        expr = np.sin(dim('float'))
        self.assert_apply(expr, np.sin(self.linear_floats))

    def test_astype_transform(self):
        expr = dim('int').astype('float64')
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_cumsum_transform(self):
        expr = dim('float').cumsum()
        self.assert_apply(expr, self.linear_floats.cumsum())

    def test_max_transform(self):
        expr = dim('float').max()
        self.assert_apply(expr, self.linear_floats.max())

    def test_min_transform(self):
        expr = dim('float').min()
        self.assert_apply(expr, self.linear_floats.min())

    def test_round_transform(self):
        expr = dim('float').round()
        self.assert_apply(expr, self.linear_floats.round())

    def test_sum_transform(self):
        expr = dim('float').sum()
        self.assert_apply(expr, self.linear_floats.sum())

    def test_std_transform(self):
        expr = dim('float').std(ddof=0)
        self.assert_apply(expr, self.linear_floats.std(ddof=0))

    def test_var_transform(self):
        expr = dim('float').var(ddof=0)
        self.assert_apply(expr, self.linear_floats.var(ddof=0))

    def test_log_transform(self):
        expr = dim('float').log()
        self.assert_apply(expr, np.log(self.linear_floats))

    def test_log10_transform(self):
        expr = dim('float').log10()
        self.assert_apply(expr, np.log10(self.linear_floats))

    # Custom functions

    def test_str_astype(self):
        expr = dim('int').str()
        self.assert_apply(expr, self.linear_ints.astype(str), skip_dask=True)

    def test_norm_transform(self):
        expr = dim('int').norm()
        self.assert_apply(expr, (self.linear_ints - 1) / 9.)

    def test_iloc_transform_int(self):
        expr = dim('int').iloc[1]
        self.assert_apply(expr, self.linear_ints[1])

    def test_iloc_transform_slice(self):
        expr = dim('int').iloc[1:3]
        self.assert_apply(expr, self.linear_ints[1:3], skip_dask=True)

    def test_iloc_transform_list(self):
        expr = dim('int').iloc[[1, 3, 5]]
        self.assert_apply(expr, self.linear_ints[[1, 3, 5]], skip_dask=True)

    def test_bin_transform(self):
        expr = dim('int').bin([0, 5, 10])
        expected = pd.Series(
            [2.5, 2.5, 2.5, 2.5, 2.5, 7.5, 7.5, 7.5, 7.5, 7.5])
        self.assert_apply(expr, expected)

    def test_bin_transform_with_labels(self):
        expr = dim('int').bin([0, 5, 10], ['A', 'B'])
        expected = pd.Series(
            ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
        self.assert_apply(expr, expected)

    def test_categorize_transform_list(self):
        expr = dim('categories').categorize(['circle', 'square', 'triangle'])
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We skip dask because results will depend on partition structure
        self.assert_apply(expr, expected, skip_dask=True)

    def test_categorize_transform_dict(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square',
            'C': 'triangle',
        })
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We don't skip dask because results are now stable across partitions
        self.assert_apply(expr, expected)

    def test_categorize_transform_dict_with_default(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square',
        }, default='triangle')
        expected = pd.Series(
            (['circle', 'square', 'triangle'] * 3) + ['circle'])
        # We don't skip dask because results are stable across partitions
        self.assert_apply(expr, expected)

    # Numpy functions

    def test_digitize(self):
        expr = dim('int').digitize([1, 5, 10])
        expected = pd.Series(
            np.array([1, 1, 1, 1, 2, 2, 2, 2, 2, 3])).astype('int64')
        self.assert_apply(expr, expected)

    def test_isin(self):
        expr = dim('int').digitize([1, 5, 10]).isin([1, 3])
        expected = pd.Series(
            np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 1], dtype='bool'))
        self.assert_apply(expr, expected)

    # Complex expressions

    def test_multi_operator_expression(self):
        expr = (((dim('float') - 2) * 3)**2)
        self.assert_apply(expr, ((self.linear_floats - 2) * 3)**2)

    def test_multi_dim_expression(self):
        expr = dim('int') - dim('float')
        self.assert_apply(expr, self.linear_ints - self.linear_floats)

    # Repr method

    def test_dim_repr(self):
        self.assertEqual(repr(dim('float')), "dim('float')")

    def test_unary_op_repr(self):
        self.assertEqual(repr(-dim('float')), "-dim('float')")

    def test_binary_op_repr(self):
        self.assertEqual(repr(dim('float') * 2), "dim('float')*2")

    def test_reverse_binary_op_repr(self):
        self.assertEqual(repr(1 + dim('float')), "1+dim('float')")

    def test_ufunc_expression_repr(self):
        self.assertEqual(repr(np.log(dim('float'))), "dim('float').log()")

    def test_custom_func_repr(self):
        self.assertEqual(repr(dim('float').norm()), "dim('float').norm()")

    def test_multi_operator_expression_repr(self):
        self.assertEqual(
            repr(((dim('float') - 2) * 3)**2),
            "((dim('float')-2)*3)**2")

    # Applies method

    def test_multi_dim_expression_applies(self):
        self.assertEqual(
            (dim('int') - dim('float')).applies(self.dataset), True)

    def test_multi_dim_expression_not_applies(self):
        self.assertEqual(
            (dim('foo') - dim('bar')).applies(self.dataset), False)

    def test_multi_dim_expression_partial_applies(self):
        self.assertEqual(
            (dim('int') - dim('bar')).applies(self.dataset), False)

    # Check namespaced expressions

    def test_pandas_namespace_accessor_repr(self):
        self.assertEqual(
            repr(dim('date').df.dt.year),
            "dim('date').pd.dt.year")

    def test_pandas_str_accessor(self):
        expr = dim('categories').df.str.lower()
        self.assert_apply(expr, self.repeating.str.lower())

    def test_pandas_chained_methods(self):
        expr = dim('int').df.rolling(1).mean()
        self.assert_apply(expr, self.linear_ints.rolling(1).mean())

    @xr_skip
    def test_xarray_namespace_method_repr(self):
        self.assertEqual(
            repr(dim('date').xr.quantile(0.95)),
            "dim('date').xr.quantile(0.95)")

    @xr_skip
    def test_xarray_quantile_method(self):
        expr = dim('z').xr.quantile(0.95)
        self.assert_apply_xarray(
            expr,
            self.dataset_xarray.data.z.quantile(0.95),
            skip_dask=True)

    @xr_skip
    def test_xarray_roll_method(self):
        expr = dim('z').xr.roll({'x': 1}, roll_coords=False)
        self.assert_apply_xarray(
            expr,
            self.dataset_xarray.data.z.roll({'x': 1}, roll_coords=False))

    @xr_skip
    @py2_skip
    def test_xarray_coarsen_method(self):
        expr = dim('z').xr.coarsen({'x': 4}).mean()
        self.assert_apply_xarray(
            expr,
            self.dataset_xarray.data.z.coarsen({'x': 4}).mean())
class IbisDatasetTest(HeterogeneousColumnTests, ScalarColumnTests, InterfaceTests):
    """
    Tests of Datasets whose data are ibis table expressions.

    Fixtures are pandas DataFrames loaded through ``create_temp_db``.
    Inherited tests that this suite does not run are overridden to
    raise ``SkipTest``.
    """

    datatype = "ibis"
    data_type = (ibis.expr.types.Expr,)

    __test__ = True

    def setUp(self):
        self.init_column_data()
        self.init_grid_data()
        self.init_data()

    def tearDown(self):
        pass

    def init_column_data(self):
        """Create the heterogeneous, aliased and homogeneous table fixtures."""
        # Create heterogeneously typed table
        self.kdims = ["Gender", "Age"]
        self.vdims = ["Weight", "Height"]
        self.gender = np.array(["M", "M", "F"])
        self.age = np.array([10, 16, 12])
        self.weight = np.array([15, 18, 10])
        self.height = np.array([0.8, 0.6, 0.8])

        hetero_columns = ["Gender", "Age", "Weight", "Height"]
        hetero_df = pd.DataFrame(
            {
                "Gender": self.gender,
                "Age": self.age,
                "Weight": self.weight,
                "Height": self.height,
            },
            columns=hetero_columns,
        )
        hetero_db = create_temp_db(hetero_df, "hetero")
        self.table = Dataset(
            hetero_db.table("hetero"), kdims=self.kdims, vdims=self.vdims
        )

        # Create table with aliased dimension names
        self.alias_kdims = [("gender", "Gender"), ("age", "Age")]
        self.alias_vdims = [("weight", "Weight"), ("height", "Height")]
        alias_columns = ["gender", "age", "weight", "height"]
        alias_df = pd.DataFrame(
            {
                "gender": self.gender,
                "age": self.age,
                "weight": self.weight,
                "height": self.height,
            },
            columns=alias_columns,
        )
        alias_db = create_temp_db(alias_df, "alias")
        self.alias_table = Dataset(
            alias_db.table("alias"),
            kdims=self.alias_kdims,
            vdims=self.alias_vdims,
        )

        # Numeric columns used by the "ht" and "hm" tables below
        self.xs = np.array(range(11))
        self.xs_2 = self.xs ** 2
        self.y_ints = self.xs * 2
        self.ys = np.linspace(0, 1, 11)
        self.zs = np.sin(self.xs)

        ht_df = pd.DataFrame({"x": self.xs, "y": self.ys}, columns=["x", "y"])
        ht_db = create_temp_db(ht_df, "ht")
        self.dataset_ht = Dataset(ht_db.table("ht"), kdims=["x"], vdims=["y"])

        hm_df = pd.DataFrame(
            {"x": self.xs, "y": self.y_ints}, columns=["x", "y"]
        )
        hm_db = create_temp_db(hm_df, "hm")
        self.dataset_hm = Dataset(hm_db.table("hm"), kdims=["x"], vdims=["y"])
        self.dataset_hm_alias = Dataset(
            hm_db.table("hm"), kdims=[("x", "X")], vdims=[("y", "Y")]
        )

    # Tests overridden to skip

    def test_dataset_array_init_hm(self):
        raise SkipTest("Not supported")

    def test_dataset_dict_dim_not_found_raises_on_scalar(self):
        raise SkipTest("Not supported")

    def test_dataset_array_init_hm_tuple_dims(self):
        raise SkipTest("Not supported")

    def test_dataset_odict_init(self):
        raise SkipTest("Not supported")

    def test_dataset_odict_init_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_simple_zip_init(self):
        raise SkipTest("Not supported")

    def test_dataset_simple_zip_init_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_zip_init(self):
        raise SkipTest("Not supported")

    def test_dataset_zip_init_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_tuple_init(self):
        raise SkipTest("Not supported")

    def test_dataset_tuple_init_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_implicit_indexing_init(self):
        raise SkipTest("Not supported")

    def test_dataset_dict_init(self):
        raise SkipTest("Not supported")

    def test_dataset_dataframe_init_hm(self):
        raise SkipTest("Not supported")

    def test_dataset_dataframe_init_hm_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_dataframe_init_ht(self):
        raise SkipTest("Not supported")

    def test_dataset_dataframe_init_ht_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_add_dimensions_values_hm(self):
        raise SkipTest("Not supported")

    def test_dataset_add_dimensions_values_ht(self):
        raise SkipTest("Not supported")

    # Dtype checks

    def test_dataset_dataset_ht_dtypes(self):
        ds = self.table
        expected_dtypes = (
            ("Gender", np.dtype("object")),
            ("Age", np.dtype("int32")),
            ("Weight", np.dtype("int32")),
            ("Height", np.dtype("float64")),
        )
        for column, dtype in expected_dtypes:
            self.assertEqual(ds.interface.dtype(ds, column), dtype)

    def test_dataset_dtypes(self):
        ds = self.dataset_hm
        for column in ("x", "y"):
            self.assertEqual(ds.interface.dtype(ds, column), np.dtype("int32"))

    # Reduce / aggregate / groupby

    def test_dataset_reduce_ht(self):
        reduced = Dataset(
            {"Age": self.age, "Weight": self.weight, "Height": self.height},
            kdims=self.kdims[1:],
            vdims=self.vdims,
        )
        result = self.table.reduce(["Gender"], np.mean)
        self.assertEqual(result.sort(), reduced.sort())

    def test_dataset_aggregate_ht(self):
        aggregated = Dataset(
            {"Gender": ["M", "F"], "Weight": [16.5, 10], "Height": [0.7, 0.8]},
            kdims=self.kdims[:1],
            vdims=self.vdims,
        )
        result = self.table.aggregate(["Gender"], np.mean)
        self.compare_dataset(result.sort(), aggregated.sort())

    def test_dataset_aggregate_ht_alias(self):
        aggregated = Dataset(
            {"gender": ["M", "F"], "weight": [16.5, 10], "height": [0.7, 0.8]},
            kdims=self.alias_kdims[:1],
            vdims=self.alias_vdims,
        )
        result = self.alias_table.aggregate("Gender", np.mean)
        self.compare_dataset(result.sort(), aggregated.sort())

    def test_dataset_groupby(self):
        males = {"Age": [10, 16], "Weight": [15, 18], "Height": [0.8, 0.6]}
        females = {"Age": [12], "Weight": [10], "Height": [0.8]}
        grouped = HoloMap(
            [
                ("M", Dataset(males, kdims=["Age"], vdims=self.vdims)),
                ("F", Dataset(females, kdims=["Age"], vdims=self.vdims)),
            ],
            kdims=["Gender"],
        )
        self.assertEqual(
            self.table.groupby(["Gender"]).apply("sort"),
            grouped.apply("sort"),
        )

    def test_dataset_groupby_alias(self):
        males = {"age": [10, 16], "weight": [15, 18], "height": [0.8, 0.6]}
        females = {"age": [12], "weight": [10], "height": [0.8]}
        age_kdims = [("age", "Age")]
        grouped = HoloMap(
            [
                ("M", Dataset(males, kdims=age_kdims, vdims=self.alias_vdims)),
                ("F", Dataset(females, kdims=[("age", "Age")], vdims=self.alias_vdims)),
            ],
            kdims=[("gender", "Gender")],
        )
        # Only the actual result is sorted here; the expected HoloMap is
        # compared as constructed.
        self.assertEqual(
            self.alias_table.groupby("Gender").apply("sort"), grouped
        )

    def test_dataset_groupby_second_dim(self):
        age_10 = {"Gender": ["M"], "Weight": [15], "Height": [0.8]}
        age_16 = {"Gender": ["M"], "Weight": [18], "Height": [0.6]}
        age_12 = {"Gender": ["F"], "Weight": [10], "Height": [0.8]}
        grouped = HoloMap(
            [
                (10, Dataset(age_10, kdims=["Gender"], vdims=self.vdims)),
                (16, Dataset(age_16, kdims=["Gender"], vdims=self.vdims)),
                (12, Dataset(age_12, kdims=["Gender"], vdims=self.vdims)),
            ],
            kdims=["Age"],
            sort=True,
        )
        self.assertEqual(self.table.groupby(["Age"]), grouped)

    def test_aggregation_operations(self):
        """Compare each aggregation on the ibis table with the same one on its dframe() copy."""
        aggregators = [
            np.min, np.nanmin, np.max, np.nanmax, np.mean, np.nanmean,
            np.sum, np.nansum, len, np.count_nonzero,
            # TODO: var-based operations failing this test
            # np.std, np.nanstd, np.var, np.nanvar
        ]
        for agg in aggregators:
            data = self.table.dframe()
            reference = self.table.clone(data=data)
            expected = reference.aggregate("Gender", agg).sort()

            result = self.table.aggregate("Gender", agg).sort()

            self.compare_dataset(expected, result, msg=str(agg))

    # The overrides below are only defined when has_rowid() is false.
    if not IbisInterface.has_rowid():

        def test_dataset_iloc_slice_rows_slice_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_slice_rows_list_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_slice_rows_index_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_slice_rows(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_rows_slice_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_rows_list_cols_by_name(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_rows_list_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_rows(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_cols_by_name(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_index_rows_slice_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_index_rows_index_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_ellipsis_list_cols_by_name(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_ellipsis_list_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_boolean_index(self):
            raise SkipTest("Not supported")