def test_mask_2d_array(self):
    array = np.random.rand(4, 3)
    ds = Dataset(([0, 1, 2], [1, 2, 3, 4], array), ['x', 'y'], 'z')
    mask = np.array([[1, 1, 0], [1, 0, 1], [0, 1, 1], [1, 0, 1]],
                    dtype='bool')
    masked = ds.clone(ds.interface.mask(ds, mask))
    masked_array = masked.dimension_values(2, flat=False)
    expected = array.copy()
    expected[mask] = np.nan
    self.assertEqual(masked_array, expected)

def test_mask_2d_array_transposed(self):
    array = np.random.rand(4, 3)
    da = xr.DataArray(array.T,
                      coords={
                          'x': [0, 1, 2],
                          'y': [0, 1, 2, 3]
                      },
                      dims=['x', 'y'])
    ds = Dataset(da, ['x', 'y'], 'z')
    mask = np.array([[1, 1, 0], [1, 0, 1], [0, 1, 1], [1, 0, 1]],
                    dtype='bool')
    masked = ds.clone(ds.interface.mask(ds, mask))
    masked_array = masked.dimension_values(2, flat=False)
    expected = array.copy()
    expected[mask] = np.nan
    self.assertEqual(masked_array, expected)
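The two masking tests above assume numpy, xarray and HoloViews are importable and that the methods live on a gridded-interface test class; a minimal sketch of that context follows (the container class name is hypothetical, not part of the excerpt):

import numpy as np
import xarray as xr

from holoviews.core.data import Dataset
from holoviews.element.comparison import ComparisonTestCase


class GriddedMaskTests(ComparisonTestCase):  # hypothetical container class
    """Holds the two test_mask_2d_array* methods shown above."""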
Example #3
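A minimal sketch of the imports this TestDimTransforms example assumes; dask is optional, and aliasing dd/da to None when dask is missing is an assumption that matches the None checks in the test body:

import numpy as np
import pandas as pd

try:
    import dask.array as da
    import dask.dataframe as dd
except ImportError:
    da = dd = None  # dask-backed checks are skipped when dask is absent

from holoviews.core.data import Dataset
from holoviews.element.comparison import ComparisonTestCase
from holoviews.util.transform import dim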
class TestDimTransforms(ComparisonTestCase):
    def setUp(self):
        self.linear_ints = pd.Series(np.arange(1, 11))
        self.linear_floats = pd.Series(np.arange(1, 11) / 10.)
        self.negative = pd.Series(-self.linear_floats)
        self.repeating = pd.Series(
            ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A'])
        self.dataset = Dataset((self.linear_ints, self.linear_floats,
                                self.negative, self.repeating),
                               ['int', 'float', 'negative', 'categories'])

        if dd is None:
            return

        ddf = dd.from_pandas(self.dataset.data, npartitions=2)
        self.dataset_dask = self.dataset.clone(data=ddf)

    # Assertion helpers

    def check_apply(self, expr, expected, skip_dask=False):
        if np.isscalar(expected):
            # Pandas input
            self.assertEqual(expr.apply(self.dataset, keep_index=False),
                             expected)
            self.assertEqual(expr.apply(self.dataset, keep_index=True),
                             expected)

            if dd is None:
                return

            # Dask input
            self.assertEqual(expr.apply(self.dataset_dask, keep_index=False),
                             expected)
            self.assertEqual(expr.apply(self.dataset_dask, keep_index=True),
                             expected)
            return

        # Make sure expected is a pandas Series
        self.assertIsInstance(expected, pd.Series)

        # Check using dataset backed by pandas DataFrame
        # keep_index=False
        np.testing.assert_equal(expr.apply(self.dataset), expected.values)
        # keep_index=True
        pd.testing.assert_series_equal(expr.apply(self.dataset,
                                                  keep_index=True),
                                       expected,
                                       check_names=False)

        if skip_dask or dd is None:
            return

        # Check using dataset backed by Dask DataFrame
        expected_dask = dd.from_pandas(expected, npartitions=2)

        # keep_index=False, compute=False
        da.assert_eq(expr.apply(self.dataset_dask, compute=False),
                     expected_dask.values)
        # keep_index=True, compute=False
        dd.assert_eq(expr.apply(self.dataset_dask,
                                keep_index=True,
                                compute=False),
                     expected_dask,
                     check_names=False)
        # keep_index=False, compute=True
        np.testing.assert_equal(expr.apply(self.dataset_dask, compute=True),
                                expected_dask.values.compute())
        # keep_index=True, compute=True
        pd.testing.assert_series_equal(expr.apply(self.dataset_dask,
                                                  keep_index=True,
                                                  compute=True),
                                       expected_dask.compute(),
                                       check_names=False)

    # Unary operators

    def test_abs_transform(self):
        expr = abs(dim('negative'))
        self.check_apply(expr, self.linear_floats)

    def test_neg_transform(self):
        expr = -dim('negative')
        self.check_apply(expr, self.linear_floats)

    # Binary operators

    def test_add_transform(self):
        expr = dim('float') + 1
        self.check_apply(expr, self.linear_floats + 1)

    def test_div_transform(self):
        expr = dim('int') / 10.
        self.check_apply(expr, self.linear_floats)

    def test_floor_div_transform(self):
        expr = dim('int') // 2
        self.check_apply(expr, self.linear_ints // 2)

    def test_mod_transform(self):
        expr = dim('int') % 2
        self.check_apply(expr, self.linear_ints % 2)

    def test_mul_transform(self):
        expr = dim('float') * 10.
        self.check_apply(expr, self.linear_ints.astype('float64'))

    def test_pow_transform(self):
        expr = dim('int')**2
        self.check_apply(expr, self.linear_ints**2)

    def test_sub_transform(self):
        expr = dim('int') - 10
        self.check_apply(expr, self.linear_ints - 10)

    # Reverse binary operators

    def test_radd_transform(self):
        expr = 1 + dim('float')
        self.check_apply(expr, 1 + self.linear_floats)

    def test_rdiv_transform(self):
        expr = 10. / dim('int')
        self.check_apply(expr, 10. / self.linear_ints)

    def test_rfloor_div_transform(self):
        expr = 2 // dim('int')
        self.check_apply(expr, 2 // self.linear_ints)

    def test_rmod_transform(self):
        expr = 2 % dim('int')
        self.check_apply(expr, 2 % self.linear_ints)

    def test_rmul_transform(self):
        expr = 10. * dim('float')
        self.check_apply(expr, self.linear_ints.astype('float64'))

    def test_rsub_transform(self):
        expr = 10 - dim('int')
        self.check_apply(expr, 10 - self.linear_ints)

    # NumPy operations

    def test_ufunc_transform(self):
        expr = np.sin(dim('float'))
        self.check_apply(expr, np.sin(self.linear_floats))

    def test_astype_transform(self):
        expr = dim('int').astype('float64')
        self.check_apply(expr, self.linear_ints.astype('float64'))

    def test_cumsum_transform(self):
        expr = dim('float').cumsum()
        self.check_apply(expr, self.linear_floats.cumsum())

    def test_max_transform(self):
        expr = dim('float').max()
        self.check_apply(expr, self.linear_floats.max())

    def test_min_transform(self):
        expr = dim('float').min()
        self.check_apply(expr, self.linear_floats.min())

    def test_round_transform(self):
        expr = dim('float').round()
        self.check_apply(expr, self.linear_floats.round())

    def test_sum_transform(self):
        expr = dim('float').sum()
        self.check_apply(expr, self.linear_floats.sum())

    def test_std_transform(self):
        expr = dim('float').std()
        self.check_apply(expr, self.linear_floats.std(ddof=0))

    def test_var_transform(self):
        expr = dim('float').var()
        self.check_apply(expr, self.linear_floats.var(ddof=0))

    def test_log_transform(self):
        expr = dim('float').log()
        self.check_apply(expr, np.log(self.linear_floats))

    def test_log10_transform(self):
        expr = dim('float').log10()
        self.check_apply(expr, np.log10(self.linear_floats))

    # Custom functions

    def test_norm_transform(self):
        expr = dim('int').norm()
        self.check_apply(expr, (self.linear_ints - 1) / 9.)

    def test_bin_transform(self):
        expr = dim('int').bin([0, 5, 10])
        expected = pd.Series(
            [2.5, 2.5, 2.5, 2.5, 2.5, 7.5, 7.5, 7.5, 7.5, 7.5])
        self.check_apply(expr, expected)

    def test_bin_transform_with_labels(self):
        expr = dim('int').bin([0, 5, 10], ['A', 'B'])
        expected = pd.Series(
            ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
        self.check_apply(expr, expected)

    def test_categorize_transform_list(self):
        expr = dim('categories').categorize(['circle', 'square', 'triangle'])
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We skip dask because results will depend on partition structure
        self.check_apply(expr, expected, skip_dask=True)

    def test_categorize_transform_dict(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square',
            'C': 'triangle'
        })
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We don't skip dask because results are now stable across partitions
        self.check_apply(expr, expected)

    def test_categorize_transform_dict_with_default(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square'
        },
                                            default='triangle')
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We don't skip dask because results are stable across partitions
        self.check_apply(expr, expected)

    # Numpy functions

    def test_digitize(self):
        expr = dim('int').digitize([1, 5, 10])
        expected = pd.Series(np.array([1, 1, 1, 1, 2, 2, 2, 2, 2, 3]))
        self.check_apply(expr, expected)

    def test_isin(self):
        expr = dim('int').digitize([1, 5, 10]).isin([1, 3])
        expected = pd.Series(
            np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 1], dtype='bool'))
        self.check_apply(expr, expected)

    # Complex expressions

    def test_multi_operator_expression(self):
        expr = (((dim('float') - 2) * 3)**2)
        self.check_apply(expr, ((self.linear_floats - 2) * 3)**2)

    def test_multi_dim_expression(self):
        expr = dim('int') - dim('float')
        self.check_apply(expr, self.linear_ints - self.linear_floats)

    # Repr method

    def test_dim_repr(self):
        self.assertEqual(repr(dim('float')), "'float'")

    def test_unary_op_repr(self):
        self.assertEqual(repr(-dim('float')), "-dim('float')")

    def test_binary_op_repr(self):
        self.assertEqual(repr(dim('float') * 2), "dim('float')*2")

    def test_reverse_binary_op_repr(self):
        self.assertEqual(repr(1 + dim('float')), "1+dim('float')")

    def test_ufunc_expression_repr(self):
        self.assertEqual(repr(np.log(dim('float'))), "dim('float').log()")

    def test_custom_func_repr(self):
        self.assertEqual(repr(dim('float').norm()), "dim('float').norm()")

    def test_multi_operator_expression_repr(self):
        self.assertEqual(repr(((dim('float') - 2) * 3)**2),
                         "((dim('float')-2)*3)**2")

    # Applies method

    def test_multi_dim_expression_applies(self):
        self.assertEqual((dim('int') - dim('float')).applies(self.dataset),
                         True)

    def test_multi_dim_expression_not_applies(self):
        self.assertEqual((dim('foo') - dim('bar')).applies(self.dataset),
                         False)

    def test_multi_dim_expression_partial_applies(self):
        self.assertEqual((dim('int') - dim('bar')).applies(self.dataset),
                         False)
Example #4
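This variant adds boolean, iloc, namespaced and xarray-backed checks; a sketch of the imports it assumes (xr_skip and py2_skip are assumed to be skip decorators provided by the surrounding test suite, e.g. skipping when xarray is unavailable or when running on Python 2):

from collections import OrderedDict

import numpy as np
import pandas as pd

try:
    import dask.array as da
    import dask.dataframe as dd
except ImportError:
    da = dd = None

try:
    import xarray as xr
except ImportError:
    xr = None

from holoviews.core.data import Dataset
from holoviews.element.comparison import ComparisonTestCase
from holoviews.util.transform import dim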
class TestDimTransforms(ComparisonTestCase):
    def setUp(self):
        self.linear_ints = pd.Series(np.arange(1, 11))
        self.linear_floats = pd.Series(np.arange(1, 11) / 10.)
        self.negative = pd.Series(-self.linear_floats)
        self.repeating = pd.Series(
            ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A'])
        self.booleans = self.repeating == 'A'
        self.dataset = Dataset(
            (self.linear_ints, self.linear_floats, self.negative,
             self.repeating, self.booleans),
            ['int', 'float', 'negative', 'categories', 'booleans'])

        if dd is not None:
            ddf = dd.from_pandas(self.dataset.data, npartitions=2)
            self.dataset_dask = self.dataset.clone(data=ddf)

        if xr is None:
            return

        x = np.arange(2, 62, 3)
        y = np.arange(2, 12, 2)
        array = np.arange(100).reshape(5, 20)
        darray = xr.DataArray(data=array,
                              coords=OrderedDict([('x', x), ('y', y)]),
                              dims=['y', 'x'])
        self.dataset_xarray = Dataset(darray, vdims=['z'])
        if da is not None:
            dask_array = da.from_array(array)
            dask_da = xr.DataArray(data=dask_array,
                                   coords=OrderedDict([('x', x), ('y', y)]),
                                   dims=['y', 'x'])
            self.dataset_xarray_dask = Dataset(dask_da, vdims=['z'])

    # Assertion helpers

    def assert_apply(self,
                     expr,
                     expected,
                     skip_dask=False,
                     skip_no_index=False):
        if np.isscalar(expected):
            # Pandas input
            self.assertEqual(expr.apply(self.dataset, keep_index=False),
                             expected)
            self.assertEqual(expr.apply(self.dataset, keep_index=True),
                             expected)

            if dd is None:
                return

            # Dask input
            self.assertEqual(expr.apply(self.dataset_dask, keep_index=False),
                             expected)
            self.assertEqual(expr.apply(self.dataset_dask, keep_index=True),
                             expected)
            return

        # Make sure expected is a pandas Series
        self.assertIsInstance(expected, pd.Series)

        # Check using dataset backed by pandas DataFrame
        # keep_index=False
        if not skip_no_index:
            np.testing.assert_equal(expr.apply(self.dataset), expected.values)
        # keep_index=True
        pd.testing.assert_series_equal(expr.apply(self.dataset,
                                                  keep_index=True),
                                       expected,
                                       check_names=False)

        if skip_dask or dd is None:
            return

        # Check using dataset backed by Dask DataFrame
        expected_dask = dd.from_pandas(expected, npartitions=2)

        # keep_index=False, compute=False
        if not skip_no_index:
            da.assert_eq(
                expr.apply(self.dataset_dask, compute=False).compute(),
                expected_dask.values.compute())
        # keep_index=True, compute=False
        dd.assert_eq(expr.apply(self.dataset_dask,
                                keep_index=True,
                                compute=False),
                     expected_dask,
                     check_names=False)
        # keep_index=False, compute=True
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_dask, compute=True),
                expected_dask.values.compute())
        # keep_index=True, compute=True
        pd.testing.assert_series_equal(expr.apply(self.dataset_dask,
                                                  keep_index=True,
                                                  compute=True),
                                       expected_dask.compute(),
                                       check_names=False)

    def assert_apply_xarray(self,
                            expr,
                            expected,
                            skip_dask=False,
                            skip_no_index=False):
        import xarray as xr
        if np.isscalar(expected):
            # Xarray input
            self.assertEqual(expr.apply(self.dataset_xarray, keep_index=False),
                             expected)
            self.assertEqual(expr.apply(self.dataset_xarray, keep_index=True),
                             expected)
            return

        # Make sure expected is an xarray DataArray
        self.assertIsInstance(expected, xr.DataArray)

        # Check using dataset backed by an xarray DataArray
        # keep_index=False
        if not skip_no_index:
            np.testing.assert_equal(expr.apply(self.dataset_xarray),
                                    expected.values)
        # keep_index=True
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray, keep_index=True), expected)

        if skip_dask or da is None:
            return

        # Check using dataset backed by a dask-chunked xarray DataArray
        expected_da = da.from_array(expected.values)
        expected_dask = expected.copy()
        expected_dask.data = expected_da

        # keep_index=False, compute=False
        if not skip_no_index:
            da.assert_eq(expr.apply(self.dataset_xarray_dask, compute=False),
                         expected_dask.data)
        # keep_index=True, compute=False
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask,
                       keep_index=True,
                       compute=False),
            expected_dask,
        )
        # keep_index=False, compute=True
        if not skip_no_index:
            np.testing.assert_equal(
                expr.apply(self.dataset_xarray_dask, compute=True),
                expected_dask.data.compute())
        # keep_index=True, compute=True
        xr.testing.assert_equal(
            expr.apply(self.dataset_xarray_dask, keep_index=True,
                       compute=True),
            expected_dask.compute(),
        )

    # Unary operators

    def test_abs_transform(self):
        expr = abs(dim('negative'))
        self.assert_apply(expr, self.linear_floats)

    def test_neg_transform(self):
        expr = -dim('negative')
        self.assert_apply(expr, self.linear_floats)

    def test_inv_transform(self):
        expr = ~dim('booleans')
        self.assert_apply(expr, ~self.booleans)

    # Binary operators

    def test_add_transform(self):
        expr = dim('float') + 1
        self.assert_apply(expr, self.linear_floats + 1)

    def test_div_transform(self):
        expr = dim('int') / 10.
        self.assert_apply(expr, self.linear_floats)

    def test_floor_div_transform(self):
        expr = dim('int') // 2
        self.assert_apply(expr, self.linear_ints // 2)

    def test_mod_transform(self):
        expr = dim('int') % 2
        self.assert_apply(expr, self.linear_ints % 2)

    def test_mul_transform(self):
        expr = dim('float') * 10.
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_pow_transform(self):
        expr = dim('int')**2
        self.assert_apply(expr, self.linear_ints**2)

    def test_sub_transform(self):
        expr = dim('int') - 10
        self.assert_apply(expr, self.linear_ints - 10)

    # Reverse binary operators

    def test_radd_transform(self):
        expr = 1 + dim('float')
        self.assert_apply(expr, 1 + self.linear_floats)

    def test_rdiv_transform(self):
        expr = 10. / dim('int')
        self.assert_apply(expr, 10. / self.linear_ints)

    def test_rfloor_div_transform(self):
        expr = 2 // dim('int')
        self.assert_apply(expr, 2 // self.linear_ints)

    def test_rmod_transform(self):
        expr = 2 % dim('int')
        self.assert_apply(expr, 2 % self.linear_ints)

    def test_rmul_transform(self):
        expr = 10. * dim('float')
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_rsub_transform(self):
        expr = 10 - dim('int')
        self.assert_apply(expr, 10 - self.linear_ints)

    # NumPy operations

    def test_ufunc_transform(self):
        expr = np.sin(dim('float'))
        self.assert_apply(expr, np.sin(self.linear_floats))

    def test_astype_transform(self):
        expr = dim('int').astype('float64')
        self.assert_apply(expr, self.linear_ints.astype('float64'))

    def test_cumsum_transform(self):
        expr = dim('float').cumsum()
        self.assert_apply(expr, self.linear_floats.cumsum())

    def test_max_transform(self):
        expr = dim('float').max()
        self.assert_apply(expr, self.linear_floats.max())

    def test_min_transform(self):
        expr = dim('float').min()
        self.assert_apply(expr, self.linear_floats.min())

    def test_round_transform(self):
        expr = dim('float').round()
        self.assert_apply(expr, self.linear_floats.round())

    def test_sum_transform(self):
        expr = dim('float').sum()
        self.assert_apply(expr, self.linear_floats.sum())

    def test_std_transform(self):
        expr = dim('float').std(ddof=0)
        self.assert_apply(expr, self.linear_floats.std(ddof=0))

    def test_var_transform(self):
        expr = dim('float').var(ddof=0)
        self.assert_apply(expr, self.linear_floats.var(ddof=0))

    def test_log_transform(self):
        expr = dim('float').log()
        self.assert_apply(expr, np.log(self.linear_floats))

    def test_log10_transform(self):
        expr = dim('float').log10()
        self.assert_apply(expr, np.log10(self.linear_floats))

    # Custom functions

    def test_str_astype(self):
        expr = dim('int').str()
        self.assert_apply(expr, self.linear_ints.astype(str), skip_dask=True)

    def test_norm_transform(self):
        expr = dim('int').norm()
        self.assert_apply(expr, (self.linear_ints - 1) / 9.)

    def test_iloc_transform_int(self):
        expr = dim('int').iloc[1]
        self.assert_apply(expr, self.linear_ints[1])

    def test_iloc_transform_slice(self):
        expr = dim('int').iloc[1:3]
        self.assert_apply(expr, self.linear_ints[1:3], skip_dask=True)

    def test_iloc_transform_list(self):
        expr = dim('int').iloc[[1, 3, 5]]
        self.assert_apply(expr, self.linear_ints[[1, 3, 5]], skip_dask=True)

    def test_bin_transform(self):
        expr = dim('int').bin([0, 5, 10])
        expected = pd.Series(
            [2.5, 2.5, 2.5, 2.5, 2.5, 7.5, 7.5, 7.5, 7.5, 7.5])
        self.assert_apply(expr, expected)

    def test_bin_transform_with_labels(self):
        expr = dim('int').bin([0, 5, 10], ['A', 'B'])
        expected = pd.Series(
            ['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])
        self.assert_apply(expr, expected)

    def test_categorize_transform_list(self):
        expr = dim('categories').categorize(['circle', 'square', 'triangle'])
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We skip dask because results will depend on partition structure
        self.assert_apply(expr, expected, skip_dask=True)

    def test_categorize_transform_dict(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square',
            'C': 'triangle'
        })
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We don't skip dask because results are now stable across partitions
        self.assert_apply(expr, expected)

    def test_categorize_transform_dict_with_default(self):
        expr = dim('categories').categorize({
            'A': 'circle',
            'B': 'square'
        },
                                            default='triangle')
        expected = pd.Series((['circle', 'square', 'triangle'] * 3) +
                             ['circle'])
        # We don't skip dask because results are stable across partitions
        self.assert_apply(expr, expected)

    # Numpy functions

    def test_digitize(self):
        expr = dim('int').digitize([1, 5, 10])
        expected = pd.Series(np.array([1, 1, 1, 1, 2, 2, 2, 2, 2,
                                       3])).astype('int64')
        self.assert_apply(expr, expected)

    def test_isin(self):
        expr = dim('int').digitize([1, 5, 10]).isin([1, 3])
        expected = pd.Series(
            np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, 1], dtype='bool'))
        self.assert_apply(expr, expected)

    # Complex expressions

    def test_multi_operator_expression(self):
        expr = (((dim('float') - 2) * 3)**2)
        self.assert_apply(expr, ((self.linear_floats - 2) * 3)**2)

    def test_multi_dim_expression(self):
        expr = dim('int') - dim('float')
        self.assert_apply(expr, self.linear_ints - self.linear_floats)

    # Repr method

    def test_dim_repr(self):
        self.assertEqual(repr(dim('float')), "dim('float')")

    def test_unary_op_repr(self):
        self.assertEqual(repr(-dim('float')), "-dim('float')")

    def test_binary_op_repr(self):
        self.assertEqual(repr(dim('float') * 2), "dim('float')*2")

    def test_reverse_binary_op_repr(self):
        self.assertEqual(repr(1 + dim('float')), "1+dim('float')")

    def test_ufunc_expression_repr(self):
        self.assertEqual(repr(np.log(dim('float'))), "dim('float').log()")

    def test_custom_func_repr(self):
        self.assertEqual(repr(dim('float').norm()), "dim('float').norm()")

    def test_multi_operator_expression_repr(self):
        self.assertEqual(repr(((dim('float') - 2) * 3)**2),
                         "((dim('float')-2)*3)**2")

    # Applies method

    def test_multi_dim_expression_applies(self):
        self.assertEqual((dim('int') - dim('float')).applies(self.dataset),
                         True)

    def test_multi_dim_expression_not_applies(self):
        self.assertEqual((dim('foo') - dim('bar')).applies(self.dataset),
                         False)

    def test_multi_dim_expression_partial_applies(self):
        self.assertEqual((dim('int') - dim('bar')).applies(self.dataset),
                         False)

    # Check namespaced expressions

    def test_pandas_namespace_accessor_repr(self):
        self.assertEqual(repr(dim('date').df.dt.year),
                         "dim('date').pd.dt.year")

    def test_pandas_str_accessor(self):
        expr = dim('categories').df.str.lower()
        self.assert_apply(expr, self.repeating.str.lower())

    def test_pandas_chained_methods(self):
        expr = dim('int').df.rolling(1).mean()
        self.assert_apply(expr, self.linear_ints.rolling(1).mean())

    @xr_skip
    def test_xarray_namespace_method_repr(self):
        self.assertEqual(repr(dim('date').xr.quantile(0.95)),
                         "dim('date').xr.quantile(0.95)")

    @xr_skip
    def test_xarray_quantile_method(self):
        expr = dim('z').xr.quantile(0.95)
        self.assert_apply_xarray(expr,
                                 self.dataset_xarray.data.z.quantile(0.95),
                                 skip_dask=True)

    @xr_skip
    def test_xarray_roll_method(self):
        expr = dim('z').xr.roll({'x': 1}, roll_coords=False)
        self.assert_apply_xarray(
            expr, self.dataset_xarray.data.z.roll({'x': 1}, roll_coords=False))

    @xr_skip
    @py2_skip
    def test_xarray_coarsen_method(self):
        expr = dim('z').xr.coarsen({'x': 4}).mean()
        self.assert_apply_xarray(
            expr,
            self.dataset_xarray.data.z.coarsen({
                'x': 4
            }).mean())
Example #5
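The Ibis interface tests below rely on helpers from the surrounding HoloViews test suite; a sketch of the assumed imports (the module paths for the mixin base classes and for create_temp_db are assumptions, not shown in the excerpt):

from unittest import SkipTest

import ibis
import numpy as np
import pandas as pd

from holoviews.core.data import Dataset
from holoviews.core.data.ibis import IbisInterface
from holoviews.core.spaces import HoloMap

# HeterogeneousColumnTests, ScalarColumnTests, InterfaceTests and
# create_temp_db (which loads a DataFrame into a temporary ibis-backed
# database table) are assumed to come from the HoloViews interface test
# helpers shipped alongside this module.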
class IbisDatasetTest(HeterogeneousColumnTests, ScalarColumnTests, InterfaceTests):
    """
    Test of the generic dictionary interface.
    """

    datatype = "ibis"
    data_type = (ibis.expr.types.Expr,)

    __test__ = True

    def setUp(self):
        self.init_column_data()
        self.init_grid_data()
        self.init_data()

    def tearDown(self):
        pass

    def init_column_data(self):
        # Create heterogeneously typed table
        self.kdims = ["Gender", "Age"]
        self.vdims = ["Weight", "Height"]
        self.gender, self.age = np.array(["M", "M", "F"]), np.array([10, 16, 12])
        self.weight, self.height = np.array([15, 18, 10]), np.array([0.8, 0.6, 0.8])

        hetero_df = pd.DataFrame(
            {
                "Gender": self.gender,
                "Age": self.age,
                "Weight": self.weight,
                "Height": self.height,
            },
            columns=["Gender", "Age", "Weight", "Height"],
        )
        hetero_db = create_temp_db(hetero_df, "hetero")
        self.table = Dataset(
            hetero_db.table("hetero"), kdims=self.kdims, vdims=self.vdims
        )

        # Create table with aliased dimension names
        self.alias_kdims = [("gender", "Gender"), ("age", "Age")]
        self.alias_vdims = [("weight", "Weight"), ("height", "Height")]
        alias_df = pd.DataFrame(
            {
                "gender": self.gender,
                "age": self.age,
                "weight": self.weight,
                "height": self.height,
            },
            columns=["gender", "age", "weight", "height"],
        )
        alias_db = create_temp_db(alias_df, "alias")
        self.alias_table = Dataset(
            alias_db.table("alias"), kdims=self.alias_kdims, vdims=self.alias_vdims
        )

        self.xs = np.array(range(11))
        self.xs_2 = self.xs ** 2
        self.y_ints = self.xs * 2
        self.ys = np.linspace(0, 1, 11)
        self.zs = np.sin(self.xs)

        ht_df = pd.DataFrame({"x": self.xs, "y": self.ys}, columns=["x", "y"])
        ht_db = create_temp_db(ht_df, "ht")
        self.dataset_ht = Dataset(ht_db.table("ht"), kdims=["x"], vdims=["y"])

        hm_df = pd.DataFrame({"x": self.xs, "y": self.y_ints}, columns=["x", "y"])
        hm_db = create_temp_db(hm_df, "hm")
        self.dataset_hm = Dataset(hm_db.table("hm"), kdims=["x"], vdims=["y"])
        self.dataset_hm_alias = Dataset(
            hm_db.table("hm"), kdims=[("x", "X")], vdims=[("y", "Y")]
        )

    def test_dataset_array_init_hm(self):
        raise SkipTest("Not supported")

    def test_dataset_dict_dim_not_found_raises_on_scalar(self):
        raise SkipTest("Not supported")

    def test_dataset_array_init_hm_tuple_dims(self):
        raise SkipTest("Not supported")

    def test_dataset_odict_init(self):
        raise SkipTest("Not supported")

    def test_dataset_odict_init_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_simple_zip_init(self):
        raise SkipTest("Not supported")

    def test_dataset_simple_zip_init_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_zip_init(self):
        raise SkipTest("Not supported")

    def test_dataset_zip_init_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_tuple_init(self):
        raise SkipTest("Not supported")

    def test_dataset_tuple_init_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_implicit_indexing_init(self):
        raise SkipTest("Not supported")

    def test_dataset_dict_init(self):
        raise SkipTest("Not supported")

    def test_dataset_dataframe_init_hm(self):
        raise SkipTest("Not supported")

    def test_dataset_dataframe_init_hm_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_dataframe_init_ht(self):
        raise SkipTest("Not supported")

    def test_dataset_dataframe_init_ht_alias(self):
        raise SkipTest("Not supported")

    def test_dataset_add_dimensions_values_hm(self):
        raise SkipTest("Not supported")

    def test_dataset_add_dimensions_values_ht(self):
        raise SkipTest("Not supported")

    def test_dataset_dataset_ht_dtypes(self):
        ds = self.table
        self.assertEqual(ds.interface.dtype(ds, "Gender"), np.dtype("object"))
        self.assertEqual(ds.interface.dtype(ds, "Age"), np.dtype("int32"))
        self.assertEqual(ds.interface.dtype(ds, "Weight"), np.dtype("int32"))
        self.assertEqual(ds.interface.dtype(ds, "Height"), np.dtype("float64"))

    def test_dataset_dtypes(self):
        self.assertEqual(
            self.dataset_hm.interface.dtype(self.dataset_hm, "x"), np.dtype("int32")
        )
        self.assertEqual(
            self.dataset_hm.interface.dtype(self.dataset_hm, "y"), np.dtype("int32")
        )

    def test_dataset_reduce_ht(self):
        reduced = Dataset(
            {"Age": self.age, "Weight": self.weight, "Height": self.height},
            kdims=self.kdims[1:],
            vdims=self.vdims,
        )
        self.assertEqual(self.table.reduce(["Gender"], np.mean).sort(), reduced.sort())

    def test_dataset_aggregate_ht(self):
        aggregated = Dataset(
            {"Gender": ["M", "F"], "Weight": [16.5, 10], "Height": [0.7, 0.8]},
            kdims=self.kdims[:1],
            vdims=self.vdims,
        )
        self.compare_dataset(
            self.table.aggregate(["Gender"], np.mean).sort(), aggregated.sort()
        )

    def test_dataset_aggregate_ht_alias(self):
        aggregated = Dataset(
            {"gender": ["M", "F"], "weight": [16.5, 10], "height": [0.7, 0.8]},
            kdims=self.alias_kdims[:1],
            vdims=self.alias_vdims,
        )
        self.compare_dataset(
            self.alias_table.aggregate("Gender", np.mean).sort(), aggregated.sort()
        )

    def test_dataset_groupby(self):
        group1 = {"Age": [10, 16], "Weight": [15, 18], "Height": [0.8, 0.6]}
        group2 = {"Age": [12], "Weight": [10], "Height": [0.8]}
        grouped = HoloMap(
            [
                ("M", Dataset(group1, kdims=["Age"], vdims=self.vdims)),
                ("F", Dataset(group2, kdims=["Age"], vdims=self.vdims)),
            ],
            kdims=["Gender"],
        )
        self.assertEqual(
            self.table.groupby(["Gender"]).apply("sort"), grouped.apply("sort")
        )

    def test_dataset_groupby_alias(self):
        group1 = {"age": [10, 16], "weight": [15, 18], "height": [0.8, 0.6]}
        group2 = {"age": [12], "weight": [10], "height": [0.8]}
        grouped = HoloMap(
            [
                ("M", Dataset(group1, kdims=[("age", "Age")], vdims=self.alias_vdims)),
                ("F", Dataset(group2, kdims=[("age", "Age")], vdims=self.alias_vdims)),
            ],
            kdims=[("gender", "Gender")],
        )
        self.assertEqual(self.alias_table.groupby("Gender").apply("sort"), grouped)

    def test_dataset_groupby_second_dim(self):
        group1 = {"Gender": ["M"], "Weight": [15], "Height": [0.8]}
        group2 = {"Gender": ["M"], "Weight": [18], "Height": [0.6]}
        group3 = {"Gender": ["F"], "Weight": [10], "Height": [0.8]}
        grouped = HoloMap(
            [
                (10, Dataset(group1, kdims=["Gender"], vdims=self.vdims)),
                (16, Dataset(group2, kdims=["Gender"], vdims=self.vdims)),
                (12, Dataset(group3, kdims=["Gender"], vdims=self.vdims)),
            ],
            kdims=["Age"],
            sort=True,
        )
        self.assertEqual(self.table.groupby(["Age"]), grouped)

    def test_aggregation_operations(self):
        for agg in [
            np.min, np.nanmin, np.max, np.nanmax, np.mean, np.nanmean,
            np.sum, np.nansum, len, np.count_nonzero,
            # TODO: var-based operations failing this test
            # np.std, np.nanstd, np.var, np.nanvar
        ]:
            data = self.table.dframe()
            expected = self.table.clone(
                data=data
            ).aggregate("Gender", agg).sort()

            result = self.table.aggregate("Gender", agg).sort()

            self.compare_dataset(expected, result, msg=str(agg))

    if not IbisInterface.has_rowid():

        def test_dataset_iloc_slice_rows_slice_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_slice_rows_list_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_slice_rows_index_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_slice_rows(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_rows_slice_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_rows_list_cols_by_name(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_rows_list_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_rows(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_cols_by_name(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_list_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_index_rows_slice_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_index_rows_index_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_ellipsis_list_cols_by_name(self):
            raise SkipTest("Not supported")

        def test_dataset_iloc_ellipsis_list_cols(self):
            raise SkipTest("Not supported")

        def test_dataset_boolean_index(self):
            raise SkipTest("Not supported")