Exemplo n.º 1
0
class TestSparseDataFrameAnalytics(object):
    def setup_method(self, method):
        self.data = {
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10, dtype=float),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]
        }

        self.dates = bdate_range('1/1/2011', periods=10)

        self.frame = SparseDataFrame(self.data, index=self.dates)

    def test_cumsum(self):
        expected = SparseDataFrame(self.frame.to_dense().cumsum())

        result = self.frame.cumsum()
        tm.assert_sp_frame_equal(result, expected)

        result = self.frame.cumsum(axis=None)
        tm.assert_sp_frame_equal(result, expected)

        result = self.frame.cumsum(axis=0)
        tm.assert_sp_frame_equal(result, expected)

    def test_numpy_cumsum(self):
        result = np.cumsum(self.frame)
        expected = SparseDataFrame(self.frame.to_dense().cumsum())
        tm.assert_sp_frame_equal(result, expected)

        msg = "the 'dtype' parameter is not supported"
        tm.assert_raises_regex(ValueError,
                               msg,
                               np.cumsum,
                               self.frame,
                               dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        tm.assert_raises_regex(ValueError,
                               msg,
                               np.cumsum,
                               self.frame,
                               out=result)

    def test_numpy_func_call(self):
        # no exception should be raised even though
        # numpy passes in 'axis=None' or `axis=-1'
        funcs = [
            'sum', 'cumsum', 'var', 'mean', 'prod', 'cumprod', 'std', 'min',
            'max'
        ]
        for func in funcs:
            getattr(np, func)(self.frame)
Exemplo n.º 2
0
class TestSparseDataFrameAnalytics(tm.TestCase):
    def setUp(self):
        self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
                     'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
                     'C': np.arange(10, dtype=float),
                     'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

        self.dates = bdate_range('1/1/2011', periods=10)

        self.frame = SparseDataFrame(self.data, index=self.dates)

    def test_cumsum(self):
        expected = SparseDataFrame(self.frame.to_dense().cumsum())

        result = self.frame.cumsum()
        tm.assert_sp_frame_equal(result, expected)

        result = self.frame.cumsum(axis=None)
        tm.assert_sp_frame_equal(result, expected)

        result = self.frame.cumsum(axis=0)
        tm.assert_sp_frame_equal(result, expected)

    def test_numpy_cumsum(self):
        result = np.cumsum(self.frame)
        expected = SparseDataFrame(self.frame.to_dense().cumsum())
        tm.assert_sp_frame_equal(result, expected)

        msg = "the 'dtype' parameter is not supported"
        tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
                              self.frame, dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
                              self.frame, out=result)

    def test_numpy_func_call(self):
        # no exception should be raised even though
        # numpy passes in 'axis=None' or `axis=-1'
        funcs = ['sum', 'cumsum', 'var',
                 'mean', 'prod', 'cumprod',
                 'std', 'min', 'max']
        for func in funcs:
            getattr(np, func)(self.frame)
Exemplo n.º 3
0
class TestSparseDataFrameAnalytics(object):
    def setup_method(self, method):
        self.data = {
            'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10, dtype=float),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]
        }

        self.dates = bdate_range('1/1/2011', periods=10)

        self.frame = SparseDataFrame(self.data, index=self.dates)

    def test_cumsum(self):
        expected = SparseDataFrame(self.frame.to_dense().cumsum())

        result = self.frame.cumsum()
        tm.assert_sp_frame_equal(result, expected)

        result = self.frame.cumsum(axis=None)
        tm.assert_sp_frame_equal(result, expected)

        result = self.frame.cumsum(axis=0)
        tm.assert_sp_frame_equal(result, expected)

    def test_numpy_cumsum(self):
        result = np.cumsum(self.frame)
        expected = SparseDataFrame(self.frame.to_dense().cumsum())
        tm.assert_sp_frame_equal(result, expected)

        msg = "the 'dtype' parameter is not supported"
        tm.assert_raises_regex(ValueError,
                               msg,
                               np.cumsum,
                               self.frame,
                               dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        tm.assert_raises_regex(ValueError,
                               msg,
                               np.cumsum,
                               self.frame,
                               out=result)

    def test_numpy_func_call(self):
        # no exception should be raised even though
        # numpy passes in 'axis=None' or `axis=-1'
        funcs = [
            'sum', 'cumsum', 'var', 'mean', 'prod', 'cumprod', 'std', 'min',
            'max'
        ]
        for func in funcs:
            getattr(np, func)(self.frame)

    @pytest.mark.xfail(reason='Wrong SparseBlock initialization ' '(GH 17386)')
    def test_quantile(self):
        # GH 17386
        data = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        q = 0.1

        sparse_df = SparseDataFrame(data)
        result = sparse_df.quantile(q)

        dense_df = DataFrame(data)
        dense_expected = dense_df.quantile(q)
        sparse_expected = SparseSeries(dense_expected)

        tm.assert_series_equal(result, dense_expected)
        tm.assert_sp_series_equal(result, sparse_expected)

    @pytest.mark.xfail(reason='Wrong SparseBlock initialization ' '(GH 17386)')
    def test_quantile_multi(self):
        # GH 17386
        data = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        q = [0.1, 0.5]

        sparse_df = SparseDataFrame(data)
        result = sparse_df.quantile(q)

        dense_df = DataFrame(data)
        dense_expected = dense_df.quantile(q)
        sparse_expected = SparseDataFrame(dense_expected)

        tm.assert_frame_equal(result, dense_expected)
        tm.assert_sp_frame_equal(result, sparse_expected)
Exemplo n.º 4
0
class TestSparseDataFrameAnalytics(object):
    def setup_method(self, method):
        self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
                     'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
                     'C': np.arange(10, dtype=float),
                     'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

        self.dates = bdate_range('1/1/2011', periods=10)

        self.frame = SparseDataFrame(self.data, index=self.dates)

    def test_cumsum(self):
        expected = SparseDataFrame(self.frame.to_dense().cumsum())

        result = self.frame.cumsum()
        tm.assert_sp_frame_equal(result, expected)

        result = self.frame.cumsum(axis=None)
        tm.assert_sp_frame_equal(result, expected)

        result = self.frame.cumsum(axis=0)
        tm.assert_sp_frame_equal(result, expected)

    def test_numpy_cumsum(self):
        result = np.cumsum(self.frame)
        expected = SparseDataFrame(self.frame.to_dense().cumsum())
        tm.assert_sp_frame_equal(result, expected)

        msg = "the 'dtype' parameter is not supported"
        tm.assert_raises_regex(ValueError, msg, np.cumsum,
                               self.frame, dtype=np.int64)

        msg = "the 'out' parameter is not supported"
        tm.assert_raises_regex(ValueError, msg, np.cumsum,
                               self.frame, out=result)

    def test_numpy_func_call(self):
        # no exception should be raised even though
        # numpy passes in 'axis=None' or `axis=-1'
        funcs = ['sum', 'cumsum', 'var',
                 'mean', 'prod', 'cumprod',
                 'std', 'min', 'max']
        for func in funcs:
            getattr(np, func)(self.frame)

    @pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                              '(GH 17386)')
    def test_quantile(self):
        # GH 17386
        data = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        q = 0.1

        sparse_df = SparseDataFrame(data)
        result = sparse_df.quantile(q)

        dense_df = DataFrame(data)
        dense_expected = dense_df.quantile(q)
        sparse_expected = SparseSeries(dense_expected)

        tm.assert_series_equal(result, dense_expected)
        tm.assert_sp_series_equal(result, sparse_expected)

    @pytest.mark.xfail(reason='Wrong SparseBlock initialization '
                              '(GH 17386)')
    def test_quantile_multi(self):
        # GH 17386
        data = [[1, 1], [2, 10], [3, 100], [nan, nan]]
        q = [0.1, 0.5]

        sparse_df = SparseDataFrame(data)
        result = sparse_df.quantile(q)

        dense_df = DataFrame(data)
        dense_expected = dense_df.quantile(q)
        sparse_expected = SparseDataFrame(dense_expected)

        tm.assert_frame_equal(result, dense_expected)
        tm.assert_sp_frame_equal(result, sparse_expected)

    def test_assign_with_sparse_frame(self):
        # GH 19163
        df = pd.DataFrame({"a": [1, 2, 3]})
        res = df.to_sparse(fill_value=False).assign(newcol=False)
        exp = df.assign(newcol=False).to_sparse(fill_value=False)

        tm.assert_sp_frame_equal(res, exp)

        for column in res.columns:
            assert type(res[column]) is SparseSeries