def test_from_scipy_fillna(spmatrix):
    """fillna on a frame built from a scipy sparse matrix fills every NaN.

    Regression test for GH 16112.
    """
    dense = np.eye(3)
    dense[1:, 0] = np.nan

    try:
        sparse_matrix = spmatrix(dense)
        assert sparse_matrix.dtype == dense.dtype
    except (TypeError, AssertionError):
        # Either the conversion raised or it did not keep the input dtype;
        # this matrix type / dtype pairing is treated as unsupported and
        # the remainder of the test is skipped.
        return

    result = SparseDataFrame(sparse_matrix).fillna(-1.0)

    # The expected columns are built with the default fill value first so
    # that their SparseArrays come out "compressed"; the fill value that
    # fillna() is expected to leave behind (-1) is patched in afterwards.
    columns = {
        0: SparseSeries([1.0, -1, -1]),
        1: SparseSeries([np.nan, 1, np.nan]),
        2: SparseSeries([np.nan, np.nan, 1]),
    }
    expected = SparseDataFrame(columns, default_fill_value=-1)
    for label in expected:
        expected[label].fill_value = -1

    tm.assert_sp_frame_equal(result, expected)
def _create_sp_series():
    """Return a block-kind SparseSeries named ``bseries``.

    The values are 0..14 as float64, with positions 7 through 11 and the
    final position replaced by NaN.
    """
    values = np.arange(15, dtype=np.float64)

    # Punch the NaN gaps into the otherwise dense ramp.
    for gap in (slice(7, 12), slice(-1, None)):
        values[gap] = np.nan

    series = SparseSeries(values, kind='block')
    series.name = u'bseries'
    return series
def _create_sp_series():
    """Build the NaN-based, block-kind SparseSeries fixture ``bseries``.

    Starts from ``arange(15)`` as float64 and sets positions 7-11 and the
    last position to NaN before wrapping it in a SparseSeries.
    """
    missing = np.nan

    raw = np.arange(15, dtype=np.float64)
    raw[-1:] = missing
    raw[7:12] = missing

    sparse = SparseSeries(raw, kind='block')
    sparse.name = 'bseries'
    return sparse
def _create_sp_tsseries():
    """Return a block-kind SparseSeries named ``btsseries`` on a date index.

    The values are 0..14 as float64 with positions 7 through 11 and the
    final position set to NaN; the index is a ``bdate_range`` starting at
    '1/1/2011' with one entry per value.
    """
    values = np.arange(15, dtype=np.float64)

    # Punch the NaN gaps into the otherwise dense ramp.
    for gap in (slice(7, 12), slice(-1, None)):
        values[gap] = np.nan

    dates = bdate_range('1/1/2011', periods=len(values))

    series = SparseSeries(values, index=dates, kind='block')
    series.name = u'btsseries'
    return series
def _create_sp_tsseries():
    """Build the NaN-based, date-indexed SparseSeries fixture ``btsseries``.

    Starts from ``arange(15)`` as float64, sets positions 7-11 and the last
    position to NaN, and indexes the result with a ``bdate_range`` that
    begins at '1/1/2011'.
    """
    missing = np.nan

    raw = np.arange(15, dtype=np.float64)
    raw[-1:] = missing
    raw[7:12] = missing

    sparse = SparseSeries(
        raw,
        index=bdate_range('1/1/2011', periods=len(raw)),
        kind='block',
    )
    sparse.name = 'btsseries'
    return sparse
def test_where_with_numeric_data_and_other(data, other):
    """Sparse ``where`` with a replacement value agrees with dense ``where``.

    Regression test for GH 17386.
    """
    threshold = 1.5

    # Reference result computed on the dense Series.
    dense = Series(data)
    expected_dense = dense.where(dense > threshold, other)
    expected_sparse = SparseSeries(expected_dense, fill_value=other)

    # Same operation on the sparse Series.
    sparse = SparseSeries(data)
    result = sparse.where(sparse > threshold, other)

    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
def test_where_with_numeric_data(data):
    """Sparse ``where`` without a replacement agrees with dense ``where``.

    Regression test for GH 17386.
    """
    threshold = 1.5

    # Reference result computed on the dense Series.
    dense = Series(data)
    expected_dense = dense.where(dense > threshold)
    expected_sparse = SparseSeries(expected_dense)

    # Same operation on the sparse Series.
    sparse = SparseSeries(data)
    keep = sparse > threshold
    result = sparse.where(keep)

    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
def test_where_with_bool_data():
    """Sparse ``where`` on boolean data agrees with dense ``where``.

    Regression test for GH 17386.
    """
    values = [False, False, True, True, False, False]
    wanted = True

    # Reference result computed on the dense Series.
    dense = Series(values)
    expected_dense = dense.where(dense == wanted)
    expected_sparse = SparseSeries(expected_dense)

    # Same operation on the sparse Series.
    sparse = SparseSeries(values)
    result = sparse.where(sparse == wanted)

    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
def test_where_with_bool_data_and_other(other):
    """Sparse ``where`` on boolean data with a replacement matches dense.

    Regression test for GH 17386.
    """
    values = [False, False, True, True, False, False]
    wanted = True

    # Reference result computed on the dense Series.
    dense = Series(values)
    expected_dense = dense.where(dense == wanted, other)
    expected_sparse = SparseSeries(expected_dense, fill_value=other)

    # Same operation on the sparse Series.
    sparse = SparseSeries(values)
    mask = sparse == wanted
    result = sparse.where(mask, other)

    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
def _create_sp_series():
    """Return a block-kind SparseSeries named ``bseries``.

    numpy and pandas are imported inside the function. The values are
    0..14 as float64 with positions 7 through 11 and the final position
    set to NaN.
    """
    import numpy as np
    from pandas import SparseSeries

    values = np.arange(15, dtype=np.float64)

    # Punch the NaN gaps into the otherwise dense ramp.
    values[7:12] = np.nan
    values[-1:] = np.nan

    result = SparseSeries(values, kind='block')
    result.name = 'bseries'
    return result
def test_type_of_target():
    """Exercise ``type_of_target`` on valid, invalid and legacy inputs.

    Covers, in order: every example in ``EXAMPLES`` mapping to its group
    name, non-array-like inputs raising ``ValueError``, legacy
    sequence-of-sequences multilabel inputs raising ``ValueError``, and a
    pandas ``SparseSeries`` being rejected.  The test is skipped when
    ``SparseSeries`` cannot be imported from pandas.
    """
    # Valid inputs: each example must be classified as its group.
    for group, group_examples in EXAMPLES.items():
        for example in group_examples:
            failure = ('type_of_target(%r) should be %r, got %r'
                       % (example, group, type_of_target(example)))
            assert_equal(type_of_target(example), group, msg=failure)

    # Inputs that are not array-like at all.
    for example in NON_ARRAY_LIKE_EXAMPLES:
        not_array_like = (
            r'Expected array-like \(array or non-string sequence\).*')
        assert_raises_regex(ValueError, not_array_like,
                            type_of_target, example)

    # Legacy multilabel representation (sequence of sequences).
    for example in MULTILABEL_SEQUENCES:
        legacy = ('You appear to be using a legacy multi-label data '
                  'representation. Sequence of sequences are no longer '
                  'supported; use a binary array or sparse matrix instead.')
        assert_raises_regex(ValueError, legacy, type_of_target, example)

    # pandas SparseSeries is explicitly unsupported.
    try:
        from pandas import SparseSeries
    except ImportError:
        raise SkipTest("Pandas not found")

    sparse_y = SparseSeries([1, 0, 0, 1, 0])
    rejected = "y cannot be class 'SparseSeries'."
    assert_raises_regex(ValueError, rejected, type_of_target, sparse_y)
def _create_sp_series():
    """Return a block-kind SparseSeries named ``bseries`` on an integer index.

    The values are 0..14 as floats with positions 7 through 11 and the
    final position set to NaN; the index is ``np.arange(15)``.

    The original body also built a ``bdate_range`` that was never passed to
    the series; that dead computation and its import have been dropped.
    """
    import numpy as np
    from pandas import SparseSeries

    nan = np.nan

    # nan-based
    arr = np.arange(15, dtype=float)
    index = np.arange(15)
    arr[7:12] = nan
    arr[-1:] = nan

    bseries = SparseSeries(arr, index=index, kind='block')
    bseries.name = 'bseries'
    return bseries
def setup(self):
    """Populate ``self.series`` with the SparseSeries used by the benchmark.

    Creates one series per ``i`` in 1..49.  Each holds the first
    ``50001 - i`` values of a fresh random draw, with everything from
    position 100 onward set to NaN, indexed by the matching prefix of a
    ``date_range`` starting at '1/1/2000' with ``freq='T'``.
    """
    n_series = 50
    n_points = 50001
    stamps = date_range('1/1/2000', periods=n_points, freq='T')

    self.series = {}
    for trim in range(1, n_series):
        values = np.random.randn(n_points)[:-trim]
        # Only the first 100 points stay populated.
        values[100:] = np.nan
        self.series[trim] = SparseSeries(values, index=stamps[:-trim])
def setup(self):
    """Populate ``self.series`` with the SparseSeries used by the benchmark.

    Creates one series per ``i`` in 1..``self.K``.  Each holds the first
    ``self.N - i`` values of a fresh random draw, with everything from
    position 100 onward set to NaN, indexed by the matching prefix of
    ``self.rng``.  ``self.data`` and ``self.this_rng`` are left holding the
    values and index of the last series built.
    """
    self.K, self.N = 50, 50000

    stamps = date_range('1/1/2000', periods=self.N, freq='T')
    self.rng = np.asarray(stamps)

    self.series = {}
    for trim in range(1, self.K + 1):
        values = np.random.randn(self.N)[:-trim]
        # Only the first 100 points stay populated.
        values[100:] = np.nan

        self.data = values
        self.this_rng = self.rng[:-trim]
        self.series[trim] = SparseSeries(self.data, index=self.this_rng)
def test_quantile():
    """SparseDataFrame.quantile agrees with DataFrame.quantile.

    Regression test for GH 17386.
    """
    rows = [[1, 1], [2, 10], [3, 100], [np.nan, np.nan]]
    level = 0.1

    # Reference result computed on the dense frame.
    expected_dense = DataFrame(rows).quantile(level)
    expected_sparse = SparseSeries(expected_dense)

    # Same operation on the sparse frame.
    result = SparseDataFrame(rows).quantile(level)

    tm.assert_series_equal(result, expected_dense)
    tm.assert_sp_series_equal(result, expected_sparse)
def time_sparse_series_from_coo(self):
    """Benchmark ``SparseSeries.from_coo`` on ``self.A``.

    The resulting series is stored on ``self.ss``.
    """
    # presumably a scipy COO matrix prepared in setup -- confirm there
    source = self.A
    self.ss = SparseSeries.from_coo(source)
def time_sparse_series_from_coo(self):
    """Benchmark ``SparseSeries.from_coo`` on ``self.matrix``.

    The resulting series is discarded.
    """
    # presumably a scipy COO matrix prepared in setup -- confirm there
    source = self.matrix
    SparseSeries.from_coo(source)