def test_isna(self, data_missing):
    """Check ``SparseArray.isna`` before and after the missing value is filled."""
    # The boolean mask is itself sparse; its fill value is whether the
    # input's own fill value counts as missing.
    mask_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value))
    sparse = SparseArray(data_missing)

    tm.assert_sp_array_equal(
        sparse.isna(), SparseArray([True, False], dtype=mask_dtype)
    )

    # test isna for arr without na
    filled = sparse.fillna(0)
    no_na_mask = SparseArray([False, False], fill_value=False, dtype=mask_dtype)
    self.assert_equal(filled.isna(), no_na_mask)
def coo_to_sparse_series(A, dense_index: bool = False):
    """
    Build a Series with sparse values from a scipy.sparse.coo_matrix.

    The result is indexed by a two-level MultiIndex of (row, col) taken
    from ``A`` and sorted by that index.

    Parameters
    ----------
    A : scipy.sparse.coo.coo_matrix
    dense_index : bool, default False
        If True, reindex the result against the full product of
        ``range(A.shape[0])`` and ``range(A.shape[1])``.

    Returns
    -------
    Series

    Raises
    ------
    TypeError if A is not a coo_matrix
    """
    from pandas import SparseDtype

    try:
        values = A.data
        labels = MultiIndex.from_arrays((A.row, A.col))
        ser = Series(values, labels)
    except AttributeError as err:
        # Objects lacking .data/.row/.col are reported as the wrong type.
        raise TypeError(
            f"Expected coo_matrix. Got {type(A).__name__} instead."
        ) from err

    ordered = ser.sort_index()
    sparse = ordered.astype(SparseDtype(ordered.dtype))
    if not dense_index:
        return sparse

    # is there a better constructor method to use here?
    every_cell = MultiIndex.from_product([range(A.shape[0]), range(A.shape[1])])
    return sparse.reindex(every_cell)
def test_astype_str(self, data):
    """Cast the first five elements to ``str`` and expect a FutureWarning."""
    head = data[:5]
    with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
        result = pd.Series(head).astype(str)

    # Expected values are the stringified elements, stored sparsely with the
    # stringified fill value of the input.
    as_text = [str(x) for x in head]
    expected = pd.Series(as_text, dtype=SparseDtype(str, str(data.fill_value)))
    self.assert_series_equal(result, expected)
def _coo_to_sparse_series(A, dense_index: bool = False, sparse_series: bool = True): """ Convert a scipy.sparse.coo_matrix to a SparseSeries. Parameters ---------- A : scipy.sparse.coo.coo_matrix dense_index : bool, default False sparse_series : bool, default True Returns ------- Series or SparseSeries """ from pandas import SparseDtype s = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) s = s.sort_index() if sparse_series: # TODO(SparseSeries): remove this and the sparse_series keyword. # This is just here to avoid a DeprecationWarning when # _coo_to_sparse_series is called via Series.sparse.from_coo s = s.to_sparse() # TODO: specify kind? else: s = s.astype(SparseDtype(s.dtype)) if dense_index: # is there a better constructor method to use here? i = range(A.shape[0]) j = range(A.shape[1]) ind = MultiIndex.from_product([i, j]) s = s.reindex(ind) return s
def test_with_datetimelikes(self):
    """Transpose a datetime/timedelta frame and check the resulting dtype counts."""
    columns = {
        'A': date_range('20130101', periods=10),
        'B': timedelta_range('1 day', periods=10),
    }
    transposed = self.klass(columns).T
    counts = transposed.dtypes.value_counts()

    # A plain DataFrame is expected to report object dtype; any other klass
    # is expected to report the sparse object dtype.
    if self.klass is DataFrame:
        dtype_key = np.dtype('object')
    else:
        dtype_key = SparseDtype(dtype=object)
    tm.assert_series_equal(counts, Series({dtype_key: 10}))
def test_subclass_sparse_slice(self):
    """Slice a SubclassedSparseSeries via loc, iloc and [] for int64 and float64."""
    cases = [
        # int64
        ([1, 2, 3, 4, 5], np.int64),
        # float64
        ([1.0, 2.0, 3.0, 4.0, 5.0], np.float64),
    ]
    for values, subtype in cases:
        s = tm.SubclassedSparseSeries(values)
        sparse_dtype = SparseDtype(subtype)

        # .loc[1:3] is expected to yield the three elements labelled 1..3.
        exp = tm.SubclassedSparseSeries(values[1:4], index=[1, 2, 3])
        tm.assert_sp_series_equal(s.loc[1:3], exp)
        assert s.loc[1:3].dtype == sparse_dtype

        # .iloc[1:3] is expected to yield the two elements labelled 1 and 2.
        exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(s.iloc[1:3], exp)
        assert s.iloc[1:3].dtype == sparse_dtype

        # Plain [] slicing is expected to match the .iloc result.
        exp = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
        tm.assert_sp_series_equal(s[1:3], exp)
        assert s[1:3].dtype == sparse_dtype
def test_with_datetimelikes(self):
    """Check dtype counts after transposing a frame of datetimes and timedeltas."""
    dates = date_range("20130101", periods=10)
    deltas = timedelta_range("1 day", periods=10)
    df = self.klass({"A": dates, "B": deltas})

    result = df.T.dtypes.value_counts()

    # Only a plain DataFrame is expected to give numpy object dtype; other
    # klass values are expected to give the sparse object dtype.
    is_dense = self.klass is DataFrame
    dtype_key = np.dtype("object") if is_dense else SparseDtype(dtype=object)
    expected = Series({dtype_key: 10})
    tm.assert_series_equal(result, expected)
def test_isna(self, data_missing):
    """Check ``isna`` on the array, on a Series of it, and on an emptied Series."""
    # The mask is sparse, with a fill value that says whether the input's
    # own fill value is missing.
    mask_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value))
    mask = SparseArray([True, False], dtype=mask_dtype)
    self.assert_equal(pd.isna(data_missing), mask)

    ser = pd.Series(data_missing)
    self.assert_series_equal(ser.isna(), pd.Series(mask))

    # GH 21189
    emptied = ser.drop([0, 1])
    self.assert_series_equal(emptied.isna(), pd.Series([], dtype=mask_dtype))
def test_fillna_frame(self, data_missing):
    """Fill a frame holding ``data_missing`` and check the filled column's dtype."""
    # Have to override to specify that fill_value will change.
    fill_value = data_missing[1]
    frame = pd.DataFrame({"A": data_missing, "B": [1, 2]})
    result = frame.fillna(fill_value)

    # Start from the input dtype; when the input's fill value is missing the
    # expected dtype carries the new fill value instead.
    dtype = data_missing.dtype
    if pd.isna(data_missing.fill_value):
        dtype = SparseDtype(data_missing.dtype, fill_value)

    filled_column = data_missing._from_sequence(
        [fill_value, fill_value], dtype=dtype
    )
    expected = pd.DataFrame({"A": filled_column, "B": [1, 2]})
    self.assert_frame_equal(result, expected)
def test_where_series(self, data, na_value):
    """Check ``Series.where`` with and without ``other`` on sparse data."""
    assert data[0] != data[1]
    first, second = data[:2]
    from_seq = type(data)._from_sequence

    ser = pd.Series(from_seq([first, first, second, second], dtype=data.dtype))

    # Without ``other`` the masked positions become ``na_value`` and the
    # expected dtype is Sparse float with fill value 0.0.
    keep = np.array([True, True, False, False])
    result = ser.where(keep)
    masked = from_seq(
        [first, first, na_value, na_value], dtype=SparseDtype("float", 0.0)
    )
    self.assert_series_equal(result, pd.Series(masked))

    # With ``other`` the masked position is taken from ``other`` and the
    # original dtype is expected.
    other = from_seq([first, second, first, second], dtype=data.dtype)
    keep = np.array([True, False, True, True])
    result = ser.where(keep, other)
    replaced = from_seq([first, second, second, second], dtype=data.dtype)
    self.assert_series_equal(result, pd.Series(replaced))
def dtype():
    """Return a ``SparseDtype`` constructed with its default arguments."""
    default_sparse = SparseDtype()
    return default_sparse
def test_astype_str(self, data):
    """Cast the first five elements to ``str`` and compare with a sparse Series."""
    head = data[:5]
    result = pd.Series(head).astype(str)

    # Expected values are the stringified elements, stored sparsely with the
    # stringified fill value of the input.
    text_dtype = SparseDtype(str, str(data.fill_value))
    expected = pd.Series([str(item) for item in head], dtype=text_dtype)
    self.assert_series_equal(result, expected)
def _check_unsupported(self, data): if data.dtype == SparseDtype(int, 0): pytest.skip("Can't store nan in int array.")