Пример #1
0
    def test_isna(self, data_missing):
        """Check ``SparseArray.isna`` before and after the array is filled."""
        # The boolean mask's fill value is whether the input's own fill
        # value counts as missing.
        mask_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value))

        # Array as given: the expected mask is [True, False].
        with_na = SparseArray(data_missing)
        want = SparseArray([True, False], dtype=mask_dtype)
        tm.assert_sp_array_equal(with_na.isna(), want)

        # test isna for arr without na
        without_na = with_na.fillna(0)
        want = SparseArray([False, False], fill_value=False, dtype=mask_dtype)
        self.assert_equal(without_na.isna(), want)
def coo_to_sparse_series(A, dense_index: bool = False):
    """
    Build a sparse-dtype Series from a scipy.sparse.coo_matrix.

    The stored entries of ``A`` become the values, labelled by a
    (row, col) MultiIndex that is sorted before the sparse conversion.

    Parameters
    ----------
    A : scipy.sparse.coo.coo_matrix
        Object exposing ``data``, ``row``, ``col`` (and ``shape`` when
        ``dense_index`` is True).
    dense_index : bool, default False
        If True, reindex the result onto the full product of
        ``range(A.shape[0])`` and ``range(A.shape[1])``.

    Returns
    -------
    Series
        Series whose dtype is ``SparseDtype`` of the original value dtype.

    Raises
    ------
    TypeError
        If building the Series raises AttributeError, i.e. ``A`` does not
        look like a coo_matrix.
    """
    from pandas import SparseDtype

    try:
        values = A.data
        coords = MultiIndex.from_arrays((A.row, A.col))
        labelled = Series(values, coords)
    except AttributeError as err:
        raise TypeError(
            f"Expected coo_matrix. Got {type(A).__name__} instead.") from err

    ordered = labelled.sort_index()
    sparse = ordered.astype(SparseDtype(ordered.dtype))
    if not dense_index:
        return sparse

    # is there a better constructor method to use here?
    row_labels = range(A.shape[0])
    col_labels = range(A.shape[1])
    return sparse.reindex(MultiIndex.from_product([row_labels, col_labels]))
Пример #3
0
 def test_astype_str(self, data):
     """Cast the first five elements to ``str``, expecting a FutureWarning."""
     with tm.assert_produces_warning(
         FutureWarning, match="astype from Sparse"
     ):
         actual = pd.Series(data[:5]).astype(str)

     # Expected: element-wise str() with a stringified fill value.
     str_dtype = SparseDtype(str, str(data.fill_value))
     want = pd.Series([str(item) for item in data[:5]], dtype=str_dtype)
     self.assert_series_equal(actual, want)
Пример #4
0
def _coo_to_sparse_series(A,
                          dense_index: bool = False,
                          sparse_series: bool = True):
    """
    Convert a scipy.sparse.coo_matrix to a SparseSeries.

    Parameters
    ----------
    A : scipy.sparse.coo.coo_matrix
    dense_index : bool, default False
    sparse_series : bool, default True

    Returns
    -------
    Series or SparseSeries
    """
    from pandas import SparseDtype

    s = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
    s = s.sort_index()
    if sparse_series:
        # TODO(SparseSeries): remove this and the sparse_series keyword.
        # This is just here to avoid a DeprecationWarning when
        # _coo_to_sparse_series is called via Series.sparse.from_coo
        s = s.to_sparse()  # TODO: specify kind?
    else:
        s = s.astype(SparseDtype(s.dtype))
    if dense_index:
        # is there a better constructor method to use here?
        i = range(A.shape[0])
        j = range(A.shape[1])
        ind = MultiIndex.from_product([i, j])
        s = s.reindex(ind)
    return s
Пример #5
0
    def test_with_datetimelikes(self):
        """Transposing a datetime/timedelta frame gives ten object columns."""
        frame = self.klass({'A': date_range('20130101', periods=10),
                            'B': timedelta_range('1 day', periods=10)})
        counts = frame.T.dtypes.value_counts()

        # Plain object dtype for DataFrame, sparse object dtype otherwise.
        if self.klass is DataFrame:
            object_key = np.dtype('object')
        else:
            object_key = SparseDtype(dtype=object)
        tm.assert_series_equal(counts, Series({object_key: 10}))
Пример #6
0
    def test_subclass_sparse_slice(self):
        """Slices of a subclassed sparse series keep values and sparse dtype."""
        cases = (
            # int64
            ([1, 2, 3, 4, 5], np.int64),
            # float64
            ([1., 2., 3., 4., 5.], np.float64),
        )
        for values, scalar_type in cases:
            sparse = tm.SubclassedSparseSeries(values)

            # .loc slices by label: the end label 3 is included.
            want = tm.SubclassedSparseSeries(values[1:4], index=[1, 2, 3])
            tm.assert_sp_series_equal(sparse.loc[1:3], want)
            assert sparse.loc[1:3].dtype == SparseDtype(scalar_type)

            # .iloc slices by position: the end position 3 is excluded.
            want = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
            tm.assert_sp_series_equal(sparse.iloc[1:3], want)
            assert sparse.iloc[1:3].dtype == SparseDtype(scalar_type)

            # Plain [] slicing is expected to match the positional result.
            want = tm.SubclassedSparseSeries(values[1:3], index=[1, 2])
            tm.assert_sp_series_equal(sparse[1:3], want)
            assert sparse[1:3].dtype == SparseDtype(scalar_type)
Пример #7
0
    def test_with_datetimelikes(self):
        """The transposed datetime/timedelta frame has ten object-typed columns."""
        columns = {
            "A": date_range("20130101", periods=10),
            "B": timedelta_range("1 day", periods=10),
        }
        transposed = self.klass(columns).T
        dtype_counts = transposed.dtypes.value_counts()

        # DataFrame reports plain object dtype; other classes a sparse one.
        expected_key = (
            np.dtype("object")
            if self.klass is DataFrame
            else SparseDtype(dtype=object)
        )
        tm.assert_series_equal(dtype_counts, Series({expected_key: 10}))
Пример #8
0
    def test_isna(self, data_missing):
        """``isna`` on the array, a Series, and an emptied Series stays sparse bool."""
        # Fill value of the mask is whether the input's fill value is missing.
        mask_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value))
        mask = SparseArray([True, False], dtype=mask_dtype)

        # Array level.
        self.assert_equal(pd.isna(data_missing), mask)

        # Series level.
        series_mask = pd.Series(data_missing).isna()
        self.assert_series_equal(series_mask, pd.Series(mask))

        # GH 21189
        emptied = pd.Series(data_missing).drop([0, 1])
        self.assert_series_equal(emptied.isna(), pd.Series([], dtype=mask_dtype))
Пример #9
0
    def test_fillna_frame(self, data_missing):
        """Fill a frame holding ``data_missing`` with its second element."""
        # Have to override to specify that fill_value will change.
        filler = data_missing[1]
        frame = pd.DataFrame({"A": data_missing, "B": [1, 2]})
        actual = frame.fillna(filler)

        # When the array's fill value is missing, the expected dtype carries
        # the new fill value; otherwise the dtype is unchanged.
        fill_is_na = pd.isna(data_missing.fill_value)
        column_dtype = (
            SparseDtype(data_missing.dtype, filler)
            if fill_is_na
            else data_missing.dtype
        )

        filled = data_missing._from_sequence([filler, filler], dtype=column_dtype)
        want = pd.DataFrame({"A": filled, "B": [1, 2]})

        self.assert_frame_equal(actual, want)
Пример #10
0
    def test_where_series(self, data, na_value):
        """Exercise ``Series.where`` with and without an ``other`` array."""
        assert data[0] != data[1]
        build = type(data)._from_sequence
        first, second = data[:2]

        ser = pd.Series(build([first, first, second, second], dtype=data.dtype))

        # No `other`: masked slots are expected to hold na_value, with a
        # float sparse dtype whose fill value is 0.0.
        keep = np.array([True, True, False, False])
        masked = ser.where(keep)
        float_dtype = SparseDtype("float", 0.0)
        want = pd.Series(
            build([first, first, na_value, na_value], dtype=float_dtype)
        )
        self.assert_series_equal(masked, want)

        # With `other`: masked slots take the value from `other` and the
        # original dtype is expected.
        other = build([first, second, first, second], dtype=data.dtype)
        keep = np.array([True, False, True, True])
        replaced = ser.where(keep, other)
        want = pd.Series(build([first, second, second, second], dtype=data.dtype))
        self.assert_series_equal(replaced, want)
Пример #11
0
def dtype():
    """Return a ``SparseDtype`` constructed with no arguments."""
    default_sparse = SparseDtype()
    return default_sparse
Пример #12
0
 def test_astype_str(self, data):
     """Cast the first five elements to ``str`` and compare to a sparse-str Series."""
     actual = pd.Series(data[:5]).astype(str)

     # Expected: element-wise str() with a stringified fill value.
     str_dtype = SparseDtype(str, str(data.fill_value))
     as_strings = [str(item) for item in data[:5]]
     want = pd.Series(as_strings, dtype=str_dtype)
     self.assert_series_equal(actual, want)
Пример #13
0
 def _check_unsupported(self, data):
     if data.dtype == SparseDtype(int, 0):
         pytest.skip("Can't store nan in int array.")