示例#1
0
    def set_value(self, label, value, takeable=False):
        """
        Quickly set single value at passed label. If label is not contained, a
        new object is created with the label placed at the end of the result
        index

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed
        value : object
            Scalar value
        takeable : interpret the index as indexers, default False

        Notes
        -----
        This method *always* returns a new object. It is not particularly
        efficient but is provided for API compatibility with Series

        Returns
        -------
        series : SparseSeries
        """
        values = self.to_dense()

        # if the label doesn't exist, we will create a new object here
        # and possibily change the index
        new_values = values.set_value(label, value, takeable=takeable)
        if new_values is not None:
            values = new_values
        new_index = values.index
        values = SparseArray(values, fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, new_index)
        self._index = new_index
示例#2
0
def create_single_mgr(typestr, num_rows=None):
    if num_rows is None:
        num_rows = N

    return SingleBlockManager(
        create_block(typestr, placement=slice(0, num_rows), item_shape=()),
        Index(np.arange(num_rows)),
    )
def test_concat_series():
    # GH17728
    values = np.arange(3, dtype="int64")
    block = CustomBlock(values, placement=slice(0, 3))
    mgr = SingleBlockManager(block, pd.RangeIndex(3))
    s = pd.Series(mgr, pd.RangeIndex(3), fastpath=True)

    res = pd.concat([s, s])
    assert isinstance(res._data.blocks[0], CustomBlock)
示例#4
0
    def __new__(cls, data=None, index=None, crs=None, **kwargs):
        # we need to use __new__ because we want to return Series instance
        # instead of GeoSeries instance in case of non-geometry data
        if isinstance(data, SingleBlockManager):
            if isinstance(data.blocks[0].dtype, GeometryDtype):
                if not PANDAS_GE_024 and (data.blocks[0].ndim == 2):
                    # bug in pandas 0.23 where in certain indexing operations
                    # (such as .loc) a 2D ExtensionBlock (still with 1D values
                    # is created) which results in other failures
                    from pandas.core.internals import ExtensionBlock
                    values = data.blocks[0].values
                    block = ExtensionBlock(values, slice(0, len(values), 1))
                    data = SingleBlockManager([block],
                                              data.axes[0],
                                              fastpath=True)
                self = super(GeoSeries, cls).__new__(cls)
                super(GeoSeries, self).__init__(data, index=index, **kwargs)
                self.crs = crs
                return self
            return Series(data, index=index, **kwargs)

        if isinstance(data, BaseGeometry):
            # fix problem for scalar geometries passed, ensure the list of
            # scalars is of correct length if index is specified
            n = len(index) if index is not None else 1
            data = [data] * n

        name = kwargs.pop('name', None)

        if not is_geometry_type(data):
            # if data is None and dtype is specified (eg from empty overlay
            # test), specifying dtype raises an error:
            # https://github.com/pandas-dev/pandas/issues/26469
            kwargs.pop('dtype', None)
            # Use Series constructor to handle input data
            s = pd.Series(data, index=index, name=name, **kwargs)
            # prevent trying to convert non-geometry objects
            if s.dtype != object:
                if s.empty:
                    s = s.astype(object)
                else:
                    return s
            # try to convert to GeometryArray, if fails return plain Series
            try:
                data = from_shapely(s.values)
            except TypeError:
                return s
            index = s.index
            name = s.name

        self = super(GeoSeries, cls).__new__(cls)
        super(GeoSeries, self).__init__(data, index=index, name=name, **kwargs)
        self.crs = crs
        self._invalidate_sindex()
        return self
示例#5
0
    def _set_value(self, label, value, takeable=False):
        values = self.to_dense()

        # if the label doesn't exist, we will create a new object here
        # and possibly change the index
        new_values = values._set_value(label, value, takeable=takeable)
        if new_values is not None:
            values = new_values
        new_index = values.index
        values = SparseArray(values, fill_value=self.fill_value, kind=self.kind)
        self._data = SingleBlockManager(values, new_index)
        self._index = new_index
示例#6
0
文件: series.py 项目: Xbar/pandas
    def _set_value(self, label, value, takeable=False):
        values = self.to_dense()

        # if the label doesn't exist, we will create a new object here
        # and possibily change the index
        new_values = values._set_value(label, value, takeable=takeable)
        if new_values is not None:
            values = new_values
        new_index = values.index
        values = SparseArray(values, fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, new_index)
        self._index = new_index
示例#7
0
    def _set_values(self, key, value):

        # this might be inefficient as we have to recreate the sparse array
        # rather than setting individual elements, but have to convert
        # the passed slice/boolean that's in dense space into a sparse indexer
        # not sure how to do that!
        if isinstance(key, Series):
            key = key.values

        values = self.values.to_dense()
        values[key] = libindex.convert_scalar(values, value)
        values = SparseArray(values, fill_value=self.fill_value, kind=self.kind)
        self._data = SingleBlockManager(values, self.index)
示例#8
0
    def _set_values(self, key, value):

        # this might be inefficient as we have to recreate the sparse array
        # rather than setting individual elements, but have to convert
        # the passed slice/boolean that's in dense space into a sparse indexer
        # not sure how to do that!
        if isinstance(key, Series):
            key = key.values

        values = self.values.to_dense()
        values[key] = _index.convert_scalar(values, value)
        values = SparseArray(values, fill_value=self.fill_value, kind=self.kind)
        self._data = SingleBlockManager(values, self.index)
示例#9
0
def test_custom_repr():
    values = np.arange(3, dtype='int64')

    # series
    block = CustomBlock(values, placement=slice(0, 3))

    s = pd.Series(SingleBlockManager(block, pd.RangeIndex(3)))
    assert repr(s) == '0    Val: 0\n1    Val: 1\n2    Val: 2\ndtype: int64'

    # dataframe
    block = CustomBlock(values, placement=slice(0, 1))
    blk_mgr = BlockManager([block], [['col'], range(3)])
    df = pd.DataFrame(blk_mgr)
    assert repr(df) == '      col\n0  Val: 0\n1  Val: 1\n2  Val: 2'
示例#10
0
    def set_value(self, label, value, takeable=False):
        """
        Quickly set single value at passed label. If label is not contained, a
        new object is created with the label placed at the end of the result
        index

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed
        value : object
            Scalar value
        takeable : interpret the index as indexers, default False

        Notes
        -----
        This method *always* returns a new object. It is not particularly
        efficient but is provided for API compatibility with Series

        Returns
        -------
        series : SparseSeries
        """
        values = self.to_dense()

        # if the label doesn't exist, we will create a new object here
        # and possibily change the index
        new_values = values.set_value(label, value, takeable=takeable)
        if new_values is not None:
            values = new_values
        new_index = values.index
        values = SparseArray(values,
                             fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, new_index)
        self._index = new_index
示例#11
0
    def __getitem__(self, key):

        if _HAS_EXTENSION_ARRAY:
            return super(GeoSeries, self).__getitem__(key)
        else:
            if isinstance(key, (slice, list, Series, np.ndarray)):
                block = self._data._block._getitem(key)
                index = self.index[key]
                return GeoSeries(SingleBlockManager(block, axis=index),
                                 crs=self.crs,
                                 index=index)
            try:
                if key in self.index:
                    loc = self.index.get_loc(key)
                    return self._geometry_array[loc]
            except TypeError:
                pass
            raise KeyError(key)
示例#12
0
    def sparse_reindex(self, new_index):
        """
        Conform sparse values to new SparseIndex

        Parameters
        ----------
        new_index : {BlockIndex, IntIndex}

        Returns
        -------
        reindexed : SparseSeries
        """
        if not isinstance(new_index, splib.SparseIndex):
            raise TypeError('new index must be a SparseIndex')

        block = self.block.sparse_reindex(new_index)
        new_data = SingleBlockManager(block, self.index)
        return self._constructor(new_data, index=self.index,
                                 sparse_index=new_index,
                                 fill_value=self.fill_value).__finalize__(self)
示例#13
0
    def sparse_reindex(self, new_index):
        """
        Conform sparse values to new SparseIndex

        Parameters
        ----------
        new_index : {BlockIndex, IntIndex}

        Returns
        -------
        reindexed : SparseSeries
        """
        if not (isinstance(new_index, splib.SparseIndex)):
            raise AssertionError()

        block = self.block.sparse_reindex(new_index)
        new_data = SingleBlockManager(block, block.ref_items)
        return self._constructor(new_data, index=self.index,
                                 sparse_index=new_index,
                                 fill_value=self.fill_value)
示例#14
0
    def _unpickle_series_compat(self, state):

        nd_state, own_state = state

        # recreate the ndarray
        data = np.empty(nd_state[1], dtype=nd_state[2])
        np.ndarray.__setstate__(data, nd_state)

        index, fill_value, sp_index = own_state[:3]
        name = None
        if len(own_state) > 3:
            name = own_state[3]

        # create a sparse array
        if not isinstance(data, SparseArray):
            data = SparseArray(data, sparse_index=sp_index,
                               fill_value=fill_value, copy=False)

        # recreate
        data = SingleBlockManager(data, index, fastpath=True)
        generic.NDFrame.__init__(self, data)

        self._set_axis(0, index)
        self.name = name
示例#15
0
    def __init__(self, data=None, index=None, sparse_index=None, kind='block',
                 fill_value=None, name=None, dtype=None, copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            if isinstance(data, SparseArray):
                if index is not None:
                    assert (len(index) == len(data))
                sparse_index = data.sp_index
                if fill_value is None:
                    fill_value = data.fill_value

                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()
                if fill_value is None:
                    fill_value = data.fill_value
                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                if index is None:
                    index = data.index.view()

                data = Series(data)
                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    res = make_sparse(data, kind=kind, fill_value=fill_value)
                    data, sparse_index, fill_value = res
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:

                    data = data.reindex(index, copy=False)

            else:
                length = len(index)

                if data == fill_value or (isnull(data) and isnull(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data, sparse_index=sparse_index,
                                       fill_value=fill_value, dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
示例#16
0
文件: series.py 项目: 5i7788/pandas
class SparseSeries(Series):

    """Data structure for labeled, sparse floating point data

    Parameters
    ----------
    data : {array-like, Series, SparseSeries, dict}
    kind : {'block', 'integer'}
    fill_value : float
        Defaults to NaN (code for missing)
    sparse_index : {BlockIndex, IntIndex}, optional
        Only if you have one. Mainly used internally

    Notes
    -----
    SparseSeries objects are immutable via the typical Python means. If you
    must change values, convert to dense, make your changes, then convert back
    to sparse
    """
    _subtyp = 'sparse_series'

    def __init__(self, data, index=None, sparse_index=None, kind='block',
                 fill_value=None, name=None, dtype=None, copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined
            data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()
        else:

            is_sparse_array = isinstance(data, SparseArray)
            if fill_value is None:
                if is_sparse_array:
                    fill_value = data.fill_value
                else:
                    fill_value = nan

            if is_sparse_array:
                if isinstance(data, SparseSeries) and index is None:
                    index = data.index.view()
                elif index is not None:
                    assert(len(index) == len(data))

                sparse_index = data.sp_index
                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()

                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                if index is None:
                    index = data.index.view()

                data = Series(data)
                data, sparse_index = make_sparse(data, kind=kind,
                                                 fill_value=fill_value)

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    data, sparse_index = make_sparse(data, kind=kind,
                                                     fill_value=fill_value)
                else:
                    assert(len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:
                    data = data.reindex(index, copy=False)

            else:

                length = len(index)

                if data == fill_value or (isnull(data)
                                          and isnull(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(
                        data, sparse_index=sparse_index, fill_value=fill_value, dtype=dtype, copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name

    @property
    def values(self):
        """ return the array """
        return self._data._values

    def __array__(self, result=None):
        """ the array interface, return my values """
        return self._data._values

    def get_values(self):
        """ same as values """
        return self._data._values.to_dense().view()

    @property
    def block(self):
        return self._data._block

    @property
    def fill_value(self):
        return self.block.fill_value

    @fill_value.setter
    def fill_value(self, v):
        self.block.fill_value = v

    @property
    def sp_index(self):
        return self.block.sp_index

    @property
    def sp_values(self):
        return self.values.sp_values

    @property
    def npoints(self):
        return self.sp_index.npoints

    @classmethod
    def from_array(cls, arr, index=None, name=None, copy=False, fill_value=None, fastpath=False):
        """
        Simplified alternate constructor
        """
        return cls(arr, index=index, name=name, copy=copy, fill_value=fill_value, fastpath=fastpath)

    @property
    def _constructor(self):
        return SparseSeries

    @property
    def kind(self):
        if isinstance(self.sp_index, BlockIndex):
            return 'block'
        elif isinstance(self.sp_index, IntIndex):
            return 'integer'

    def as_sparse_array(self, kind=None, fill_value=None, copy=False):
        """ return my self as a sparse array, do not copy by default """

        if fill_value is None:
            fill_value = self.fill_value
        if kind is None:
            kind = self.kind
        return SparseArray(self.values,
                           sparse_index=self.sp_index,
                           fill_value=fill_value,
                           kind=kind,
                           copy=copy)

    def __len__(self):
        return len(self.block)

    def __unicode__(self):
        # currently, unicode is same as repr...fixes infinite loop
        series_rep = Series.__unicode__(self)
        rep = '%s\n%s' % (series_rep, repr(self.sp_index))
        return rep

    def __array_wrap__(self, result):
        """
        Gets called prior to a ufunc (and after)
        """
        return self._constructor(result,
                                 index=self.index,
                                 sparse_index=self.sp_index,
                                 fill_value=self.fill_value,
                                 copy=False).__finalize__(self)

    def __array_finalize__(self, obj):
        """
        Gets called after any ufunc or other array operations, necessary
        to pass on the index.
        """
        self.name = getattr(obj, 'name', None)
        self.fill_value = getattr(obj, 'fill_value', None)

    def _reduce(self, op, axis=0, skipna=True, numeric_only=None,
                filter_type=None, name=None, **kwds):
        """ perform a reduction operation """
        return op(self.get_values(), skipna=skipna, **kwds)

    def __getstate__(self):
        # pickling
        return dict(_typ=self._typ,
                    _subtyp=self._subtyp,
                    _data=self._data,
                    fill_value=self.fill_value,
                    name=self.name)

    def _unpickle_series_compat(self, state):

        nd_state, own_state = state

        # recreate the ndarray
        data = np.empty(nd_state[1], dtype=nd_state[2])
        np.ndarray.__setstate__(data, nd_state)

        index, fill_value, sp_index = own_state[:3]
        name = None
        if len(own_state) > 3:
            name = own_state[3]

        # create a sparse array
        if not isinstance(data, SparseArray):
            data = SparseArray(
                data, sparse_index=sp_index, fill_value=fill_value, copy=False)

        # recreate
        data = SingleBlockManager(data, index, fastpath=True)
        generic.NDFrame.__init__(self, data)

        self._set_axis(0, index)
        self.name = name

    def __iter__(self):
        """ forward to the array """
        return iter(self.values)

    def _set_subtyp(self, is_all_dates):
        if is_all_dates:
            object.__setattr__(self, '_subtyp', 'sparse_time_series')
        else:
            object.__setattr__(self, '_subtyp', 'sparse_series')

    def _get_val_at(self, loc):
        """ forward to the array """
        return self.block.values._get_val_at(loc)

    def __getitem__(self, key):
        """

        """
        try:
            return self._get_val_at(self.index.get_loc(key))

        except KeyError:
            if isinstance(key, (int, np.integer)):
                return self._get_val_at(key)
            raise Exception('Requested index not in this series!')

        except TypeError:
            # Could not hash item, must be array-like?
            pass

        # is there a case where this would NOT be an ndarray?
        # need to find an example, I took out the case for now

        key = _values_from_object(key)
        dataSlice = self.values[key]
        new_index = Index(self.index.view(ndarray)[key])
        return self._constructor(dataSlice, index=new_index).__finalize__(self)

    def _set_with_engine(self, key, value):
        return self.set_value(key, value)

    def abs(self):
        """
        Return an object with absolute value taken. Only applicable to objects
        that are all numeric

        Returns
        -------
        abs: type of caller
        """
        res_sp_values = np.abs(self.sp_values)
        return self._constructor(res_sp_values, index=self.index,
                                 sparse_index=self.sp_index,
                                 fill_value=self.fill_value)

    def get(self, label, default=None):
        """
        Returns value occupying requested label, default to specified
        missing value if not present. Analogous to dict.get

        Parameters
        ----------
        label : object
            Label value looking for
        default : object, optional
            Value to return if label not in index

        Returns
        -------
        y : scalar
        """
        if label in self.index:
            loc = self.index.get_loc(label)
            return self._get_val_at(loc)
        else:
            return default

    def get_value(self, label, takeable=False):
        """
        Retrieve single value at passed index label

        Parameters
        ----------
        index : label
        takeable : interpret the index as indexers, default False

        Returns
        -------
        value : scalar value
        """
        loc = label if takeable is True else self.index.get_loc(label)
        return self._get_val_at(loc)

    def set_value(self, label, value, takeable=False):
        """
        Quickly set single value at passed label. If label is not contained, a
        new object is created with the label placed at the end of the result
        index

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed
        value : object
            Scalar value
        takeable : interpret the index as indexers, default False

        Notes
        -----
        This method *always* returns a new object. It is not particularly
        efficient but is provided for API compatibility with Series

        Returns
        -------
        series : SparseSeries
        """
        values = self.to_dense()

        # if the label doesn't exist, we will create a new object here
        # and possibily change the index
        new_values = values.set_value(label, value, takeable=takeable)
        if new_values is not None:
            values = new_values
        new_index = values.index
        values = SparseArray(
            values, fill_value=self.fill_value, kind=self.kind)
        self._data = SingleBlockManager(values, new_index)
        self._index = new_index

    def _set_values(self, key, value):

        # this might be inefficient as we have to recreate the sparse array
        # rather than setting individual elements, but have to convert
        # the passed slice/boolean that's in dense space into a sparse indexer
        # not sure how to do that!
        if isinstance(key, Series):
            key = key.values

        values = self.values.to_dense()
        values[key] = _index.convert_scalar(values, value)
        values = SparseArray(
            values, fill_value=self.fill_value, kind=self.kind)
        self._data = SingleBlockManager(values, self.index)

    def to_dense(self, sparse_only=False):
        """
        Convert SparseSeries to (dense) Series
        """
        if sparse_only:
            int_index = self.sp_index.to_int_index()
            index = self.index.take(int_index.indices)
            return Series(self.sp_values, index=index, name=self.name)
        else:
            return Series(self.values.to_dense(), index=self.index, name=self.name)

    @property
    def density(self):
        r = float(self.sp_index.npoints) / float(self.sp_index.length)
        return r

    def copy(self, deep=True):
        """
        Make a copy of the SparseSeries. Only the actual sparse values need to
        be copied
        """
        new_data = self._data
        if deep:
            new_data = self._data.copy()

        return self._constructor(new_data,
                                 sparse_index=self.sp_index,
                                 fill_value=self.fill_value).__finalize__(self)

    def reindex(self, index=None, method=None, copy=True, limit=None):
        """
        Conform SparseSeries to new Index

        See Series.reindex docstring for general behavior

        Returns
        -------
        reindexed : SparseSeries
        """
        new_index = _ensure_index(index)

        if self.index.equals(new_index):
            if copy:
                return self.copy()
            else:
                return self
        return self._constructor(self._data.reindex(new_index, method=method, limit=limit, copy=copy),
                                 index=new_index).__finalize__(self)

    def sparse_reindex(self, new_index):
        """
        Conform sparse values to new SparseIndex

        Parameters
        ----------
        new_index : {BlockIndex, IntIndex}

        Returns
        -------
        reindexed : SparseSeries
        """
        if not isinstance(new_index, splib.SparseIndex):
            raise TypeError('new index must be a SparseIndex')

        block = self.block.sparse_reindex(new_index)
        new_data = SingleBlockManager(block, self.index)
        return self._constructor(new_data, index=self.index,
                                 sparse_index=new_index,
                                 fill_value=self.fill_value).__finalize__(self)

    def take(self, indices, axis=0, convert=True):
        """
        Sparse-compatible version of ndarray.take

        Returns
        -------
        taken : ndarray
        """
        new_values = SparseArray.take(self.values, indices)
        new_index = self.index.take(indices)
        return self._constructor(new_values, index=new_index).__finalize__(self)

    def cumsum(self, axis=0, dtype=None, out=None):
        """
        Cumulative sum of values. Preserves locations of NaN values

        Returns
        -------
        cumsum : Series or SparseSeries
        """
        new_array = SparseArray.cumsum(self.values)
        if isinstance(new_array, SparseArray):
            return self._constructor(new_array, index=self.index, sparse_index=new_array.sp_index).__finalize__(self)
        return Series(new_array, index=self.index).__finalize__(self)

    def dropna(self, axis=0, inplace=False, **kwargs):
        """
        Analogous to Series.dropna. If fill_value=NaN, returns a dense Series
        """
        # TODO: make more efficient
        axis = self._get_axis_number(axis or 0)
        dense_valid = self.to_dense().valid()
        if inplace:
            raise NotImplementedError("Cannot perform inplace dropna"
                                      " operations on a SparseSeries")
        if isnull(self.fill_value):
            return dense_valid
        else:
            dense_valid = dense_valid[dense_valid != self.fill_value]
            return dense_valid.to_sparse(fill_value=self.fill_value)

    def shift(self, periods, freq=None, **kwds):
        """
        Analogous to Series.shift
        """
        from pandas.core.datetools import _resolve_offset

        offset = _resolve_offset(freq, kwds)

        # no special handling of fill values yet
        if not isnull(self.fill_value):
            dense_shifted = self.to_dense().shift(periods, freq=freq,
                                                  **kwds)
            return dense_shifted.to_sparse(fill_value=self.fill_value,
                                           kind=self.kind)

        if periods == 0:
            return self.copy()

        if offset is not None:
            return self._constructor(self.sp_values,
                                     sparse_index=self.sp_index,
                                     index=self.index.shift(periods, offset),
                                     fill_value=self.fill_value).__finalize__(self)

        int_index = self.sp_index.to_int_index()
        new_indices = int_index.indices + periods
        start, end = new_indices.searchsorted([0, int_index.length])

        new_indices = new_indices[start:end]

        new_sp_index = IntIndex(len(self), new_indices)
        if isinstance(self.sp_index, BlockIndex):
            new_sp_index = new_sp_index.to_block_index()

        return self._constructor(self.sp_values[start:end].copy(),
                                 index=self.index,
                                 sparse_index=new_sp_index,
                                 fill_value=self.fill_value).__finalize__(self)

    def combine_first(self, other):
        """
        Combine Series values, choosing the calling Series's values
        first. Result index will be the union of the two indexes

        Parameters
        ----------
        other : Series

        Returns
        -------
        y : Series
        """
        if isinstance(other, SparseSeries):
            other = other.to_dense()

        dense_combined = self.to_dense().combine_first(other)
        return dense_combined.to_sparse(fill_value=self.fill_value)
示例#17
0
文件: series.py 项目: giang12/pandas
    def __init__(self, data=None, index=None, sparse_index=None, kind='block',
                 fill_value=None, name=None, dtype=None, copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            if isinstance(data, SparseArray):
                if index is not None:
                    assert (len(index) == len(data))
                sparse_index = data.sp_index
                if fill_value is None:
                    fill_value = data.fill_value

                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()
                if fill_value is None:
                    fill_value = data.fill_value
                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                data = Series(data, index=index)
                index = data.index.view()

                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    res = make_sparse(data, kind=kind, fill_value=fill_value)
                    data, sparse_index, fill_value = res
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                elif not data.index.equals(index) or copy:  # pragma: no cover
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError('Cannot pass both SingleBlockManager '
                                         '`data` argument and a different '
                                         '`index` argument.  `copy` must '
                                         'be False.')

            else:
                length = len(index)

                if data == fill_value or (isna(data) and isna(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = ibase.default_index(sparse_index.length)
            index = ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data, sparse_index=sparse_index,
                                       fill_value=fill_value, dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
示例#18
0
def test_single_block_manager_fastpath_deprecated():
    # GH#33092
    ser = Series(range(3))
    blk = ser._data.blocks[0]
    with tm.assert_produces_warning(FutureWarning):
        SingleBlockManager(blk, ser.index, fastpath=True)
示例#19
0
class SparseSeries(Series):
    """Data structure for labeled, sparse floating point data

    Parameters
    ----------
    data : {array-like, Series, SparseSeries, dict}
    kind : {'block', 'integer'}
    fill_value : float
        Code for missing value. Defaults depends on dtype.
        0 for int dtype, False for bool dtype, and NaN for other dtypes
    sparse_index : {BlockIndex, IntIndex}, optional
        Only if you have one. Mainly used internally

    Notes
    -----
    SparseSeries objects are immutable via the typical Python means. If you
    must change values, convert to dense, make your changes, then convert back
    to sparse
    """
    _subtyp = 'sparse_series'

    def __init__(self, data=None, index=None, sparse_index=None, kind='block',
                 fill_value=None, name=None, dtype=None, copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            if isinstance(data, SparseArray):
                if index is not None:
                    assert (len(index) == len(data))
                sparse_index = data.sp_index
                if fill_value is None:
                    fill_value = data.fill_value

                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()
                if fill_value is None:
                    fill_value = data.fill_value
                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                if index is None:
                    index = data.index.view()

                data = Series(data)
                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    res = make_sparse(data, kind=kind, fill_value=fill_value)
                    data, sparse_index, fill_value = res
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:

                    data = data.reindex(index, copy=False)

            else:
                length = len(index)

                if data == fill_value or (isnull(data) and isnull(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data, sparse_index=sparse_index,
                                       fill_value=fill_value, dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name

    @property
    def values(self):
        """ return the array """
        return self.block.values

    def __array__(self, result=None):
        """ the array interface, return my values """
        return self.block.values

    def get_values(self):
        """ same as values """
        return self.block.to_dense().view()

    @property
    def block(self):
        return self._data._block

    @property
    def fill_value(self):
        return self.block.fill_value

    @fill_value.setter
    def fill_value(self, v):
        self.block.fill_value = v

    @property
    def sp_index(self):
        return self.block.sp_index

    @property
    def sp_values(self):
        return self.values.sp_values

    @property
    def npoints(self):
        return self.sp_index.npoints

    @classmethod
    def from_array(cls, arr, index=None, name=None, copy=False,
                   fill_value=None, fastpath=False):
        """
        Simplified alternate constructor
        """
        return cls(arr, index=index, name=name, copy=copy,
                   fill_value=fill_value, fastpath=fastpath)

    @property
    def _constructor(self):
        return SparseSeries

    @property
    def _constructor_expanddim(self):
        from pandas.core.sparse.api import SparseDataFrame
        return SparseDataFrame

    @property
    def kind(self):
        if isinstance(self.sp_index, BlockIndex):
            return 'block'
        elif isinstance(self.sp_index, IntIndex):
            return 'integer'

    def as_sparse_array(self, kind=None, fill_value=None, copy=False):
        """ return my self as a sparse array, do not copy by default """

        if fill_value is None:
            fill_value = self.fill_value
        if kind is None:
            kind = self.kind
        return SparseArray(self.values, sparse_index=self.sp_index,
                           fill_value=fill_value, kind=kind, copy=copy)

    def __len__(self):
        return len(self.block)

    @property
    def shape(self):
        return self._data.shape

    def __unicode__(self):
        # currently, unicode is same as repr...fixes infinite loop
        series_rep = Series.__unicode__(self)
        rep = '%s\n%s' % (series_rep, repr(self.sp_index))
        return rep

    def __array_wrap__(self, result, context=None):
        """
        Gets called prior to a ufunc (and after)

        See SparseArray.__array_wrap__ for detail.
        """
        if isinstance(context, tuple) and len(context) == 3:
            ufunc, args, domain = context
            args = [getattr(a, 'fill_value', a) for a in args]
            with np.errstate(all='ignore'):
                fill_value = ufunc(self.fill_value, *args[1:])
        else:
            fill_value = self.fill_value

        return self._constructor(result, index=self.index,
                                 sparse_index=self.sp_index,
                                 fill_value=fill_value,
                                 copy=False).__finalize__(self)

    def __array_finalize__(self, obj):
        """
        Gets called after any ufunc or other array operations, necessary
        to pass on the index.
        """
        self.name = getattr(obj, 'name', None)
        self.fill_value = getattr(obj, 'fill_value', None)

    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                filter_type=None, **kwds):
        """ perform a reduction operation """
        return op(self.get_values(), skipna=skipna, **kwds)

    def __getstate__(self):
        # pickling
        return dict(_typ=self._typ, _subtyp=self._subtyp, _data=self._data,
                    fill_value=self.fill_value, name=self.name)

    def _unpickle_series_compat(self, state):

        nd_state, own_state = state

        # recreate the ndarray
        data = np.empty(nd_state[1], dtype=nd_state[2])
        np.ndarray.__setstate__(data, nd_state)

        index, fill_value, sp_index = own_state[:3]
        name = None
        if len(own_state) > 3:
            name = own_state[3]

        # create a sparse array
        if not isinstance(data, SparseArray):
            data = SparseArray(data, sparse_index=sp_index,
                               fill_value=fill_value, copy=False)

        # recreate
        data = SingleBlockManager(data, index, fastpath=True)
        generic.NDFrame.__init__(self, data)

        self._set_axis(0, index)
        self.name = name

    def __iter__(self):
        """ forward to the array """
        return iter(self.values)

    def _set_subtyp(self, is_all_dates):
        if is_all_dates:
            object.__setattr__(self, '_subtyp', 'sparse_time_series')
        else:
            object.__setattr__(self, '_subtyp', 'sparse_series')

    def _ixs(self, i, axis=0):
        """
        Return the i-th value or values in the SparseSeries by location

        Parameters
        ----------
        i : int, slice, or sequence of integers

        Returns
        -------
        value : scalar (int) or Series (slice, sequence)
        """
        label = self.index[i]
        if isinstance(label, Index):
            return self.take(i, axis=axis, convert=True)
        else:
            return self._get_val_at(i)

    def _get_val_at(self, loc):
        """ forward to the array """
        return self.block.values._get_val_at(loc)

    def __getitem__(self, key):
        try:
            return self.index.get_value(self, key)

        except InvalidIndexError:
            pass
        except KeyError:
            if isinstance(key, (int, np.integer)):
                return self._get_val_at(key)
            elif key is Ellipsis:
                return self
            raise Exception('Requested index not in this series!')

        except TypeError:
            # Could not hash item, must be array-like?
            pass

        key = _values_from_object(key)
        if self.index.nlevels > 1 and isinstance(key, tuple):
            # to handle MultiIndex labels
            key = self.index.get_loc(key)
        return self._constructor(self.values[key],
                                 index=self.index[key]).__finalize__(self)

    def _get_values(self, indexer):
        try:
            return self._constructor(self._data.get_slice(indexer),
                                     fastpath=True).__finalize__(self)
        except Exception:
            return self[indexer]

    def _set_with_engine(self, key, value):
        return self.set_value(key, value)

    def abs(self):
        """
        Return an object with absolute value taken. Only applicable to objects
        that are all numeric

        Returns
        -------
        abs: type of caller
        """
        return self._constructor(np.abs(self.values),
                                 index=self.index).__finalize__(self)

    def get(self, label, default=None):
        """
        Returns value occupying requested label, default to specified
        missing value if not present. Analogous to dict.get

        Parameters
        ----------
        label : object
            Label value looking for
        default : object, optional
            Value to return if label not in index

        Returns
        -------
        y : scalar
        """
        if label in self.index:
            loc = self.index.get_loc(label)
            return self._get_val_at(loc)
        else:
            return default

    def get_value(self, label, takeable=False):
        """
        Retrieve single value at passed index label

        Parameters
        ----------
        index : label
        takeable : interpret the index as indexers, default False

        Returns
        -------
        value : scalar value
        """
        loc = label if takeable is True else self.index.get_loc(label)
        return self._get_val_at(loc)

    def set_value(self, label, value, takeable=False):
        """
        Quickly set single value at passed label. If label is not contained, a
        new object is created with the label placed at the end of the result
        index

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed
        value : object
            Scalar value
        takeable : interpret the index as indexers, default False

        Notes
        -----
        This method *always* returns a new object. It is not particularly
        efficient but is provided for API compatibility with Series

        Returns
        -------
        series : SparseSeries
        """
        values = self.to_dense()

        # if the label doesn't exist, we will create a new object here
        # and possibily change the index
        new_values = values.set_value(label, value, takeable=takeable)
        if new_values is not None:
            values = new_values
        new_index = values.index
        values = SparseArray(values, fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, new_index)
        self._index = new_index

    def _set_values(self, key, value):

        # this might be inefficient as we have to recreate the sparse array
        # rather than setting individual elements, but have to convert
        # the passed slice/boolean that's in dense space into a sparse indexer
        # not sure how to do that!
        if isinstance(key, Series):
            key = key.values

        values = self.values.to_dense()
        values[key] = _index.convert_scalar(values, value)
        values = SparseArray(values, fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, self.index)

    def to_dense(self, sparse_only=False):
        """
        Convert SparseSeries to a Series.

        Parameters
        ----------
        sparse_only: bool, default False
            DEPRECATED: this argument will be removed in a future version.

            If True, return just the non-sparse values, or the dense version
            of `self.values` if False.

        Returns
        -------
        s : Series
        """
        if sparse_only:
            warnings.warn(("The 'sparse_only' parameter has been deprecated "
                           "and will be removed in a future version."),
                          FutureWarning, stacklevel=2)
            int_index = self.sp_index.to_int_index()
            index = self.index.take(int_index.indices)
            return Series(self.sp_values, index=index, name=self.name)
        else:
            return Series(self.values.to_dense(), index=self.index,
                          name=self.name)

    @property
    def density(self):
        r = float(self.sp_index.npoints) / float(self.sp_index.length)
        return r

    def copy(self, deep=True):
        """
        Make a copy of the SparseSeries. Only the actual sparse values need to
        be copied
        """
        new_data = self._data
        if deep:
            new_data = self._data.copy()

        return self._constructor(new_data, sparse_index=self.sp_index,
                                 fill_value=self.fill_value).__finalize__(self)

    @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
    def reindex(self, index=None, method=None, copy=True, limit=None,
                **kwargs):

        return super(SparseSeries, self).reindex(index=index, method=method,
                                                 copy=copy, limit=limit,
                                                 **kwargs)

    def sparse_reindex(self, new_index):
        """
        Conform sparse values to new SparseIndex

        Parameters
        ----------
        new_index : {BlockIndex, IntIndex}

        Returns
        -------
        reindexed : SparseSeries
        """
        if not isinstance(new_index, splib.SparseIndex):
            raise TypeError('new index must be a SparseIndex')

        block = self.block.sparse_reindex(new_index)
        new_data = SingleBlockManager(block, self.index)
        return self._constructor(new_data, index=self.index,
                                 sparse_index=new_index,
                                 fill_value=self.fill_value).__finalize__(self)

    def take(self, indices, axis=0, convert=True, *args, **kwargs):
        """
        Sparse-compatible version of ndarray.take

        Returns
        -------
        taken : ndarray
        """

        convert = nv.validate_take_with_convert(convert, args, kwargs)
        new_values = SparseArray.take(self.values, indices)
        new_index = self.index.take(indices)
        return self._constructor(new_values,
                                 index=new_index).__finalize__(self)

    def cumsum(self, axis=0, *args, **kwargs):
        """
        Cumulative sum of non-NA/null values.

        When performing the cumulative summation, any non-NA/null values will
        be skipped. The resulting SparseSeries will preserve the locations of
        NaN values, but the fill value will be `np.nan` regardless.

        Parameters
        ----------
        axis : {0}

        Returns
        -------
        cumsum : SparseSeries
        """
        nv.validate_cumsum(args, kwargs)
        if axis is not None:
            axis = self._get_axis_number(axis)

        new_array = self.values.cumsum()

        return self._constructor(
            new_array, index=self.index,
            sparse_index=new_array.sp_index).__finalize__(self)

    @Appender(generic._shared_docs['isnull'])
    def isnull(self):
        arr = SparseArray(isnull(self.values.sp_values),
                          sparse_index=self.values.sp_index,
                          fill_value=isnull(self.fill_value))
        return self._constructor(arr, index=self.index).__finalize__(self)

    @Appender(generic._shared_docs['isnotnull'])
    def isnotnull(self):
        arr = SparseArray(notnull(self.values.sp_values),
                          sparse_index=self.values.sp_index,
                          fill_value=notnull(self.fill_value))
        return self._constructor(arr, index=self.index).__finalize__(self)

    def dropna(self, axis=0, inplace=False, **kwargs):
        """
        Analogous to Series.dropna. If fill_value=NaN, returns a dense Series
        """
        # TODO: make more efficient
        axis = self._get_axis_number(axis or 0)
        dense_valid = self.to_dense().valid()
        if inplace:
            raise NotImplementedError("Cannot perform inplace dropna"
                                      " operations on a SparseSeries")
        if isnull(self.fill_value):
            return dense_valid
        else:
            dense_valid = dense_valid[dense_valid != self.fill_value]
            return dense_valid.to_sparse(fill_value=self.fill_value)

    @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
    def shift(self, periods, freq=None, axis=0):
        if periods == 0:
            return self.copy()

        # no special handling of fill values yet
        if not isnull(self.fill_value):
            shifted = self.to_dense().shift(periods, freq=freq,
                                            axis=axis)
            return shifted.to_sparse(fill_value=self.fill_value,
                                     kind=self.kind)

        if freq is not None:
            return self._constructor(
                self.sp_values, sparse_index=self.sp_index,
                index=self.index.shift(periods, freq),
                fill_value=self.fill_value).__finalize__(self)

        int_index = self.sp_index.to_int_index()
        new_indices = int_index.indices + periods
        start, end = new_indices.searchsorted([0, int_index.length])

        new_indices = new_indices[start:end]
        new_sp_index = _make_index(len(self), new_indices, self.sp_index)

        arr = self.values._simple_new(self.sp_values[start:end].copy(),
                                      new_sp_index, fill_value=np.nan)
        return self._constructor(arr, index=self.index).__finalize__(self)

    def combine_first(self, other):
        """
        Combine Series values, choosing the calling Series's values
        first. Result index will be the union of the two indexes

        Parameters
        ----------
        other : Series

        Returns
        -------
        y : Series
        """
        if isinstance(other, SparseSeries):
            other = other.to_dense()

        dense_combined = self.to_dense().combine_first(other)
        return dense_combined.to_sparse(fill_value=self.fill_value)

    def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False):
        """
        Create a scipy.sparse.coo_matrix from a SparseSeries with MultiIndex.

        Use row_levels and column_levels to determine the row and column
        coordinates respectively. row_levels and column_levels are the names
        (labels) or numbers of the levels. {row_levels, column_levels} must be
        a partition of the MultiIndex level names (or numbers).

        .. versionadded:: 0.16.0

        Parameters
        ----------
        row_levels : tuple/list
        column_levels : tuple/list
        sort_labels : bool, default False
            Sort the row and column labels before forming the sparse matrix.

        Returns
        -------
        y : scipy.sparse.coo_matrix
        rows : list (row labels)
        columns : list (column labels)

        Examples
        --------
        >>> from numpy import nan
        >>> s = Series([3.0, nan, 1.0, 3.0, nan, nan])
        >>> s.index = MultiIndex.from_tuples([(1, 2, 'a', 0),
                                              (1, 2, 'a', 1),
                                              (1, 1, 'b', 0),
                                              (1, 1, 'b', 1),
                                              (2, 1, 'b', 0),
                                              (2, 1, 'b', 1)],
                                              names=['A', 'B', 'C', 'D'])
        >>> ss = s.to_sparse()
        >>> A, rows, columns = ss.to_coo(row_levels=['A', 'B'],
                                         column_levels=['C', 'D'],
                                         sort_labels=True)
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
                with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[ 0.,  0.,  1.,  3.],
        [ 3.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]])
        >>> rows
        [(1, 1), (1, 2), (2, 1)]
        >>> columns
        [('a', 0), ('a', 1), ('b', 0), ('b', 1)]
        """
        A, rows, columns = _sparse_series_to_coo(self, row_levels,
                                                 column_levels,
                                                 sort_labels=sort_labels)
        return A, rows, columns

    @classmethod
    def from_coo(cls, A, dense_index=False):
        """
        Create a SparseSeries from a scipy.sparse.coo_matrix.

        .. versionadded:: 0.16.0

        Parameters
        ----------
        A : scipy.sparse.coo_matrix
        dense_index : bool, default False
            If False (default), the SparseSeries index consists of only the
            coords of the non-null entries of the original coo_matrix.
            If True, the SparseSeries index consists of the full sorted
            (row, col) coordinates of the coo_matrix.

        Returns
        -------
        s : SparseSeries

        Examples
        ---------
        >>> from scipy import sparse
        >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])),
                               shape=(3, 4))
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
                with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[ 0.,  0.,  1.,  2.],
                [ 3.,  0.,  0.,  0.],
                [ 0.,  0.,  0.,  0.]])
        >>> ss = SparseSeries.from_coo(A)
        >>> ss
        0  2    1
           3    2
        1  0    3
        dtype: float64
        BlockIndex
        Block locations: array([0], dtype=int32)
        Block lengths: array([3], dtype=int32)
        """
        return _coo_to_sparse_series(A, dense_index=dense_index)
示例#20
0
class SparseSeries(Series):
    """Data structure for labeled, sparse floating point data

    Parameters
    ----------
    data : {array-like, Series, SparseSeries, dict}
    kind : {'block', 'integer'}
    fill_value : float
        Defaults to NaN (code for missing)
    sparse_index : {BlockIndex, IntIndex}, optional
        Only if you have one. Mainly used internally

    Notes
    -----
    SparseSeries objects are immutable via the typical Python means. If you
    must change values, convert to dense, make your changes, then convert back
    to sparse
    """
    _subtyp = 'sparse_series'

    def __init__(self,
                 data,
                 index=None,
                 sparse_index=None,
                 kind='block',
                 fill_value=None,
                 name=None,
                 dtype=None,
                 copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined
            data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()
        else:

            is_sparse_array = isinstance(data, SparseArray)
            if fill_value is None:
                if is_sparse_array:
                    fill_value = data.fill_value
                else:
                    fill_value = nan

            if is_sparse_array:
                if isinstance(data, SparseSeries) and index is None:
                    index = data.index
                elif index is not None:
                    assert (len(index) == len(data))

                sparse_index = data.sp_index
                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index

                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                if index is None:
                    index = data.index

                data = Series(data)
                data, sparse_index = make_sparse(data,
                                                 kind=kind,
                                                 fill_value=fill_value)

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    data, sparse_index = make_sparse(data,
                                                     kind=kind,
                                                     fill_value=fill_value)
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index
                else:
                    data = data.reindex(index, copy=False)

            else:

                length = len(index)

                if data == fill_value or (isnull(data) and isnull(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data,
                                       sparse_index=sparse_index,
                                       fill_value=fill_value,
                                       dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name

    @property
    def values(self):
        """ return the array """
        return self._data._values

    def get_values(self):
        """ same as values """
        return self._data._values.to_dense().view()

    @property
    def block(self):
        return self._data._block

    @property
    def fill_value(self):
        return self.block.fill_value

    @fill_value.setter
    def fill_value(self, v):
        self.block.fill_value = v

    @property
    def sp_index(self):
        return self.block.sp_index

    @property
    def sp_values(self):
        return self.values.sp_values

    @property
    def npoints(self):
        return self.sp_index.npoints

    @classmethod
    def from_array(cls,
                   arr,
                   index=None,
                   name=None,
                   copy=False,
                   fill_value=None,
                   fastpath=False):
        """
        Simplified alternate constructor
        """
        return cls(arr,
                   index=index,
                   name=name,
                   copy=copy,
                   fill_value=fill_value,
                   fastpath=fastpath)

    @property
    def _constructor(self):
        return SparseSeries

    @property
    def kind(self):
        if isinstance(self.sp_index, BlockIndex):
            return 'block'
        elif isinstance(self.sp_index, IntIndex):
            return 'integer'

    def as_sparse_array(self, kind=None, fill_value=None, copy=False):
        """ return my self as a sparse array, do not copy by default """

        if fill_value is None:
            fill_value = self.fill_value
        if kind is None:
            kind = self.kind
        return SparseArray(self.values,
                           sparse_index=self.sp_index,
                           fill_value=fill_value,
                           kind=kind,
                           copy=copy)

    def __len__(self):
        return len(self.block)

    def __unicode__(self):
        # currently, unicode is same as repr...fixes infinite loop
        series_rep = Series.__unicode__(self)
        rep = '%s\n%s' % (series_rep, repr(self.sp_index))
        return rep

    # Arithmetic operators

    __add__ = _sparse_op_wrap(operator.add, 'add')
    __sub__ = _sparse_op_wrap(operator.sub, 'sub')
    __mul__ = _sparse_op_wrap(operator.mul, 'mul')
    __truediv__ = _sparse_op_wrap(operator.truediv, 'truediv')
    __floordiv__ = _sparse_op_wrap(operator.floordiv, 'floordiv')
    __pow__ = _sparse_op_wrap(operator.pow, 'pow')

    # Inplace operators
    __iadd__ = __add__
    __isub__ = __sub__
    __imul__ = __mul__
    __itruediv__ = __truediv__
    __ifloordiv__ = __floordiv__
    __ipow__ = __pow__

    # reverse operators
    __radd__ = _sparse_op_wrap(operator.add, '__radd__')
    __rsub__ = _sparse_op_wrap(lambda x, y: y - x, '__rsub__')
    __rmul__ = _sparse_op_wrap(operator.mul, '__rmul__')
    __rtruediv__ = _sparse_op_wrap(lambda x, y: y / x, '__rtruediv__')
    __rfloordiv__ = _sparse_op_wrap(lambda x, y: y // x, 'floordiv')
    __rpow__ = _sparse_op_wrap(lambda x, y: y**x, '__rpow__')

    # Python 2 division operators
    if not compat.PY3:
        __div__ = _sparse_op_wrap(operator.div, 'div')
        __rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__')

    def __array_wrap__(self, result):
        """
        Gets called prior to a ufunc (and after)
        """
        return self._constructor(result,
                                 index=self.index,
                                 sparse_index=self.sp_index,
                                 fill_value=self.fill_value,
                                 copy=False)

    def __array_finalize__(self, obj):
        """
        Gets called after any ufunc or other array operations, necessary
        to pass on the index.
        """
        self.name = getattr(obj, 'name', None)
        self.fill_value = getattr(obj, 'fill_value', None)

    def __getstate__(self):
        # pickling
        return dict(_typ=self._typ,
                    _subtyp=self._subtyp,
                    _data=self._data,
                    fill_value=self.fill_value,
                    name=self.name)

    def _unpickle_series_compat(self, state):

        nd_state, own_state = state

        # recreate the ndarray
        data = np.empty(nd_state[1], dtype=nd_state[2])
        np.ndarray.__setstate__(data, nd_state)

        index, fill_value, sp_index = own_state[:3]
        name = None
        if len(own_state) > 3:
            name = own_state[3]

        # create a sparse array
        if not isinstance(data, SparseArray):
            data = SparseArray(data,
                               sparse_index=sp_index,
                               fill_value=fill_value,
                               copy=False)

        # recreate
        data = SingleBlockManager(data, index, fastpath=True)
        generic.NDFrame.__init__(self, data)

        self._set_axis(0, index)
        self.name = name

    def __iter__(self):
        """ forward to the array """
        return iter(self.values)

    def _set_subtyp(self, is_all_dates):
        if is_all_dates:
            object.__setattr__(self, '_subtyp', 'sparse_time_series')
        else:
            object.__setattr__(self, '_subtyp', 'sparse_series')

    def _get_val_at(self, loc):
        """ forward to the array """
        return self.block.values._get_val_at(loc)

    def __getitem__(self, key):
        """

        """
        try:
            return self._get_val_at(self.index.get_loc(key))

        except KeyError:
            if isinstance(key, (int, np.integer)):
                return self._get_val_at(key)
            raise Exception('Requested index not in this series!')

        except TypeError:
            # Could not hash item, must be array-like?
            pass

        # is there a case where this would NOT be an ndarray?
        # need to find an example, I took out the case for now

        key = _values_from_object(key)
        dataSlice = self.values[key]
        new_index = Index(self.index.view(ndarray)[key])
        return self._constructor(dataSlice, index=new_index, name=self.name)

    def _set_with_engine(self, key, value):
        return self.set_value(key, value)

    def abs(self):
        """
        Return an object with absolute value taken. Only applicable to objects
        that are all numeric

        Returns
        -------
        abs: type of caller
        """
        res_sp_values = np.abs(self.sp_values)
        return SparseSeries(res_sp_values,
                            index=self.index,
                            sparse_index=self.sp_index,
                            fill_value=self.fill_value)

    def get(self, label, default=None):
        """
        Returns value occupying requested label, default to specified
        missing value if not present. Analogous to dict.get

        Parameters
        ----------
        label : object
            Label value looking for
        default : object, optional
            Value to return if label not in index

        Returns
        -------
        y : scalar
        """
        if label in self.index:
            loc = self.index.get_loc(label)
            return self._get_val_at(loc)
        else:
            return default

    def get_value(self, label):
        """
        Retrieve single value at passed index label

        Parameters
        ----------
        index : label

        Returns
        -------
        value : scalar value
        """
        loc = self.index.get_loc(label)
        return self._get_val_at(loc)

    def set_value(self, label, value):
        """
        Quickly set single value at passed label. If label is not contained, a
        new object is created with the label placed at the end of the result
        index

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed
        value : object
            Scalar value

        Notes
        -----
        This method *always* returns a new object. It is not particularly
        efficient but is provided for API compatibility with Series

        Returns
        -------
        series : SparseSeries
        """
        values = self.to_dense()

        # if the label doesn't exist, we will create a new object here
        # and possibily change the index
        new_values = values.set_value(label, value)
        if new_values is not None:
            values = new_values
        new_index = values.index
        values = SparseArray(values,
                             fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, new_index)
        self._index = new_index

    def _set_values(self, key, value):

        # this might be inefficient as we have to recreate the sparse array
        # rather than setting individual elements, but have to convert
        # the passed slice/boolean that's in dense space into a sparse indexer
        # not sure how to do that!
        if isinstance(key, Series):
            key = key.values

        values = self.values.to_dense()
        values[key] = _index.convert_scalar(values, value)
        values = SparseArray(values,
                             fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, self.index)

    def to_dense(self, sparse_only=False):
        """
        Convert SparseSeries to (dense) Series
        """
        if sparse_only:
            int_index = self.sp_index.to_int_index()
            index = self.index.take(int_index.indices)
            return Series(self.sp_values, index=index, name=self.name)
        else:
            return Series(self.values.to_dense(),
                          index=self.index,
                          name=self.name)

    @property
    def density(self):
        r = float(self.sp_index.npoints) / float(self.sp_index.length)
        return r

    def copy(self, deep=True):
        """
        Make a copy of the SparseSeries. Only the actual sparse values need to
        be copied
        """
        new_data = self._data
        if deep:
            new_data = self._data.copy()

        return self._constructor(new_data,
                                 index=self.index,
                                 sparse_index=self.sp_index,
                                 fill_value=self.fill_value,
                                 name=self.name)

    def reindex(self, index=None, method=None, copy=True, limit=None):
        """
        Conform SparseSeries to new Index

        See Series.reindex docstring for general behavior

        Returns
        -------
        reindexed : SparseSeries
        """
        new_index = _ensure_index(index)

        if self.index.equals(new_index):
            if copy:
                return self.copy()
            else:
                return self
        return self._constructor(self._data.reindex(new_index,
                                                    method=method,
                                                    limit=limit,
                                                    copy=copy),
                                 index=new_index,
                                 name=self.name)

    def sparse_reindex(self, new_index):
        """
        Conform sparse values to new SparseIndex

        Parameters
        ----------
        new_index : {BlockIndex, IntIndex}

        Returns
        -------
        reindexed : SparseSeries
        """
        if not (isinstance(new_index, splib.SparseIndex)):
            raise AssertionError()

        block = self.block.sparse_reindex(new_index)
        new_data = SingleBlockManager(block, block.ref_items)
        return self._constructor(new_data,
                                 index=self.index,
                                 sparse_index=new_index,
                                 fill_value=self.fill_value)

    def take(self, indices, axis=0, convert=True):
        """
        Sparse-compatible version of ndarray.take

        Returns
        -------
        taken : ndarray
        """
        new_values = SparseArray.take(self.values, indices)
        new_index = self.index.take(indices)
        return self._constructor(new_values, index=new_index)

    def cumsum(self, axis=0, dtype=None, out=None):
        """
        Cumulative sum of values. Preserves locations of NaN values

        Returns
        -------
        cumsum : Series or SparseSeries
        """
        new_array = SparseArray.cumsum(self.values)
        if isinstance(new_array, SparseArray):
            return self._constructor(new_array,
                                     index=self.index,
                                     sparse_index=new_array.sp_index,
                                     name=self.name)
        return Series(new_array, index=self.index, name=self.name)

    def dropna(self):
        """
        Analogous to Series.dropna. If fill_value=NaN, returns a dense Series
        """
        # TODO: make more efficient
        dense_valid = self.to_dense().valid()
        if isnull(self.fill_value):
            return dense_valid
        else:
            dense_valid = dense_valid[dense_valid != self.fill_value]
            return dense_valid.to_sparse(fill_value=self.fill_value)

    def shift(self, periods, freq=None, **kwds):
        """
        Analogous to Series.shift
        """
        from pandas.core.datetools import _resolve_offset

        offset = _resolve_offset(freq, kwds)

        # no special handling of fill values yet
        if not isnull(self.fill_value):
            dense_shifted = self.to_dense().shift(periods, freq=freq, **kwds)
            return dense_shifted.to_sparse(fill_value=self.fill_value,
                                           kind=self.kind)

        if periods == 0:
            return self.copy()

        if offset is not None:
            return self._constructor(self.sp_values,
                                     sparse_index=self.sp_index,
                                     index=self.index.shift(periods, offset),
                                     fill_value=self.fill_value)

        int_index = self.sp_index.to_int_index()
        new_indices = int_index.indices + periods
        start, end = new_indices.searchsorted([0, int_index.length])

        new_indices = new_indices[start:end]

        new_sp_index = IntIndex(len(self), new_indices)
        if isinstance(self.sp_index, BlockIndex):
            new_sp_index = new_sp_index.to_block_index()

        return self._constructor(self.sp_values[start:end].copy(),
                                 index=self.index,
                                 sparse_index=new_sp_index,
                                 fill_value=self.fill_value)

    def combine_first(self, other):
        """
        Combine Series values, choosing the calling Series's values
        first. Result index will be the union of the two indexes

        Parameters
        ----------
        other : Series

        Returns
        -------
        y : Series
        """
        if isinstance(other, SparseSeries):
            other = other.to_dense()

        dense_combined = self.to_dense().combine_first(other)
        return dense_combined.to_sparse(fill_value=self.fill_value)
示例#21
0
    def __init__(self, data=None, index=None, crs=None, **kwargs):
        if hasattr(data, "crs") and crs:
            if not data.crs:
                # make a copy to avoid setting CRS to passed GeometryArray
                data = data.copy()
            else:
                if not data.crs == crs:
                    raise ValueError(
                        "CRS mismatch between CRS of the passed geometries "
                        "and 'crs'. Use 'GeoSeries.set_crs(crs, "
                        "allow_override=True)' to overwrite CRS or "
                        "'GeoSeries.to_crs(crs)' to reproject geometries. ")

        if isinstance(data, SingleBlockManager):
            if isinstance(data.blocks[0].dtype, GeometryDtype):
                if data.blocks[0].ndim == 2:
                    # bug in pandas 0.23 where in certain indexing operations
                    # (such as .loc) a 2D ExtensionBlock (still with 1D values
                    # is created) which results in other failures
                    # bug in pandas <= 0.25.0 when len(values) == 1
                    #   (https://github.com/pandas-dev/pandas/issues/27785)
                    from pandas.core.internals import ExtensionBlock

                    values = data.blocks[0].values
                    block = ExtensionBlock(values,
                                           slice(0, len(values), 1),
                                           ndim=1)
                    data = SingleBlockManager([block],
                                              data.axes[0],
                                              fastpath=True)
            else:
                raise TypeError(
                    "Non geometry data passed to GeoSeries constructor, "
                    f"received data of dtype '{data.blocks[0].dtype}'")

        if isinstance(data, BaseGeometry):
            # fix problem for scalar geometries passed, ensure the list of
            # scalars is of correct length if index is specified
            n = len(index) if index is not None else 1
            data = [data] * n

        name = kwargs.pop("name", None)

        if not is_geometry_type(data):
            # if data is None and dtype is specified (eg from empty overlay
            # test), specifying dtype raises an error:
            # https://github.com/pandas-dev/pandas/issues/26469
            kwargs.pop("dtype", None)
            # Use Series constructor to handle input data
            with compat.ignore_shapely2_warnings():
                # suppress additional warning from pandas for empty data
                # (will always give object dtype instead of float dtype in the future,
                # making the `if s.empty: s = s.astype(object)` below unnecessary)
                empty_msg = "The default dtype for empty Series"
                warnings.filterwarnings("ignore", empty_msg,
                                        DeprecationWarning)
                warnings.filterwarnings("ignore", empty_msg, FutureWarning)
                s = pd.Series(data, index=index, name=name, **kwargs)
            # prevent trying to convert non-geometry objects
            if s.dtype != object:
                if (s.empty and s.dtype == "float64") or data is None:
                    # pd.Series with empty data gives float64 for older pandas versions
                    s = s.astype(object)
                else:
                    raise TypeError(
                        "Non geometry data passed to GeoSeries constructor, "
                        f"received data of dtype '{s.dtype}'")
            # try to convert to GeometryArray, if fails return plain Series
            try:
                data = from_shapely(s.values, crs)
            except TypeError:
                raise TypeError(
                    "Non geometry data passed to GeoSeries constructor, "
                    f"received data of dtype '{s.dtype}'")
            index = s.index
            name = s.name

        super().__init__(data, index=index, name=name, **kwargs)
        if not self.crs:
            self.crs = crs
示例#22
0
    def __init__(self,
                 data=None,
                 index=None,
                 sparse_index=None,
                 kind='block',
                 fill_value=None,
                 name=None,
                 dtype=None,
                 copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            is_sparse_array = isinstance(data, SparseArray)
            if fill_value is None:
                if is_sparse_array:
                    fill_value = data.fill_value
                else:
                    fill_value = np.nan

            if is_sparse_array:
                if isinstance(data, SparseSeries) and index is None:
                    index = data.index.view()
                elif index is not None:
                    assert (len(index) == len(data))

                sparse_index = data.sp_index
                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()

                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                if index is None:
                    index = data.index.view()

                data = Series(data)
                data, sparse_index = make_sparse(data,
                                                 kind=kind,
                                                 fill_value=fill_value)

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    data, sparse_index = make_sparse(data,
                                                     kind=kind,
                                                     fill_value=fill_value)
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:

                    data = data.reindex(index, copy=False)

            else:
                length = len(index)

                if data == fill_value or (isnull(data) and isnull(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data,
                                       sparse_index=sparse_index,
                                       fill_value=fill_value,
                                       dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
示例#23
0
    def __init__(self,
                 data=None,
                 index=None,
                 sparse_index=None,
                 kind='block',
                 fill_value=None,
                 name=None,
                 dtype=None,
                 copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            if isinstance(data, SparseArray):
                if index is not None:
                    assert (len(index) == len(data))
                sparse_index = data.sp_index
                if fill_value is None:
                    fill_value = data.fill_value

                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()
                if fill_value is None:
                    fill_value = data.fill_value
                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                data = Series(data, index=index)
                index = data.index.view()

                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    res = make_sparse(data, kind=kind, fill_value=fill_value)
                    data, sparse_index, fill_value = res
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                elif not data.index.equals(index) or copy:  # pragma: no cover
                    # GH#19275 SingleBlockManager input should only be called
                    # internally
                    raise AssertionError('Cannot pass both SingleBlockManager '
                                         '`data` argument and a different '
                                         '`index` argument.  `copy` must '
                                         'be False.')

            else:
                length = len(index)

                if data == fill_value or (isna(data) and isna(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data,
                                       sparse_index=sparse_index,
                                       fill_value=fill_value,
                                       dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
示例#24
0
class SparseSeries(Series):
    """Data structure for labeled, sparse floating point data

    Parameters
    ----------
    data : {array-like, Series, SparseSeries, dict}
    kind : {'block', 'integer'}
    fill_value : float
        Defaults to NaN (code for missing)
    sparse_index : {BlockIndex, IntIndex}, optional
        Only if you have one. Mainly used internally

    Notes
    -----
    SparseSeries objects are immutable via the typical Python means. If you
    must change values, convert to dense, make your changes, then convert back
    to sparse
    """
    _subtyp = 'sparse_series'

    def __init__(self,
                 data=None,
                 index=None,
                 sparse_index=None,
                 kind='block',
                 fill_value=None,
                 name=None,
                 dtype=None,
                 copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            is_sparse_array = isinstance(data, SparseArray)
            if fill_value is None:
                if is_sparse_array:
                    fill_value = data.fill_value
                else:
                    fill_value = np.nan

            if is_sparse_array:
                if isinstance(data, SparseSeries) and index is None:
                    index = data.index.view()
                elif index is not None:
                    assert (len(index) == len(data))

                sparse_index = data.sp_index
                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()

                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                if index is None:
                    index = data.index.view()

                data = Series(data)
                data, sparse_index = make_sparse(data,
                                                 kind=kind,
                                                 fill_value=fill_value)

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    data, sparse_index = make_sparse(data,
                                                     kind=kind,
                                                     fill_value=fill_value)
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:

                    data = data.reindex(index, copy=False)

            else:
                length = len(index)

                if data == fill_value or (isnull(data) and isnull(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data,
                                       sparse_index=sparse_index,
                                       fill_value=fill_value,
                                       dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name

    @property
    def values(self):
        """ return the array """
        return self.block.values

    def __array__(self, result=None):
        """ the array interface, return my values """
        return self.block.values

    def get_values(self):
        """ same as values """
        return self.block.to_dense().view()

    @property
    def block(self):
        return self._data._block

    @property
    def fill_value(self):
        return self.block.fill_value

    @fill_value.setter
    def fill_value(self, v):
        self.block.fill_value = v

    @property
    def sp_index(self):
        return self.block.sp_index

    @property
    def sp_values(self):
        return self.values.sp_values

    @property
    def npoints(self):
        return self.sp_index.npoints

    @classmethod
    def from_array(cls,
                   arr,
                   index=None,
                   name=None,
                   copy=False,
                   fill_value=None,
                   fastpath=False):
        """
        Simplified alternate constructor
        """
        return cls(arr,
                   index=index,
                   name=name,
                   copy=copy,
                   fill_value=fill_value,
                   fastpath=fastpath)

    @property
    def _constructor(self):
        return SparseSeries

    @property
    def _constructor_expanddim(self):
        from pandas.sparse.api import SparseDataFrame
        return SparseDataFrame

    @property
    def kind(self):
        if isinstance(self.sp_index, BlockIndex):
            return 'block'
        elif isinstance(self.sp_index, IntIndex):
            return 'integer'

    def as_sparse_array(self, kind=None, fill_value=None, copy=False):
        """ return my self as a sparse array, do not copy by default """

        if fill_value is None:
            fill_value = self.fill_value
        if kind is None:
            kind = self.kind
        return SparseArray(self.values,
                           sparse_index=self.sp_index,
                           fill_value=fill_value,
                           kind=kind,
                           copy=copy)

    def __len__(self):
        return len(self.block)

    @property
    def shape(self):
        return self._data.shape

    def __unicode__(self):
        # currently, unicode is same as repr...fixes infinite loop
        series_rep = Series.__unicode__(self)
        rep = '%s\n%s' % (series_rep, repr(self.sp_index))
        return rep

    def __array_wrap__(self, result):
        """
        Gets called prior to a ufunc (and after)
        """
        return self._constructor(result,
                                 index=self.index,
                                 sparse_index=self.sp_index,
                                 fill_value=self.fill_value,
                                 copy=False).__finalize__(self)

    def __array_finalize__(self, obj):
        """
        Gets called after any ufunc or other array operations, necessary
        to pass on the index.
        """
        self.name = getattr(obj, 'name', None)
        self.fill_value = getattr(obj, 'fill_value', None)

    def _reduce(self,
                op,
                name,
                axis=0,
                skipna=True,
                numeric_only=None,
                filter_type=None,
                **kwds):
        """ perform a reduction operation """
        return op(self.get_values(), skipna=skipna, **kwds)

    def __getstate__(self):
        # pickling
        return dict(_typ=self._typ,
                    _subtyp=self._subtyp,
                    _data=self._data,
                    fill_value=self.fill_value,
                    name=self.name)

    def _unpickle_series_compat(self, state):

        nd_state, own_state = state

        # recreate the ndarray
        data = np.empty(nd_state[1], dtype=nd_state[2])
        np.ndarray.__setstate__(data, nd_state)

        index, fill_value, sp_index = own_state[:3]
        name = None
        if len(own_state) > 3:
            name = own_state[3]

        # create a sparse array
        if not isinstance(data, SparseArray):
            data = SparseArray(data,
                               sparse_index=sp_index,
                               fill_value=fill_value,
                               copy=False)

        # recreate
        data = SingleBlockManager(data, index, fastpath=True)
        generic.NDFrame.__init__(self, data)

        self._set_axis(0, index)
        self.name = name

    def __iter__(self):
        """ forward to the array """
        return iter(self.values)

    def _set_subtyp(self, is_all_dates):
        if is_all_dates:
            object.__setattr__(self, '_subtyp', 'sparse_time_series')
        else:
            object.__setattr__(self, '_subtyp', 'sparse_series')

    def _ixs(self, i, axis=0):
        """
        Return the i-th value or values in the SparseSeries by location

        Parameters
        ----------
        i : int, slice, or sequence of integers

        Returns
        -------
        value : scalar (int) or Series (slice, sequence)
        """
        label = self.index[i]
        if isinstance(label, Index):
            return self.take(i, axis=axis, convert=True)
        else:
            return self._get_val_at(i)

    def _get_val_at(self, loc):
        """ forward to the array """
        return self.block.values._get_val_at(loc)

    def __getitem__(self, key):
        try:
            return self.index.get_value(self, key)

        except InvalidIndexError:
            pass
        except KeyError:
            if isinstance(key, (int, np.integer)):
                return self._get_val_at(key)
            elif key is Ellipsis:
                return self
            raise Exception('Requested index not in this series!')

        except TypeError:
            # Could not hash item, must be array-like?
            pass

        key = _values_from_object(key)
        if self.index.nlevels > 1 and isinstance(key, tuple):
            # to handle MultiIndex labels
            key = self.index.get_loc(key)
        return self._constructor(self.values[key],
                                 index=self.index[key]).__finalize__(self)

    def _get_values(self, indexer):
        try:
            return self._constructor(self._data.get_slice(indexer),
                                     fastpath=True).__finalize__(self)
        except Exception:
            return self[indexer]

    def _set_with_engine(self, key, value):
        return self.set_value(key, value)

    def abs(self):
        """
        Return an object with absolute value taken. Only applicable to objects
        that are all numeric

        Returns
        -------
        abs: type of caller
        """
        res_sp_values = np.abs(self.sp_values)
        return self._constructor(res_sp_values,
                                 index=self.index,
                                 sparse_index=self.sp_index,
                                 fill_value=self.fill_value).__finalize__(self)

    def get(self, label, default=None):
        """
        Returns value occupying requested label, default to specified
        missing value if not present. Analogous to dict.get

        Parameters
        ----------
        label : object
            Label value looking for
        default : object, optional
            Value to return if label not in index

        Returns
        -------
        y : scalar
        """
        if label in self.index:
            loc = self.index.get_loc(label)
            return self._get_val_at(loc)
        else:
            return default

    def get_value(self, label, takeable=False):
        """
        Retrieve single value at passed index label

        Parameters
        ----------
        index : label
        takeable : interpret the index as indexers, default False

        Returns
        -------
        value : scalar value
        """
        loc = label if takeable is True else self.index.get_loc(label)
        return self._get_val_at(loc)

    def set_value(self, label, value, takeable=False):
        """
        Quickly set single value at passed label. If label is not contained, a
        new object is created with the label placed at the end of the result
        index

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed
        value : object
            Scalar value
        takeable : interpret the index as indexers, default False

        Notes
        -----
        This method *always* returns a new object. It is not particularly
        efficient but is provided for API compatibility with Series

        Returns
        -------
        series : SparseSeries
        """
        values = self.to_dense()

        # if the label doesn't exist, we will create a new object here
        # and possibily change the index
        new_values = values.set_value(label, value, takeable=takeable)
        if new_values is not None:
            values = new_values
        new_index = values.index
        values = SparseArray(values,
                             fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, new_index)
        self._index = new_index

    def _set_values(self, key, value):

        # this might be inefficient as we have to recreate the sparse array
        # rather than setting individual elements, but have to convert
        # the passed slice/boolean that's in dense space into a sparse indexer
        # not sure how to do that!
        if isinstance(key, Series):
            key = key.values

        values = self.values.to_dense()
        values[key] = _index.convert_scalar(values, value)
        values = SparseArray(values,
                             fill_value=self.fill_value,
                             kind=self.kind)
        self._data = SingleBlockManager(values, self.index)

    def to_dense(self, sparse_only=False):
        """
        Convert SparseSeries to (dense) Series
        """
        if sparse_only:
            int_index = self.sp_index.to_int_index()
            index = self.index.take(int_index.indices)
            return Series(self.sp_values, index=index, name=self.name)
        else:
            return Series(self.values.to_dense(),
                          index=self.index,
                          name=self.name)

    @property
    def density(self):
        r = float(self.sp_index.npoints) / float(self.sp_index.length)
        return r

    def copy(self, deep=True):
        """
        Make a copy of the SparseSeries. Only the actual sparse values need to
        be copied
        """
        new_data = self._data
        if deep:
            new_data = self._data.copy()

        return self._constructor(new_data,
                                 sparse_index=self.sp_index,
                                 fill_value=self.fill_value).__finalize__(self)

    def reindex(self,
                index=None,
                method=None,
                copy=True,
                limit=None,
                **kwargs):
        """
        Conform SparseSeries to new Index

        See Series.reindex docstring for general behavior

        Returns
        -------
        reindexed : SparseSeries
        """
        new_index = _ensure_index(index)

        if self.index.equals(new_index):
            if copy:
                return self.copy()
            else:
                return self
        return self._constructor(self._data.reindex(new_index,
                                                    method=method,
                                                    limit=limit,
                                                    copy=copy),
                                 index=new_index).__finalize__(self)

    def sparse_reindex(self, new_index):
        """
        Conform sparse values to new SparseIndex

        Parameters
        ----------
        new_index : {BlockIndex, IntIndex}

        Returns
        -------
        reindexed : SparseSeries
        """
        if not isinstance(new_index, splib.SparseIndex):
            raise TypeError('new index must be a SparseIndex')

        block = self.block.sparse_reindex(new_index)
        new_data = SingleBlockManager(block, self.index)
        return self._constructor(new_data,
                                 index=self.index,
                                 sparse_index=new_index,
                                 fill_value=self.fill_value).__finalize__(self)

    def take(self, indices, axis=0, convert=True, *args, **kwargs):
        """
        Sparse-compatible version of ndarray.take

        Returns
        -------
        taken : ndarray
        """
        convert = nv.validate_take_with_convert(convert, args, kwargs)
        new_values = SparseArray.take(self.values, indices)
        new_index = self.index.take(indices)
        return self._constructor(new_values,
                                 index=new_index).__finalize__(self)

    def cumsum(self, axis=0, *args, **kwargs):
        """
        Cumulative sum of values. Preserves locations of NaN values

        Returns
        -------
        cumsum : SparseSeries if `self` has a null `fill_value` and a
                 generic Series otherwise
        """
        nv.validate_cumsum(args, kwargs)
        new_array = SparseArray.cumsum(self.values)
        if isinstance(new_array, SparseArray):
            return self._constructor(
                new_array, index=self.index,
                sparse_index=new_array.sp_index).__finalize__(self)
        # TODO: gh-12855 - return a SparseSeries here
        return Series(new_array, index=self.index).__finalize__(self)

    def dropna(self, axis=0, inplace=False, **kwargs):
        """
        Analogous to Series.dropna. If fill_value=NaN, returns a dense Series
        """
        # TODO: make more efficient
        axis = self._get_axis_number(axis or 0)
        dense_valid = self.to_dense().valid()
        if inplace:
            raise NotImplementedError("Cannot perform inplace dropna"
                                      " operations on a SparseSeries")
        if isnull(self.fill_value):
            return dense_valid
        else:
            dense_valid = dense_valid[dense_valid != self.fill_value]
            return dense_valid.to_sparse(fill_value=self.fill_value)

    @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
    def shift(self, periods, freq=None, axis=0):
        if periods == 0:
            return self.copy()

        # no special handling of fill values yet
        if not isnull(self.fill_value):
            shifted = self.to_dense().shift(periods, freq=freq, axis=axis)
            return shifted.to_sparse(fill_value=self.fill_value,
                                     kind=self.kind)

        if freq is not None:
            return self._constructor(
                self.sp_values,
                sparse_index=self.sp_index,
                index=self.index.shift(periods, freq),
                fill_value=self.fill_value).__finalize__(self)

        int_index = self.sp_index.to_int_index()
        new_indices = int_index.indices + periods
        start, end = new_indices.searchsorted([0, int_index.length])

        new_indices = new_indices[start:end]
        new_sp_index = _make_index(len(self), new_indices, self.sp_index)

        arr = self.values._simple_new(self.sp_values[start:end].copy(),
                                      new_sp_index,
                                      fill_value=np.nan)
        return self._constructor(arr, index=self.index).__finalize__(self)

    def combine_first(self, other):
        """
        Combine Series values, choosing the calling Series's values
        first. Result index will be the union of the two indexes

        Parameters
        ----------
        other : Series

        Returns
        -------
        y : Series
        """
        if isinstance(other, SparseSeries):
            other = other.to_dense()

        dense_combined = self.to_dense().combine_first(other)
        return dense_combined.to_sparse(fill_value=self.fill_value)

    def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False):
        """
        Create a scipy.sparse.coo_matrix from a SparseSeries with MultiIndex.

        Use row_levels and column_levels to determine the row and column
        coordinates respectively. row_levels and column_levels are the names
        (labels) or numbers of the levels. {row_levels, column_levels} must be
        a partition of the MultiIndex level names (or numbers).

        .. versionadded:: 0.16.0

        Parameters
        ----------
        row_levels : tuple/list
        column_levels : tuple/list
        sort_labels : bool, default False
            Sort the row and column labels before forming the sparse matrix.

        Returns
        -------
        y : scipy.sparse.coo_matrix
        rows : list (row labels)
        columns : list (column labels)

        Examples
        --------
        >>> from numpy import nan
        >>> s = Series([3.0, nan, 1.0, 3.0, nan, nan])
        >>> s.index = MultiIndex.from_tuples([(1, 2, 'a', 0),
                                              (1, 2, 'a', 1),
                                              (1, 1, 'b', 0),
                                              (1, 1, 'b', 1),
                                              (2, 1, 'b', 0),
                                              (2, 1, 'b', 1)],
                                              names=['A', 'B', 'C', 'D'])
        >>> ss = s.to_sparse()
        >>> A, rows, columns = ss.to_coo(row_levels=['A', 'B'],
                                         column_levels=['C', 'D'],
                                         sort_labels=True)
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
                with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[ 0.,  0.,  1.,  3.],
        [ 3.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]])
        >>> rows
        [(1, 1), (1, 2), (2, 1)]
        >>> columns
        [('a', 0), ('a', 1), ('b', 0), ('b', 1)]
        """
        A, rows, columns = _sparse_series_to_coo(self,
                                                 row_levels,
                                                 column_levels,
                                                 sort_labels=sort_labels)
        return A, rows, columns

    @classmethod
    def from_coo(cls, A, dense_index=False):
        """
        Create a SparseSeries from a scipy.sparse.coo_matrix.

        .. versionadded:: 0.16.0

        Parameters
        ----------
        A : scipy.sparse.coo_matrix
        dense_index : bool, default False
            If False (default), the SparseSeries index consists of only the
            coords of the non-null entries of the original coo_matrix.
            If True, the SparseSeries index consists of the full sorted
            (row, col) coordinates of the coo_matrix.

        Returns
        -------
        s : SparseSeries

        Examples
        ---------
        >>> from scipy import sparse
        >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])),
                               shape=(3, 4))
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
                with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[ 0.,  0.,  1.,  2.],
                [ 3.,  0.,  0.,  0.],
                [ 0.,  0.,  0.,  0.]])
        >>> ss = SparseSeries.from_coo(A)
        >>> ss
        0  2    1
           3    2
        1  0    3
        dtype: float64
        BlockIndex
        Block locations: array([0], dtype=int32)
        Block lengths: array([3], dtype=int32)
        """
        return _coo_to_sparse_series(A, dense_index=dense_index)
示例#25
0
    def __new__(cls, data=None, index=None, crs=None, **kwargs):
        # we need to use __new__ because we want to return Series instance
        # instead of GeoSeries instance in case of non-geometry data

        if hasattr(data, "crs") and crs:
            if not data.crs:
                # make a copy to avoid setting CRS to passed GeometryArray
                data = data.copy()
            else:
                if not data.crs == crs:
                    warnings.warn(
                        "CRS mismatch between CRS of the passed geometries "
                        "and 'crs'. Use 'GeoDataFrame.set_crs(crs, "
                        "allow_override=True)' to overwrite CRS or "
                        "'GeoSeries.to_crs(crs)' to reproject geometries. "
                        "CRS mismatch will raise an error in the future versions "
                        "of GeoPandas.",
                        FutureWarning,
                        stacklevel=2,
                    )
                    # TODO: raise error in 0.9 or 0.10.

        if isinstance(data, SingleBlockManager):
            if isinstance(data.blocks[0].dtype, GeometryDtype):
                if data.blocks[0].ndim == 2:
                    # bug in pandas 0.23 where in certain indexing operations
                    # (such as .loc) a 2D ExtensionBlock (still with 1D values
                    # is created) which results in other failures
                    # bug in pandas <= 0.25.0 when len(values) == 1
                    #   (https://github.com/pandas-dev/pandas/issues/27785)
                    from pandas.core.internals import ExtensionBlock

                    values = data.blocks[0].values
                    block = ExtensionBlock(values,
                                           slice(0, len(values), 1),
                                           ndim=1)
                    data = SingleBlockManager([block],
                                              data.axes[0],
                                              fastpath=True)
                self = super(GeoSeries, cls).__new__(cls)
                super(GeoSeries, self).__init__(data, index=index, **kwargs)
                self.crs = getattr(self.values, "crs", crs)
                return self
            warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2)
            return Series(data, index=index, **kwargs)

        if isinstance(data, BaseGeometry):
            # fix problem for scalar geometries passed, ensure the list of
            # scalars is of correct length if index is specified
            n = len(index) if index is not None else 1
            data = [data] * n

        name = kwargs.pop("name", None)

        if not is_geometry_type(data):
            # if data is None and dtype is specified (eg from empty overlay
            # test), specifying dtype raises an error:
            # https://github.com/pandas-dev/pandas/issues/26469
            kwargs.pop("dtype", None)
            # Use Series constructor to handle input data
            s = pd.Series(data, index=index, name=name, **kwargs)
            # prevent trying to convert non-geometry objects
            if s.dtype != object:
                if s.empty:
                    s = s.astype(object)
                else:
                    warnings.warn(_SERIES_WARNING_MSG,
                                  FutureWarning,
                                  stacklevel=2)
                    return s
            # try to convert to GeometryArray, if fails return plain Series
            try:
                data = from_shapely(s.values, crs)
            except TypeError:
                warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2)
                return s
            index = s.index
            name = s.name

        self = super(GeoSeries, cls).__new__(cls)
        super(GeoSeries, self).__init__(data, index=index, name=name, **kwargs)

        if not self.crs:
            self.crs = crs
        self._invalidate_sindex()
        return self