mask = arr != fill_value length = len(arr) if length != mask.size: # the arr is a SparseArray indices = mask.sp_index.indices else: indices = np.arange(length, dtype=np.int32)[mask] index = _make_index(length, indices, kind) sparsified_values = arr[mask] return sparsified_values, index def _make_index(length, indices, kind): if kind == 'block' or isinstance(kind, BlockIndex): locs, lens = splib.get_blocks(indices) index = BlockIndex(length, locs, lens) elif kind == 'integer' or isinstance(kind, IntIndex): index = IntIndex(length, indices) else: # pragma: no cover raise ValueError('must be block or integer type') return index ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method, comp_method=_arith_method, use_numexpr=False)
index = None need_reindex = False for _, series in compat.iteritems(series_dict): if not np.isnan(series.fill_value): raise TypeError('this method is only valid with NaN fill values') if index is None: index = series.sp_index elif not series.sp_index.equals(index): need_reindex = True index = index.intersect(series.sp_index) if need_reindex: output = {} for name, series in compat.iteritems(series_dict): if not series.sp_index.equals(index): series = series.sparse_reindex(index) output[name] = series else: output = series_dict return output # use unaccelerated ops for sparse objects ops.add_flex_arithmetic_methods(SparseDataFrame, **ops.frame_flex_funcs) ops.add_special_arithmetic_methods(SparseDataFrame, **ops.frame_special_funcs)
need_reindex = False for _, series in compat.iteritems(series_dict): if not np.isnan(series.fill_value): raise TypeError('this method is only valid with NaN fill values') if index is None: index = series.sp_index elif not series.sp_index.equals(index): need_reindex = True index = index.intersect(series.sp_index) if need_reindex: output = {} for name, series in compat.iteritems(series_dict): if not series.sp_index.equals(index): series = series.sparse_reindex(index) output[name] = series else: output = series_dict return output # use unaccelerated ops for sparse objects ops.add_flex_arithmetic_methods(SparseDataFrame, use_numexpr=False, **ops.frame_flex_funcs) ops.add_special_arithmetic_methods(SparseDataFrame, use_numexpr=False, **ops.frame_special_funcs)
mask = arr != fill_value length = len(arr) if length != mask.size: # the arr is a SparseArray indices = mask.sp_index.indices else: indices = mask.nonzero()[0].astype(np.int32) index = _make_index(length, indices, kind) sparsified_values = arr[mask] return sparsified_values, index, fill_value def _make_index(length, indices, kind): if kind == 'block' or isinstance(kind, BlockIndex): locs, lens = splib.get_blocks(indices) index = BlockIndex(length, locs, lens) elif kind == 'integer' or isinstance(kind, IntIndex): index = IntIndex(length, indices) else: # pragma: no cover raise ValueError('must be block or integer type') return index ops.add_special_arithmetic_methods(SparseArray, arith_method=ops._arith_method_SPARSE_ARRAY, comp_method=ops._arith_method_SPARSE_ARRAY, bool_method=ops._arith_method_SPARSE_ARRAY)
# same. So we have to check the both of its type and value. mask = splib.make_mask_object_ndarray(arr, fill_value) else: mask = arr != fill_value length = len(arr) if length != mask.size: # the arr is a SparseArray indices = mask.sp_index.indices else: indices = mask.nonzero()[0].astype(np.int32) index = _make_index(length, indices, kind) sparsified_values = arr[mask] return sparsified_values, index, fill_value
# NOTE(review): the line above is the tail of a function whose header lies
# outside this chunk; it is reproduced verbatim and deliberately left
# unformatted.


def _make_index(length, indices, kind):
    """
    Build the sparse index object selected by ``kind``.

    Parameters
    ----------
    length : passed through unchanged as the first argument of the index
        constructor
    indices : positions handed to ``IntIndex`` directly, or converted to
        block locations/lengths by ``splib.get_blocks`` for ``BlockIndex``
    kind : {'block', 'integer'} or BlockIndex / IntIndex instance
        When an index instance is given only its type is inspected.

    Returns
    -------
    BlockIndex or IntIndex

    Raises
    ------
    ValueError
        If ``kind`` is neither of the supported kinds.
    """
    wants_block = kind == 'block' or isinstance(kind, BlockIndex)
    if wants_block:
        block_locs, block_lens = splib.get_blocks(indices)
        return BlockIndex(length, block_locs, block_lens)

    if kind == 'integer' or isinstance(kind, IntIndex):
        return IntIndex(length, indices)

    raise ValueError('must be block or integer type')  # pragma: no cover


# Register the special arithmetic methods on SparseArray using whatever
# defaults ``ops.add_special_arithmetic_methods`` applies.
ops.add_special_arithmetic_methods(SparseArray)
else: mask = arr != fill_value length = len(arr) if length != mask.size: # the arr is a SparseArray indices = mask.sp_index.indices else: indices = np.arange(length, dtype=np.int32)[mask] index = _make_index(length, indices, kind) sparsified_values = arr[mask] return sparsified_values, index def _make_index(length, indices, kind): if kind == 'block' or isinstance(kind, BlockIndex): locs, lens = splib.get_blocks(indices) index = BlockIndex(length, locs, lens) elif kind == 'integer' or isinstance(kind, IntIndex): index = IntIndex(length, indices) else: # pragma: no cover raise ValueError('must be block or integer type') return index ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method, comp_method=_arith_method, use_numexpr=False)
default_fill_value=self.default_fill_value, default_kind=self.default_kind)
# NOTE(review): the line above closes a call that begins outside this chunk;
# it is reproduced verbatim and deliberately left unformatted.

    # TODO: allow SparsePanel to work with flex arithmetic.
    # pow and mod only work for scalars for now
    def pow(self, val, *args, **kwargs):
        """
        Wrapper around `__pow__` (only works for scalar values).

        Any extra positional or keyword arguments are accepted and ignored;
        only ``val`` is forwarded.
        """
        return self.__pow__(val)

    def mod(self, val, *args, **kwargs):
        """
        Wrapper around `__mod__` (only works for scalar values).

        Any extra positional or keyword arguments are accepted and ignored;
        only ``val`` is forwarded.
        """
        return self.__mod__(val)


# Sparse objects opt out of numexpr
SparsePanel._add_aggregate_operations(use_numexpr=False)
# Install the special arithmetic methods from ``ops.panel_special_funcs``,
# again with numexpr disabled.
ops.add_special_arithmetic_methods(SparsePanel, use_numexpr=False,
                                   **ops.panel_special_funcs)
# Second module-level name bound to the very same class object.
SparseWidePanel = SparsePanel


# NOTE(review): the definition below continues past the end of this chunk; it
# is reproduced verbatim and deliberately left unformatted.
def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'): from pandas.core.panel import _get_combined_index output = {} for item, df in compat.iteritems(frames): if not isinstance(df, SparseDataFrame): df = SparseDataFrame(df, default_kind=kind, default_fill_value=fill_value) output[item] = df if index is None: all_indexes = [df.index for df in output.values()]
Examples --------- >>> from scipy import sparse >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4)) >>> A <3x4 sparse matrix of type '<class 'numpy.float64'>' with 3 stored elements in COOrdinate format> >>> A.todense() matrix([[ 0., 0., 1., 2.], [ 3., 0., 0., 0.], [ 0., 0., 0., 0.]]) >>> ss = SparseSeries.from_coo(A) >>> ss 0 2 1 3 2 1 0 3 dtype: float64 BlockIndex Block locations: array([0], dtype=int32) Block lengths: array([3], dtype=int32) """ return _coo_to_sparse_series(A, dense_index=dense_index) # overwrite series methods with unaccelerated Sparse-specific versions ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs) ops.add_special_arithmetic_methods(SparseSeries, **ops.sparse_series_special_funcs)
length = len(arr) if length != mask.size: # the arr is a SparseArray indices = mask.sp_index.indices else: indices = mask.nonzero()[0].astype(np.int32) index = _make_index(length, indices, kind) sparsified_values = arr[mask] return sparsified_values, index, fill_value def _make_index(length, indices, kind): if kind == 'block' or isinstance(kind, BlockIndex): locs, lens = splib.get_blocks(indices) index = BlockIndex(length, locs, lens) elif kind == 'integer' or isinstance(kind, IntIndex): index = IntIndex(length, indices) else: # pragma: no cover raise ValueError('must be block or integer type') return index ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method_SPARSE_ARRAY, comp_method=_arith_method_SPARSE_ARRAY, bool_method=_arith_method_SPARSE_ARRAY, use_numexpr=False)
mask = splib.make_mask_object_ndarray(arr, fill_value) else: mask = arr != fill_value length = len(arr) if length != mask.size: # the arr is a SparseArray indices = mask.sp_index.indices else: indices = mask.nonzero()[0].astype(np.int32) index = _make_index(length, indices, kind) sparsified_values = arr[mask] return sparsified_values, index, fill_value def _make_index(length, indices, kind): if kind == 'block' or isinstance(kind, BlockIndex): locs, lens = splib.get_blocks(indices) index = BlockIndex(length, locs, lens) elif kind == 'integer' or isinstance(kind, IntIndex): index = IntIndex(length, indices) else: # pragma: no cover raise ValueError('must be block or integer type') return index ops.add_special_arithmetic_methods(SparseArray, **ops.sparse_array_special_funcs)
s : SparseSeries Examples --------- >>> from scipy import sparse >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4)) >>> A <3x4 sparse matrix of type '<class 'numpy.float64'>' with 3 stored elements in COOrdinate format> >>> A.todense() matrix([[ 0., 0., 1., 2.], [ 3., 0., 0., 0.], [ 0., 0., 0., 0.]]) >>> ss = pd.SparseSeries.from_coo(A) >>> ss 0 2 1 3 2 1 0 3 dtype: float64 BlockIndex Block locations: array([0], dtype=int32) Block lengths: array([3], dtype=int32) """ return _coo_to_sparse_series(A, dense_index=dense_index) # overwrite series methods with unaccelerated Sparse-specific versions ops.add_flex_arithmetic_methods(SparseSeries) ops.add_special_arithmetic_methods(SparseSeries)
# non-scalars, I'm not sure whether it's worth it at the moment result = com._fill_zeros(result, x, y, name, fill_zeros) return result @Substitution(name) @Appender(_agg_doc) def f(self, other, axis=0): return self._combine(other, na_op, axis=axis) f.__name__ = name return f # add `div`, `mul`, `pow`, etc.. ops.add_flex_arithmetic_methods( cls, _panel_arith_method, use_numexpr=use_numexpr, flex_comp_method=ops._comp_method_PANEL) Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, stat_axis=1, aliases={'major': 'major_axis', 'minor': 'minor_axis'}, slicers={'major_axis': 'index', 'minor_axis': 'columns'}) ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) Panel._add_aggregate_operations() Panel._add_numeric_operations() WidePanel = Panel LongPanel = DataFrame
Parameters ---------- other : Series Returns ------- y : Series """ if isinstance(other, SparseSeries): other = other.to_dense() dense_combined = self.to_dense().combine_first(other) return dense_combined.to_sparse(fill_value=self.fill_value)
# NOTE(review): the line above is the tail of a method whose header and
# docstring opening lie outside this chunk; it is reproduced verbatim and
# deliberately left unformatted.

    # The docstring text is taken from SparseAccessor.to_coo through the
    # Appender decorator, so none is written here.
    @Appender(SparseAccessor.to_coo.__doc__)
    def to_coo(self, row_levels=(0, ), column_levels=(1, ),
               sort_labels=False):
        # Delegate to the module-level helper; its three results are unpacked
        # and handed back to the caller as a 3-tuple (A, rows, columns).
        A, rows, columns = _sparse_series_to_coo(self, row_levels,
                                                 column_levels,
                                                 sort_labels=sort_labels)
        return A, rows, columns

    # The docstring text is taken from SparseAccessor.from_coo through the
    # Appender decorator, so none is written here.
    @classmethod
    @Appender(SparseAccessor.from_coo.__doc__)
    def from_coo(cls, A, dense_index=False):
        # ``cls`` is not consulted: the result is whatever
        # _coo_to_sparse_series builds for ``A``.
        return _coo_to_sparse_series(A, dense_index=dense_index)


# overwrite series methods with unaccelerated Sparse-specific versions
# (flex methods are registered first, then the special methods)
ops.add_flex_arithmetic_methods(SparseSeries)
ops.add_special_arithmetic_methods(SparseSeries)
for _, series in compat.iteritems(series_dict): if not np.isnan(series.fill_value): raise TypeError('this method is only valid with NaN fill values') if index is None: index = series.sp_index elif not series.sp_index.equals(index): need_reindex = True index = index.intersect(series.sp_index) if need_reindex: output = {} for name, series in compat.iteritems(series_dict): if not series.sp_index.equals(index): series = series.sparse_reindex(index) output[name] = series else: output = series_dict return output # use unaccelerated ops for sparse objects ops.add_flex_arithmetic_methods(SparseDataFrame, use_numexpr=False, **ops.frame_flex_funcs) ops.add_special_arithmetic_methods(SparseDataFrame, use_numexpr=False, **ops.frame_special_funcs)
index = None need_reindex = False for _, series in compat.iteritems(series_dict): if not np.isnan(series.fill_value): raise TypeError('this method is only valid with NaN fill values') if index is None: index = series.sp_index elif not series.sp_index.equals(index): need_reindex = True index = index.intersect(series.sp_index) if need_reindex: output = {} for name, series in compat.iteritems(series_dict): if not series.sp_index.equals(index): series = series.sparse_reindex(index) output[name] = series else: output = series_dict return output # use unaccelerated ops for sparse objects ops.add_flex_arithmetic_methods(SparseDataFrame) ops.add_special_arithmetic_methods(SparseDataFrame)
<3x4 sparse matrix of type '<class 'numpy.float64'>' with 3 stored elements in COOrdinate format> >>> A.todense() matrix([[ 0., 0., 1., 2.], [ 3., 0., 0., 0.], [ 0., 0., 0., 0.]]) >>> ss = SparseSeries.from_coo(A) >>> ss 0 2 1 3 2 1 0 3 dtype: float64 BlockIndex Block locations: array([0], dtype=int32) Block lengths: array([3], dtype=int32) """ return _coo_to_sparse_series(A, dense_index=dense_index) # overwrite series methods with unaccelerated versions ops.add_special_arithmetic_methods(SparseSeries, use_numexpr=False, **ops.series_special_funcs) ops.add_flex_arithmetic_methods(SparseSeries, use_numexpr=False, **ops.series_flex_funcs) # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. ops.add_special_arithmetic_methods(SparseSeries, _arith_method, comp_method=_arith_method, bool_method=None, use_numexpr=False, force=True)
index = None need_reindex = False for _, series in series_dict.items(): if not np.isnan(series.fill_value): raise TypeError('this method is only valid with NaN fill values') if index is None: index = series.sp_index elif not series.sp_index.equals(index): need_reindex = True index = index.intersect(series.sp_index) if need_reindex: output = {} for name, series in series_dict.items(): if not series.sp_index.equals(index): series = series.sparse_reindex(index) output[name] = series else: output = series_dict return output # use unaccelerated ops for sparse objects ops.add_flex_arithmetic_methods(SparseDataFrame) ops.add_special_arithmetic_methods(SparseDataFrame)
# non-scalars, I'm not sure whether it's worth it at the moment result = com._fill_zeros(result, y, fill_zeros) return result @Substitution(name) @Appender(_agg_doc) def f(self, other, axis=0): return self._combine(other, na_op, axis=axis) f.__name__ = name return f # add `div`, `mul`, `pow`, etc.. ops.add_flex_arithmetic_methods( cls, _panel_arith_method, use_numexpr=use_numexpr, flex_comp_method=ops._comp_method_PANEL) Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, stat_axis=1, aliases={'major': 'major_axis', 'minor': 'minor_axis'}, slicers={'major_axis': 'index', 'minor_axis': 'columns'}) ops.add_special_arithmetic_methods(Panel, **ops.panel_special_funcs) Panel._add_aggregate_operations() Panel._add_numeric_operations() WidePanel = Panel LongPanel = DataFrame
>>> A <3x4 sparse matrix of type '<class 'numpy.float64'>' with 3 stored elements in COOrdinate format> >>> A.todense() matrix([[ 0., 0., 1., 2.], [ 3., 0., 0., 0.], [ 0., 0., 0., 0.]]) >>> ss = SparseSeries.from_coo(A) >>> ss 0 2 1 3 2 1 0 3 dtype: float64 BlockIndex Block locations: array([0], dtype=int32) Block lengths: array([3], dtype=int32) """ return _coo_to_sparse_series(A, dense_index=dense_index) # overwrite series methods with unaccelerated versions ops.add_special_arithmetic_methods(SparseSeries, **ops.series_special_funcs) ops.add_flex_arithmetic_methods(SparseSeries, **ops.series_flex_funcs) # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. ops.add_special_arithmetic_methods(SparseSeries, ops._arith_method_SPARSE_SERIES, comp_method=ops._arith_method_SPARSE_SERIES, bool_method=None, force=True)