def _init_spmatrix(self, data, index, columns, dtype=None, fill_value=None): """ Init self from scipy.sparse matrix. """ index, columns = self._prep_index(data, index, columns) data = data.tocoo() N = len(index) # Construct a dict of SparseSeries sdict = {} values = Series(data.data, index=data.row, copy=False) for col, rowvals in values.groupby(data.col): # get_blocks expects int32 row indices in sorted order rowvals = rowvals.sort_index() rows = rowvals.index.values.astype(np.int32) blocs, blens = get_blocks(rows) sdict[columns[col]] = SparseSeries( rowvals.values, index=index, fill_value=fill_value, sparse_index=BlockIndex(N, blocs, blens)) # Add any columns that were empty and thus not grouped on above sdict.update({column: SparseSeries(index=index, fill_value=fill_value, sparse_index=BlockIndex(N, [], [])) for column in columns if column not in sdict}) return self._init_dict(sdict, index, columns, dtype)
def _make_index(length, indices, kind): if kind == 'block' or isinstance(kind, BlockIndex): locs, lens = splib.get_blocks(indices) index = BlockIndex(length, locs, lens) elif kind == 'integer' or isinstance(kind, IntIndex): index = IntIndex(length, indices) else: # pragma: no cover raise ValueError('must be block or integer type') return index
def make_sparse_index(length, indices, kind): if kind == "block" or isinstance(kind, BlockIndex): locs, lens = splib.get_blocks(indices) index = BlockIndex(length, locs, lens) elif kind == "integer" or isinstance(kind, IntIndex): index = IntIndex(length, indices) else: # pragma: no cover raise ValueError("must be block or integer type") return index
def _concat_same_type( cls: Type[SparseArrayT], to_concat: Sequence[SparseArrayT] ) -> SparseArrayT: fill_value = to_concat[0].fill_value values = [] length = 0 if to_concat: sp_kind = to_concat[0].kind else: sp_kind = "integer" if sp_kind == "integer": indices = [] for arr in to_concat: idx = arr.sp_index.to_int_index().indices.copy() idx += length # TODO: wraparound length += arr.sp_index.length values.append(arr.sp_values) indices.append(idx) data = np.concatenate(values) indices = np.concatenate(indices) sp_index = IntIndex(length, indices) else: # when concatenating block indices, we don't claim that you'll # get an identical index as concatenating the values and then # creating a new index. We don't want to spend the time trying # to merge blocks across arrays in `to_concat`, so the resulting # BlockIndex may have more blocks. blengths = [] blocs = [] for arr in to_concat: idx = arr.sp_index.to_block_index() values.append(arr.sp_values) blocs.append(idx.blocs.copy() + length) blengths.append(idx.blengths) length += arr.sp_index.length data = np.concatenate(values) blocs = np.concatenate(blocs) blengths = np.concatenate(blengths) sp_index = BlockIndex(length, blocs, blengths) return cls(data, sparse_index=sp_index, fill_value=fill_value)
def __init__(self, data=None, index=None, sparse_index=None, kind='block', fill_value=None, name=None, dtype=None, copy=False, fastpath=False): # we are called internally, so short-circuit if fastpath: # data is an ndarray, index is defined if not isinstance(data, SingleBlockManager): data = SingleBlockManager(data, index, fastpath=True) if copy: data = data.copy() else: if data is None: data = [] if isinstance(data, Series) and name is None: name = data.name if isinstance(data, SparseArray): if index is not None: assert (len(index) == len(data)) sparse_index = data.sp_index if fill_value is None: fill_value = data.fill_value data = np.asarray(data) elif isinstance(data, SparseSeries): if index is None: index = data.index.view() if fill_value is None: fill_value = data.fill_value # extract the SingleBlockManager data = data._data elif isinstance(data, (Series, dict)): if index is None: index = data.index.view() data = Series(data) res = make_sparse(data, kind=kind, fill_value=fill_value) data, sparse_index, fill_value = res elif isinstance(data, (tuple, list, np.ndarray)): # array-like if sparse_index is None: res = make_sparse(data, kind=kind, fill_value=fill_value) data, sparse_index, fill_value = res else: assert (len(data) == sparse_index.npoints) elif isinstance(data, SingleBlockManager): if dtype is not None: data = data.astype(dtype) if index is None: index = data.index.view() else: data = data.reindex(index, copy=False) else: length = len(index) if data == fill_value or (isnull(data) and isnull(fill_value)): if kind == 'block': sparse_index = BlockIndex(length, [], []) else: sparse_index = IntIndex(length, []) data = np.array([]) else: if kind == 'block': locs, lens = ([0], [length]) if length else ([], []) sparse_index = BlockIndex(length, locs, lens) else: sparse_index = IntIndex(length, index) v = data data = np.empty(length) data.fill(v) if index is None: index = com._default_index(sparse_index.length) index = _ensure_index(index) # create/copy the manager if isinstance(data, SingleBlockManager): if copy: data = data.copy() else: # create a sparse array if not isinstance(data, SparseArray): data = SparseArray(data, sparse_index=sparse_index, fill_value=fill_value, dtype=dtype, copy=copy) data = SingleBlockManager(data, index) generic.NDFrame.__init__(self, data) self.index = index self.name = name
def _concat_same_type(cls, to_concat): fill_values = [x.fill_value for x in to_concat] fill_value = fill_values[0] # np.nan isn't a singleton, so we may end up with multiple # NaNs here, so we ignore tha all NA case too. if not (len(set(fill_values)) == 1 or isna(fill_values).all()): warnings.warn( "Concatenating sparse arrays with multiple fill " f"values: '{fill_values}'. Picking the first and " "converting the rest.", PerformanceWarning, stacklevel=6, ) keep = to_concat[0] to_concat2 = [keep] for arr in to_concat[1:]: to_concat2.append(cls(np.asarray(arr), fill_value=fill_value)) to_concat = to_concat2 values = [] length = 0 if to_concat: sp_kind = to_concat[0].kind else: sp_kind = "integer" if sp_kind == "integer": indices = [] for arr in to_concat: idx = arr.sp_index.to_int_index().indices.copy() idx += length # TODO: wraparound length += arr.sp_index.length values.append(arr.sp_values) indices.append(idx) data = np.concatenate(values) indices = np.concatenate(indices) sp_index = IntIndex(length, indices) else: # when concatenating block indices, we don't claim that you'll # get an identical index as concating the values and then # creating a new index. We don't want to spend the time trying # to merge blocks across arrays in `to_concat`, so the resulting # BlockIndex may have more blocs. blengths = [] blocs = [] for arr in to_concat: idx = arr.sp_index.to_block_index() values.append(arr.sp_values) blocs.append(idx.blocs.copy() + length) blengths.append(idx.blengths) length += arr.sp_index.length data = np.concatenate(values) blocs = np.concatenate(blocs) blengths = np.concatenate(blengths) sp_index = BlockIndex(length, blocs, blengths) return cls(data, sparse_index=sp_index, fill_value=fill_value)
def __init__(self, data=None, index=None, sparse_index=None, kind='block', fill_value=None, name=None, dtype=None, copy=False, fastpath=False): # we are called internally, so short-circuit if fastpath: # data is an ndarray, index is defined if not isinstance(data, SingleBlockManager): data = SingleBlockManager(data, index, fastpath=True) if copy: data = data.copy() else: if data is None: data = [] if isinstance(data, Series) and name is None: name = data.name if isinstance(data, SparseArray): if index is not None: assert (len(index) == len(data)) sparse_index = data.sp_index if fill_value is None: fill_value = data.fill_value data = np.asarray(data) elif isinstance(data, SparseSeries): if index is None: index = data.index.view() if fill_value is None: fill_value = data.fill_value # extract the SingleBlockManager data = data._data elif isinstance(data, (Series, dict)): data = Series(data, index=index) index = data.index.view() res = make_sparse(data, kind=kind, fill_value=fill_value) data, sparse_index, fill_value = res elif isinstance(data, (tuple, list, np.ndarray)): # array-like if sparse_index is None: res = make_sparse(data, kind=kind, fill_value=fill_value) data, sparse_index, fill_value = res else: assert (len(data) == sparse_index.npoints) elif isinstance(data, SingleBlockManager): if dtype is not None: data = data.astype(dtype) if index is None: index = data.index.view() elif not data.index.equals(index) or copy: # pragma: no cover # GH#19275 SingleBlockManager input should only be called # internally raise AssertionError('Cannot pass both SingleBlockManager ' '`data` argument and a different ' '`index` argument. `copy` must ' 'be False.') else: length = len(index) if data == fill_value or (isna(data) and isna(fill_value)): if kind == 'block': sparse_index = BlockIndex(length, [], []) else: sparse_index = IntIndex(length, []) data = np.array([]) else: if kind == 'block': locs, lens = ([0], [length]) if length else ([], []) sparse_index = BlockIndex(length, locs, lens) else: sparse_index = IntIndex(length, index) v = data data = np.empty(length) data.fill(v) if index is None: index = ibase.default_index(sparse_index.length) index = ensure_index(index) # create/copy the manager if isinstance(data, SingleBlockManager): if copy: data = data.copy() else: # create a sparse array if not isinstance(data, SparseArray): data = SparseArray(data, sparse_index=sparse_index, fill_value=fill_value, dtype=dtype, copy=copy) data = SingleBlockManager(data, index) generic.NDFrame.__init__(self, data) self.index = index self.name = name