def _sanitize_column(self, key, value, **kwargs):
    """
    Creates a new SparseArray from the input value.

    Parameters
    ----------
    key : object
        Not used by this implementation; kept for interface compatibility.
    value : scalar, Series, or array-like
    kwargs : dict
        Not used by this implementation; kept for interface compatibility.

    Returns
    -------
    sanitized_column : SparseArray

    Raises
    ------
    AssertionError
        If ``value`` is a SparseArray or a non-Series iterable whose length
        differs from ``len(self.index)``.
    """
    def sp_maker(x, index=None):
        # Every conversion uses the frame-level default fill value and kind.
        return SparseArray(x, index=index,
                           fill_value=self._default_fill_value,
                           kind=self._default_kind)

    if isinstance(value, SparseSeries):
        # Align to this frame's index, then re-sparsify with the frame
        # defaults.
        clean = value.reindex(self.index).as_sparse_array(
            fill_value=self._default_fill_value, kind=self._default_kind)

    elif isinstance(value, SparseArray):
        if len(value) != len(self.index):
            raise AssertionError('Length of values does not match '
                                 'length of index')
        clean = value

    elif hasattr(value, '__iter__'):
        if isinstance(value, Series):
            # A SparseSeries can never reach this point (it is consumed by
            # the first branch), so the reindexed Series is always converted.
            clean = sp_maker(value.reindex(self.index))
        else:
            if len(value) != len(self.index):
                raise AssertionError('Length of values does not match '
                                     'length of index')
            clean = sp_maker(value)

    else:
        # Scalar: the index is passed along with the single value.
        clean = sp_maker(value, self.index)

    # always return a SparseArray!
    return clean
def _init_dict(self, data, index, columns, dtype=None):
    """
    Convert a mapping of column label -> values into sparse columns and
    hand them to ``to_manager``.

    Parameters
    ----------
    data : dict-like
        Iterated with ``compat.iteritems``; must also provide ``keys()``
        and ``values()``.
    index : Index or None
        When None it is derived with ``extract_index`` from the values.
    columns : sequence or None
        When given, ``data`` is restricted to these labels; when None the
        labels are the sorted keys of ``data``.
    dtype : optional
        Forwarded to every ``SparseArray`` built here.

    Returns
    -------
    The result of ``to_manager(sdict, columns, index)``.
    """
    if columns is None:
        columns = Index(_try_sort(list(data.keys())))
    else:
        # pre-filter out columns if we passed it
        columns = _ensure_index(columns)
        data = {label: col for label, col in compat.iteritems(data)
                if label in columns}

    if index is None:
        index = extract_index(list(data.values()))

    def make_sparse(values):
        return SparseArray(values, kind=self._default_kind,
                           fill_value=self._default_fill_value,
                           copy=True, dtype=dtype)

    def coerce(col):
        # Turn one input column into the object stored in the frame.
        if isinstance(col, Series):
            # Force alignment, no copy necessary
            aligned = col if col.index.equals(index) else col.reindex(index)
            if isinstance(aligned, SparseSeries):
                return aligned
            return make_sparse(aligned.values)
        if isinstance(col, SparseArray):
            return col.copy()
        if isinstance(col, dict):
            # Look every index label up; absent labels become NaN.
            col = [col.get(i, nan) for i in index]
        return make_sparse(col)

    sdict = DataFrame()
    for label, col in compat.iteritems(data):
        sdict[label] = coerce(col)

    # TODO: figure out how to handle this case, all nan's?
    # add in any other columns we want to have (completeness)
    all_nan = np.empty(len(index))
    all_nan.fill(nan)
    for label in columns:
        if label in sdict:
            continue
        sdict[label] = make_sparse(all_nan)

    return to_manager(sdict, columns, index)
def _init_dict(self, data, index, columns, dtype=None):
    """
    Convert a mapping of column label -> values into a dict of sparse
    columns and hand it to ``to_manager``.

    Parameters
    ----------
    data : dict-like
        Iterated with ``compat.iteritems``; must also provide ``values()``.
    index : Index or None
        When None it is derived with ``extract_index`` from the values.
    columns : sequence or None
        When given, ``data`` is restricted to these labels; when None the
        labels come from ``com._dict_keys_to_ordered_list(data)``.
    dtype : optional
        Forwarded to every ``SparseArray`` built here.

    Returns
    -------
    The result of ``to_manager(sdict, columns, index)``.
    """
    if columns is None:
        columns = Index(com._dict_keys_to_ordered_list(data))
    else:
        # pre-filter out columns if we passed it
        columns = _ensure_index(columns)
        data = {label: col for label, col in compat.iteritems(data)
                if label in columns}

    if index is None:
        index = extract_index(list(data.values()))

    def make_sparse(values):
        return SparseArray(values, kind=self._default_kind,
                           fill_value=self._default_fill_value,
                           copy=True, dtype=dtype)

    sdict = {}
    for label, col in compat.iteritems(data):
        if isinstance(col, Series):
            # Force alignment, no copy necessary
            if not col.index.equals(index):
                col = col.reindex(index)
            if not isinstance(col, SparseSeries):
                col = make_sparse(col.values)
        elif isinstance(col, SparseArray):
            col = col.copy()
        elif isinstance(col, dict):
            # Look every index label up; absent labels become NaN.
            col = make_sparse([col.get(i, np.nan) for i in index])
        else:
            col = make_sparse(col)
        sdict[label] = col

    # TODO: figure out how to handle this case, all nan's?
    # add in any other columns we want to have (completeness)
    # A single all-NaN sparse array is built once and stored under every
    # requested label that ``data`` did not supply.
    filler = make_sparse(np.full(len(index), np.nan, dtype='float64'))
    for label in columns:
        if label not in sdict:
            sdict[label] = filler

    return to_manager(sdict, columns, index)
def __init__(self, data=None, index=None, columns=None, default_kind=None,
             default_fill_value=None, dtype=None, copy=False):
    """
    Initialise the frame by dispatching on the type of ``data``.

    Parameters
    ----------
    data : scipy sparse matrix, dict, ndarray, list, SparseDataFrame,
        DataFrame, Series, SparseSeries, SparseArray, BlockManager or None
    index : optional
        Taken from ``data.index`` when ``data`` is a SparseDataFrame,
        SparseSeries or SparseArray and no index was given.
    columns : optional
        Taken from ``data.columns`` (SparseDataFrame) or ``[data.name]``
        (SparseSeries / SparseArray with a ``name`` attribute) when not
        given.
    default_kind : optional
        Falls back to ``data.default_kind`` for a SparseDataFrame, then to
        ``'block'``.
    default_fill_value : optional
        Falls back to the fill value of a sparse ``data``, then to
        ``np.nan``.
    dtype : optional
        Forwarded to the per-type initialiser; for ``data is None`` it is
        applied with ``mgr.astype(dtype)``.
    copy : bool, default False
        Only forwarded to ``_init_mgr`` (SparseDataFrame and BlockManager
        inputs).

    Raises
    ------
    Exception
        If ``data`` is a SparseSeries / SparseArray, ``columns`` is None
        and ``data`` has no ``name`` attribute.
    TypeError
        If ``data`` is none of the supported types.
    """
    # pick up the defaults from the Sparse structures
    if isinstance(data, SparseDataFrame):
        if index is None:
            index = data.index
        if columns is None:
            columns = data.columns
        if default_fill_value is None:
            default_fill_value = data.default_fill_value
        if default_kind is None:
            default_kind = data.default_kind
    elif isinstance(data, (SparseSeries, SparseArray)):
        if index is None:
            index = data.index
        if default_fill_value is None:
            default_fill_value = data.fill_value
        # NOTE(review): when ``data.name`` exists but is None, ``columns``
        # becomes ``[None]`` and the check below does not raise - confirm
        # this is intended.
        if columns is None and hasattr(data, 'name'):
            columns = [data.name]
        if columns is None:
            raise Exception("cannot pass a series w/o a name or columns")
        # Wrapped in a dict so the dict branch of the dispatch below can
        # pick it up.
        data = {columns[0]: data}

    if default_fill_value is None:
        default_fill_value = np.nan
    if default_kind is None:
        default_kind = 'block'

    # Stored before dispatching: _init_dict and the ``data is None`` branch
    # read these attributes.
    self._default_kind = default_kind
    self._default_fill_value = default_fill_value

    # The order of the checks matters: the first matching branch wins.
    if is_scipy_sparse(data):
        mgr = self._init_spmatrix(data, index, columns,
                                  dtype=dtype,
                                  fill_value=default_fill_value)
    elif isinstance(data, dict):
        mgr = self._init_dict(data, index, columns, dtype=dtype)
    elif isinstance(data, (np.ndarray, list)):
        mgr = self._init_matrix(data, index, columns, dtype=dtype)
    elif isinstance(data, SparseDataFrame):
        mgr = self._init_mgr(data._data,
                             dict(index=index, columns=columns),
                             dtype=dtype, copy=copy)
    elif isinstance(data, DataFrame):
        # NOTE(review): the ``index`` / ``columns`` arguments are not used
        # in this branch or the Series branch; the axes of ``data`` are
        # passed instead - confirm this is intended.
        mgr = self._init_dict(data, data.index, data.columns, dtype=dtype)
    elif isinstance(data, Series):
        mgr = self._init_dict(data.to_frame(), data.index,
                              columns=None, dtype=dtype)
    elif isinstance(data, BlockManager):
        mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
                             dtype=dtype, copy=copy)
    elif data is None:
        data = DataFrame()

        if index is None:
            index = Index([])
        else:
            index = _ensure_index(index)

        if columns is None:
            columns = Index([])
        else:
            # One sparse column built from the scalar NaN per requested
            # label.
            for c in columns:
                data[c] = SparseArray(np.nan, index=index,
                                      kind=self._default_kind,
                                      fill_value=self._default_fill_value)
        mgr = to_manager(data, columns, index)
        if dtype is not None:
            mgr = mgr.astype(dtype)
    else:
        msg = ('SparseDataFrame called with unknown type "{data_type}" '
               'for data argument')
        raise TypeError(msg.format(data_type=type(data).__name__))

    generic.NDFrame.__init__(self, mgr)
def sp_maker(x, index=None):
    """
    Wrap ``x`` in a SparseArray using the default fill value and kind
    stored on ``self``.

    ``self`` is not a parameter; it is resolved from the enclosing scope.
    """
    sparse_options = {
        'index': index,
        'fill_value': self._default_fill_value,
        'kind': self._default_kind,
    }
    return SparseArray(x, **sparse_options)
def sp_maker(x):
    """
    Wrap ``x`` in a freshly copied SparseArray using the default kind and
    fill value stored on ``self`` and the surrounding ``dtype``.

    ``self`` and ``dtype`` are not parameters; they are resolved from the
    enclosing scope.
    """
    sparse_options = {
        'kind': self._default_kind,
        'fill_value': self._default_fill_value,
        'copy': True,
        'dtype': dtype,
    }
    return SparseArray(x, **sparse_options)