def __init__(self, values, items, ref_items, ndim=2):
    """
    Store a homogeneously-typed ndarray together with the item labels it
    carries.

    Parameters
    ----------
    values : ndarray
    items : sequence
        Labels for axis 0 of `values`.
    ref_items : sequence
        Item index of the owning manager; `items` is presumably a subset
        of it -- TODO confirm against _check_integrity.
    ndim : int, default 2
        Expected dimensionality of `values`.
    """
    # Store string data as object dtype so values of any length fit.
    if issubclass(values.dtype.type, basestring):
        values = np.array(values, dtype=object)

    # Internal sanity checks (note: stripped when running under -O).
    assert(values.ndim == ndim)
    assert(len(items) == len(values))

    self.values = values
    self.ndim = ndim
    self.items = _ensure_index(items)
    self.ref_items = _ensure_index(ref_items)
    self._check_integrity()
def get_indexer(self, target, method=None):
    """
    Compute an (indexer, mask) pair aligning this index to `target`.

    Parameters
    ----------
    target : Index
    method : {'pad', 'ffill', 'backfill', 'bfill', None}

    Returns
    -------
    (indexer, mask)
    """
    target = _ensure_index(target)

    if method:
        method = method.upper()
    # normalize user-facing aliases to the canonical fill-method names
    if method == 'FFILL':
        method = 'PAD'
    elif method == 'BFILL':
        method = 'BACKFILL'

    indexer, mask = _tseries.getFillVec(self, target, self.indexMap,
                                        target.indexMap, method)
    return indexer, mask
def reindex_items(self, new_items):
    """
    Conform the manager's blocks to a new item (axis 0) index.  Items not
    present in the current index get a NaN-filled float64 block.
    """
    new_items = _ensure_index(new_items)
    data = self
    if not data.is_consolidated():
        # redo the reindex on a consolidated copy so same-dtype blocks
        # are merged first
        data = data.consolidate()
        return data.reindex_items(new_items)

    # TODO: this part could be faster (!)
    new_items, _, mask = self.items.reindex(new_items)
    notmask = -mask

    new_blocks = []
    for block in self.blocks:
        newb = block.reindex_items_from(new_items)
        # drop blocks that end up with no items
        if len(newb.items) > 0:
            new_blocks.append(newb)

    if notmask.any():
        # items missing from the old index: add one NaN block covering
        # all of them, then re-consolidate
        extra_items = new_items[notmask]

        block_shape = list(self.shape)
        block_shape[0] = len(extra_items)
        block_values = np.empty(block_shape, dtype=np.float64)
        block_values.fill(nan)
        na_block = make_block(block_values, extra_items, new_items)
        new_blocks.append(na_block)
        new_blocks = _consolidate(new_blocks, new_items)

    new_axes = list(self.axes)
    new_axes[0] = new_items
    return BlockManager(new_blocks, new_axes)
def _init_dict(self, data, axes, dtype=None):
    """
    Construct a consolidated BlockManager from a dict of column -> values.

    Parameters
    ----------
    data : dict
        Mapping of column label -> array-like.
    axes : sequence
        [columns, index]; either entry may be None, in which case it is
        inferred from `data`.
    dtype : dtype, optional
        Force all blocks to this dtype.

    Returns
    -------
    BlockManager

    Notes
    -----
    The original body referenced the undefined names `index`, `columns`,
    `intersect` and `homogenized` (NameError at runtime).  This version
    unpacks index from axes[1] and uses the `_homogenize` call shape shown
    in the original's commented-out line.
    """
    columns, index = axes[0], axes[1]

    # prefilter if columns passed
    if columns is not None:
        columns = _ensure_index(columns)
        data = dict((k, v) for k, v in data.iteritems() if k in columns)
    else:
        columns = Index(_try_sort(data.keys()))

    # figure out the index, if necessary
    if index is None:
        index = extract_index(data)

    # don't force copy because getting jammed in an ndarray anyway
    homogenized = _homogenize(data, index, columns, dtype)

    # segregates dtypes and forms blocks matching to columns
    blocks = form_blocks(homogenized, index, columns)

    # consolidate for now
    mgr = BlockManager(blocks, [columns, index])
    return mgr.consolidate()
def __init__(self, levels, labels, sortorder=None):
    """
    Parameters
    ----------
    levels : sequence of array-likes
        Unique labels for each level; each is coerced to an Index.
    labels : sequence of array-likes
        Integer codes into the corresponding level.
    sortorder : int, optional
        Level up to which the index is lexsorted; kept as None when unknown.
    """
    self.levels = [_ensure_index(level) for level in levels]
    self.labels = [np.asarray(codes, dtype=np.int32) for codes in labels]
    self.sortorder = int(sortorder) if sortorder is not None else sortorder
def set_axis(self, axis, value):
    """
    Replace the labels along `axis`, length-checked against the current
    labels.  Setting axis 0 re-points every block at the new items,
    allowing renames.
    """
    current = self.axes[axis]
    if len(value) != len(current):
        raise Exception('Length mismatch (%d vs %d)'
                        % (len(value), len(current)))

    self.axes[axis] = _ensure_index(value)

    if axis == 0:
        for blk in self.blocks:
            blk.set_ref_items(self.items, maybe_rename=True)
def __init__(self, blocks, axes, skip_integrity_check=False):
    """
    Parameters
    ----------
    blocks : list of Block
    axes : sequence of array-likes
        One per dimension; each coerced to an Index.
    skip_integrity_check : bool, default False
        Skip the cross-block consistency verification.
    """
    self.axes = [_ensure_index(ax) for ax in axes]
    self.blocks = blocks

    # every block must match the manager's dimensionality
    expected_ndim = len(axes)
    for blk in blocks:
        assert(expected_ndim == blk.values.ndim)

    if not skip_integrity_check:
        self._verify_integrity()
def _set_index(self, index):
    """
    Validate and install a new index: must be an accepted sequence type
    and match the length of the values.
    """
    allowed = ndarray, Index, list, tuple
    if not isinstance(index, allowed):
        raise TypeError("Expected index to be in %s; was %s."
                        % (allowed, type(index)))

    if len(self) != len(index):
        raise AssertionError('Lengths of index and values did not match!')

    self._index = _ensure_index(index)
def _reindex_axis(self, new_index, fill_method, axis, copy):
    """
    Reindex the underlying BlockManager along `axis`; short-circuits to
    `self` when the axis is already equal and no copy was requested.
    """
    new_index = _ensure_index(new_index)
    if self._data.axes[axis].equals(new_index) and not copy:
        return self

    if axis == 0:
        new_data = self._data.reindex_items(new_index)
    else:
        new_data = self._data.reindex_axis(new_index, axis=axis,
                                           method=fill_method)
    return self._constructor(new_data)
def __setstate__(self, state):
    """Rebuild axes and blocks from the pickled state tuple."""
    # discard anything after 3rd, support beta pickling format for a little
    # while longer
    ax_arrays, bvalues, bitems = state[:3]

    self.axes = [_ensure_index(ax) for ax in ax_arrays]
    self.blocks = [make_block(vals, items, self.axes[0])
                   for vals, items in zip(bvalues, bitems)]
def __init__(self, levels, labels, sortorder=None, names=None,
             consistent=None):
    """
    Parameters
    ----------
    levels : sequence of array-likes
        Unique labels for each level; each coerced to an Index.
    labels : sequence of array-likes
        Integer codes into the corresponding level.
    sortorder : int, optional
        Level up to which the index is lexsorted.
    names : sequence, optional
        One name per level; defaults to 'level_0', 'level_1', ...
    consistent : optional
        NOTE(review): accepted but never used in this constructor --
        confirm whether any caller still passes it.
    """
    self.levels = [_ensure_index(lev) for lev in levels]
    self.labels = [np.asarray(labs, dtype=np.int32) for labs in labels]

    if names is None:
        self.names = ['level_%d' % i for i in range(self.nlevels)]
    else:
        # internal sanity check (stripped under -O)
        assert(len(names) == self.nlevels)
        self.names = list(names)

    if sortorder is not None:
        self.sortorder = int(sortorder)
    else:
        self.sortorder = sortorder
def get_indexer(self, target, method=None):
    """
    Compute indexer and mask for new index given the current index. The
    indexer should be then used as an input to ndarray.take to align the
    current data to the new index. The mask determines whether labels are
    found or not in the current index

    Parameters
    ----------
    target : Index
    method : {'pad', 'ffill', 'backfill', 'bfill'}
        pad / ffill: propagate LAST valid observation forward to next valid
        backfill / bfill: use NEXT valid observation to fill gap

    Notes
    -----
    This is a low-level method and probably should be used at your own risk

    Examples
    --------
    >>> indexer, mask = index.get_indexer(new_index)
    >>> new_values = cur_values.take(indexer)
    >>> new_values[-mask] = np.nan

    Returns
    -------
    (indexer, mask) : (ndarray, ndarray)
    """
    target = _ensure_index(target)

    if method:
        method = method.upper()
    # map the user-facing aliases onto the canonical method names
    if method == 'FFILL':
        method = 'PAD'
    elif method == 'BFILL':
        method = 'BACKFILL'

    indexer, mask = _tseries.getFillVec(self, target, self.indexMap,
                                        target.indexMap, method)
    return indexer, mask
def _interleave(self, items):
    """
    Return ndarray from blocks with specified item order
    Items must be contained in the blocks
    """
    # common dtype able to hold every block's values
    dtype = _interleaved_dtype(self.blocks)
    items = _ensure_index(items)

    result = np.empty(self.shape, dtype=dtype)
    # tracks which output rows have been filled, for the final sanity check
    itemmask = np.zeros(len(items), dtype=bool)

    # By construction, all of the item should be covered by one of the
    # blocks
    for block in self.blocks:
        # positions of this block's items within the requested order
        indexer, mask = items.get_indexer(block.items)
        assert(mask.all())
        result[indexer] = block.values
        itemmask[indexer] = 1

    assert(itemmask.all())
    return result
def reindex(self, index=None, method=None, copy=True):
    """Conform Series to new Index

    Parameters
    ----------
    index : array-like
        Preferably an Index object (to avoid duplicating data)
    method : {'backfill', 'bfill', 'pad', 'ffill', None}
        Method to use for filling holes in reindexed Series

        pad / ffill: propagate last valid observation forward to next valid
        backfill / bfill: use NEXT valid observation to fill gap
    copy : boolean, default True
        Return a new object, even if the passed indexes are the same

    Returns
    -------
    reindexed : Series
    """
    # fast path: identical index means no realignment work
    if self.index.equals(index):
        if copy:
            return self.copy()
        else:
            return self

    index = _ensure_index(index)
    if len(self.index) == 0:
        # nothing to align from: everything is missing
        return Series(nan, index=index)

    new_index, fill_vec, mask = self.index.reindex(index, method=method)
    new_values = self.values.take(fill_vec)

    notmask = -mask
    if notmask.any():
        # promote dtypes that cannot represent NaN before inserting it
        if issubclass(new_values.dtype.type, np.int_):
            new_values = new_values.astype(float)
        elif issubclass(new_values.dtype.type, np.bool_):
            new_values = new_values.astype(object)
        np.putmask(new_values, notmask, nan)

    return Series(new_values, index=new_index)
def union(self, other):
    """
    Form the union of two Index objects and sorts if possible

    Parameters
    ----------
    other : Index or array-like

    Returns
    -------
    union : Index
    """
    if not hasattr(other, '__iter__'):
        raise Exception('Input must be iterable!')

    # trivial cases: nothing to add, or the indexes are already equal
    if len(other) == 0:
        return self
    if self.equals(other):
        return self
    if len(self) == 0:
        return _ensure_index(other)

    return Index(_tseries.fast_unique_multiple([self, other]))
def _init_matrix(self, data, axes, dtype=None, copy=False):
    """
    Build a single-block BlockManager from a 2-D array-like, filling in
    default integer axes where None was passed.
    """
    values = _prep_ndarray(data, copy=copy)

    if dtype is not None:
        try:
            values = values.astype(dtype)
        except Exception:
            raise ValueError('failed to cast to %s' % dtype)

    shape = values.shape
    # substitute a default RangeIndex-style axis wherever None was given
    fixed_axes = [_default_index(shape[i]) if ax is None
                  else _ensure_index(ax)
                  for i, ax in enumerate(axes)]

    items = fixed_axes[0]
    block = make_block(values, items, items)
    return BlockManager([block], fixed_axes)
def _init_dict(self, data, axes, dtype=None): items, major, minor = axes # prefilter if items passed if items is not None: items = _ensure_index(items) data = dict((k, v) for k, v in data.iteritems() if k in items) else: items = Index(_try_sort(data.keys())) for k, v in data.iteritems(): if not isinstance(v, DataFrame): data[k] = DataFrame(v) if major is None: indexes = [v.index for v in data.values()] major = _union_indexes(indexes) if minor is None: indexes = [v.columns for v in data.values()] minor = _union_indexes(indexes) axes = [items, major, minor] reshaped_data = data.copy() # shallow # homogenize for k, v in data.iteritems(): v = v.reindex(index=major, columns=minor, copy=False) if dtype is not None: v = v.astype(dtype) values = v.values shape = values.shape reshaped_data[k] = values.reshape((1,) + shape) # segregates dtypes and forms blocks matching to columns blocks = form_blocks(reshaped_data, axes) mgr = BlockManager(blocks, axes).consolidate() return mgr
def reindex_axis(self, new_axis, method=None, axis=0):
    """
    Conform the manager to a new axis; axis 0 delegates to reindex_items
    (no fill method supported there).
    """
    if axis == 0:
        assert(method is None)
        return self.reindex_items(new_axis)

    new_axis = _ensure_index(new_axis)
    new_axis, indexer, mask = self.axes[axis].reindex(new_axis, method)

    # TODO: deal with length-0 case? or does it fall out?
    notmask = -mask
    needs_masking = len(new_axis) > 0 and notmask.any()

    new_blocks = [blk.reindex_axis(indexer, notmask, needs_masking,
                                   axis=axis)
                  for blk in self.blocks]

    new_axes = list(self.axes)
    new_axes[axis] = new_axis
    return BlockManager(new_blocks, new_axes)
def __setstate__(self, state):
    """Restore the block from its (items, ref_items, values) pickle tuple."""
    items, ref_items, self.values = state
    self.items = _ensure_index(items)
    self.ref_items = _ensure_index(ref_items)
    self.ndim = self.values.ndim
def __set__(self, obj, value):
    # coerce to an Index and cache it on the instance under cache_field
    setattr(obj, self.cache_field, _ensure_index(value))
def set_items_norename(self, value):
    """Install a new items index without allowing blocks to rename items."""
    value = _ensure_index(value)
    self.axes[0] = value

    for blk in self.blocks:
        blk.set_ref_items(value, maybe_rename=False)