def _make_long_index(major_values, minor_values):
    """
    Build a two-level MultiIndex from parallel major / minor values.

    Each input is reduced to its sorted unique values, which become one
    level of the result. The original values are then translated into
    labels for that level with ``_tseries.getMergeVec``.

    Parameters
    ----------
    major_values : array-like
        Values for the first level. They are passed through ``set`` and
        ``sorted``, so they must be hashable and orderable.
    minor_values : array-like
        Values for the second level (same requirements).

    Returns
    -------
    long_index : MultiIndex
        ``levels=[major_axis, minor_axis]`` with the matching labels.
    """
    inputs = (major_values, minor_values)

    # One sorted, de-duplicated Index per level.
    axes = [Index(sorted(set(values))) for values in inputs]

    # getMergeVec returns a pair; only its first element (the labels) is
    # needed here.
    labels = [_tseries.getMergeVec(values, axis.indexMap)[0]
              for values, axis in zip(inputs, axes)]

    return MultiIndex(levels=axes, labels=labels)
def map(self, arg):
    """
    Map values of Series using input correspondence (which can be
    a dict, Series, or function).

    Parameters
    ----------
    arg : function, dict, or Series
        A function is called once per value of the caller. A dict is
        first converted to a Series; the caller's values are then looked
        up in the index of that Series.

    Returns
    -------
    y : Series
        same index as caller. For dict / Series input, positions flagged
        by the inverted lookup mask are set to NaN.
    """
    # Callable case: apply element-wise and keep the caller's index.
    if not isinstance(arg, (dict, Series)):
        return Series([arg(x) for x in self], index=self.index)

    if isinstance(arg, dict):
        arg = Series(arg)

    indexer, mask = _tseries.getMergeVec(self, arg.index.indexMap)

    # Unary minus on a boolean array raises TypeError on NumPy >= 1.13;
    # ``~`` is the equivalent negation on every NumPy version.
    # NOTE(review): assumes getMergeVec returns a boolean mask - confirm.
    notmask = ~mask

    # ``take`` returns a new array, so filling it below cannot touch ``arg``.
    new_values = arg.view(np.ndarray).take(indexer)

    if notmask.any():
        # NaN cannot be stored in integer or boolean arrays: integers are
        # upcast to float, booleans to object (otherwise NaN would be
        # silently coerced to True).
        value_type = new_values.dtype.type
        if issubclass(value_type, np.integer):
            new_values = new_values.astype(float)
        elif issubclass(value_type, np.bool_):
            new_values = new_values.astype(object)
        np.putmask(new_values, notmask, np.nan)

    return Series(new_values, index=self.index)
def _read_panel_table(self, group, where=None):
    """
    Read the table stored on ``group`` and return it in wide form.

    The rows picked by ``Selection(table, where)`` are assembled into a
    LongPanel keyed by (index, column). If that panel is consistent it is
    sorted on level 0; otherwise it is reduced to one row per unique
    (index, column) tuple, and a notice is printed unless ``self._quiet``
    is set. The result is converted with ``to_wide`` and, when the
    selection carries a column filter, reindexed on the minor axis to the
    sorted intersection of its minor axis with that filter.

    Parameters
    ----------
    group : object
        Node exposing the stored table as its ``table`` attribute.
    where : optional
        Selection criteria, handed to ``Selection`` unchanged.

    Returns
    -------
    wp : result of ``LongPanel.to_wide()``, possibly reindexed on minor
    """
    from pandas.core.common import _asarray_tuplesafe

    table = getattr(group, 'table')

    # run the selection
    selection = Selection(table, where)
    selection.select()

    attrs = table._v_attrs
    rows = selection.values

    fields = attrs.fields
    columns = _maybe_convert(rows['column'], attrs.columns_kind)
    index = _maybe_convert(rows['index'], attrs.index_kind)

    # rebuild the long-format panel
    long_index = MultiIndex.from_arrays([index, columns])
    lp = LongPanel(rows['values'], index=long_index, columns=fields)

    if not lp.consistent:
        if not self._quiet:
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # need a better algorithm
        tuples = long_index.get_tuple_index()
        tuple_map = _tseries.map_indices_buf(tuples)

        distinct = _asarray_tuplesafe(_tseries.fast_unique(tuples))
        indexer, _ = _tseries.getMergeVec(distinct, tuple_map)

        lp = LongPanel(lp.values.take(indexer, axis=0),
                       index=long_index.take(indexer),
                       columns=lp.columns)
    else:
        lp = lp.sortlevel(level=0)

    wp = lp.to_wide()

    if selection.column_filter:
        kept_minor = sorted(set(wp.minor_axis) & selection.column_filter)
        wp = wp.reindex(minor=kept_minor)

    return wp
def join_on(self, other, on, axis=1):
    """
    Combine this manager's blocks with ``other``'s blocks aligned to ``on``.

    ``on`` is looked up in ``other.axes[axis]``; every block of ``other``
    is reindexed along ``axis`` with the resulting indexer, then the
    blocks of both managers are consolidated under the concatenated items.

    Parameters
    ----------
    other : BlockManager
        Manager whose blocks are reindexed and appended.
    on : array-like
        Keys to look up in ``other``'s axis.
    axis : int, default 1
        Axis of ``other`` used for the lookup and the reindexing.

    Returns
    -------
    BlockManager
        Uses this manager's axes, with axis 0 replaced by
        ``self.items + other.items``.
    """
    other_axis = other.axes[axis]
    indexer, mask = _tseries.getMergeVec(on, other_axis.indexMap)

    # TODO: deal with length-0 case? or does it fall out?

    # Unary minus on a boolean array raises TypeError on NumPy >= 1.13;
    # ``~`` is the equivalent negation on every NumPy version.
    # NOTE(review): assumes getMergeVec returns a boolean mask - confirm.
    notmask = ~mask
    needs_masking = len(on) > 0 and notmask.any()

    other_blocks = [
        block.reindex_axis(indexer, notmask, needs_masking, axis=axis)
        for block in other.blocks
    ]

    cons_items = self.items + other.items
    consolidated = _consolidate(self.blocks + other_blocks, cons_items)

    new_axes = list(self.axes)
    new_axes[0] = cons_items
    return BlockManager(consolidated, new_axes)
def join_on(self, other, on, axis=1, lsuffix=None, rsuffix=None):
    """
    Combine this manager's blocks with ``other``'s blocks aligned to ``on``.

    Both managers first go through ``self._maybe_rename_join`` with the
    given suffixes. ``on`` (cast to object dtype) is then looked up in
    ``other.axes[axis]``; every block of ``other`` is reindexed along
    ``axis`` with the resulting indexer, and the blocks of both managers
    are consolidated under the concatenated items.

    Parameters
    ----------
    other : BlockManager
        Manager whose blocks are reindexed and appended.
    on : ndarray
        Keys to look up in ``other``'s axis; must support ``astype``.
    axis : int, default 1
        Axis of ``other`` used for the lookup and the reindexing.
    lsuffix, rsuffix : optional
        Forwarded unchanged to ``self._maybe_rename_join``.

    Returns
    -------
    BlockManager
        Uses the (possibly renamed) left manager's axes, with axis 0
        replaced by the concatenated items of both managers.
    """
    this, other = self._maybe_rename_join(other, lsuffix, rsuffix)

    other_axis = other.axes[axis]
    indexer, mask = _tseries.getMergeVec(on.astype(object),
                                         other_axis.indexMap)

    # TODO: deal with length-0 case? or does it fall out?

    # Unary minus on a boolean array raises TypeError on NumPy >= 1.13;
    # ``~`` is the equivalent negation on every NumPy version.
    # NOTE(review): assumes getMergeVec returns a boolean mask - confirm.
    notmask = ~mask
    needs_masking = len(on) > 0 and notmask.any()

    other_blocks = [
        block.reindex_axis(indexer, notmask, needs_masking, axis=axis)
        for block in other.blocks
    ]

    cons_items = this.items + other.items
    consolidated = _consolidate(this.blocks + other_blocks, cons_items)

    new_axes = list(this.axes)
    new_axes[0] = cons_items
    return BlockManager(consolidated, new_axes)
def fromarray(cls, values):
    """
    Build a Factor from an array of values.

    The input is converted to an object array; its sorted unique values
    become the levels, and the label of each element is obtained from
    ``_tseries.getMergeVec`` against those levels.

    Parameters
    ----------
    values : array-like
        Elements must be hashable and orderable (they are passed through
        ``set`` and ``sorted``).

    Returns
    -------
    Factor
    """
    as_objects = np.asarray(values, dtype=object)
    uniques = Index(sorted(set(as_objects)))

    # Second element of the returned pair is not needed here.
    codes, _unused = _tseries.getMergeVec(as_objects, uniques.indexMap)
    return Factor(codes, levels=uniques)