def _merge_blocks(self, merge_chunks):
    """
    Stack the reindexed values of several blocks into one new block.

    merge_chunks -> [(_JoinUnit, Block)]
    """
    first_unit, first_block = merge_chunks[0]
    first_indexer = first_unit.indexer

    # Length along the merge axis: the indexer length, or the existing
    # axis length when no reindexing is needed.
    if first_indexer is None:
        merge_len = first_block.values.shape[self.axis]
    else:
        merge_len = len(first_indexer)

    out_shape = list(first_block.values.shape)
    out_shape[0] = sum(len(blk) for _, blk in merge_chunks)
    out_shape[self.axis] = merge_len

    # Should use Fortran order??
    block_dtype = _get_block_dtype([blk for _, blk in merge_chunks])
    out = np.empty(out_shape, dtype=block_dtype)

    offset = 0
    for unit, blk in merge_chunks:
        section = out[offset:offset + len(blk)]
        com.take_fast(blk.values, unit.indexer, None, False,
                      axis=self.axis, out=section)
        offset += len(blk)

    # does not sort
    new_block_items = _concat_indexes([blk.items for _, blk in merge_chunks])
    return make_block(out, new_block_items, self.result_items)
def _merge_blocks(self, merge_chunks):
    """
    Stack the reindexed values of several blocks into one new block.

    merge_chunks -> [(_JoinUnit, Block)]
    """
    first_unit, first_block = merge_chunks[0]

    # Length along the merge axis: the indexer length, or the existing
    # axis length when no reindexing is needed.
    if first_unit.indexer is None:
        n = first_block.values.shape[self.axis]
    else:
        n = len(first_unit.indexer)

    out_shape = list(first_block.values.shape)
    out_shape[0] = sum(len(blk) for _, blk in merge_chunks)
    out_shape[self.axis] = n

    # Should use Fortran order??
    out = np.empty(out_shape, dtype=first_block.values.dtype)

    offset = 0
    for unit, blk in merge_chunks:
        section = out[offset:offset + len(blk)]
        if unit.indexer is None:
            # identity reindex; is this really faster than assigning
            # to arr.flat?
            com.take_fast(blk.values, np.arange(n, dtype=np.int64),
                          None, False, axis=self.axis, out=section)
        else:
            # write out the values to the result array
            com.take_fast(blk.values, unit.indexer, None, False,
                          axis=self.axis, out=section)
        offset += len(blk)

    # does not sort
    new_block_items = _concat_indexes([blk.items for _, blk in merge_chunks])
    return make_block(out, new_block_items, self.result_items)
def _reindex_indexer_items(self, new_items, indexer, fill_value):
    """
    Reindex items using a precomputed integer indexer, building new
    blocks and a NA block for items not found in any existing block.
    """
    # TODO: less efficient than I'd like

    item_order = com.take_1d(self.items.values, indexer)

    # keep track of what items aren't found anywhere
    mask = np.zeros(len(item_order), dtype=bool)

    new_blocks = []
    for blk in self.blocks:
        blk_indexer = blk.items.get_indexer(item_order)
        selector = blk_indexer != -1

        # update with observed items
        mask |= selector

        if not selector.any():
            continue

        new_block_items = new_items.take(selector.nonzero()[0])
        new_values = com.take_fast(blk.values, blk_indexer[selector],
                                   None, False, axis=0)
        new_blocks.append(make_block(new_values, new_block_items,
                                     new_items))

    if not mask.all():
        # FIX: use ~mask rather than -mask; unary minus on a boolean
        # array is deprecated and removed in modern NumPy.
        na_items = new_items[~mask]
        na_block = self._make_na_block(na_items, new_items,
                                       fill_value=fill_value)
        new_blocks.append(na_block)
        new_blocks = _consolidate(new_blocks, new_items)

    return BlockManager(new_blocks, [new_items] + self.axes[1:])
def take(self, indexer, axis=1, fill_value=np.nan):
    """
    Take values along `axis`, substituting `fill_value` where the
    indexer contains -1.
    """
    if axis < 1:
        raise AssertionError('axis must be at least 1, got %d' % axis)

    taken = com.take_fast(self.values, indexer, None, None,
                          axis=axis, fill_value=fill_value)
    return make_block(taken, self.items, self.ref_items)
def reindex_axis(self, indexer, mask, needs_masking, axis=0,
                 fill_value=np.nan):
    """
    Reindex using pre-computed indexer information
    """
    if self.values.size == 0:
        # no data: just allocate an array of the right shape filled
        # with the fill value
        new_shape = list(self.shape)
        new_shape[axis] = len(indexer)
        new_values = np.empty(new_shape)
        new_values.fill(fill_value)
    else:
        new_values = com.take_fast(self.values, indexer, mask,
                                   needs_masking, axis=axis,
                                   fill_value=fill_value)
    return make_block(new_values, self.items, self.ref_items)
def reindex_indexer(self, new_axis, indexer, axis=1):
    """
    pandas-indexer with -1's only
    """
    if axis == 0:
        raise NotImplementedError

    new_axes = list(self.axes)
    new_axes[axis] = new_axis

    # reindex every block's values along the requested axis
    new_blocks = [
        make_block(common.take_fast(blk.values, indexer, None, False,
                                    axis=axis),
                   blk.items, self.items)
        for blk in self.blocks
    ]
    return BlockManager(new_blocks, new_axes)
def take(self, indexer, axis=1, pandas_indexer=False):
    """
    Take along `axis`; with pandas_indexer=True the indexer may
    contain -1 sentinels and goes through take_fast.
    """
    if axis == 0:
        raise NotImplementedError

    if pandas_indexer:
        def take_values(arr):
            return common.take_fast(arr, indexer, None, False, axis=axis)
    else:
        def take_values(arr):
            return arr.take(indexer, axis=axis)

    new_axes = list(self.axes)
    new_axes[axis] = self.axes[axis].take(indexer)

    new_blocks = [make_block(take_values(blk.values), blk.items, self.items)
                  for blk in self.blocks]
    return BlockManager(new_blocks, new_axes)
def take(self, indexer, axis=1):
    """
    Take along `axis` using a strictly in-bounds integer indexer.

    Raises
    ------
    Exception : if any index is negative or >= the axis length.
    """
    if axis == 0:
        raise NotImplementedError
    indexer = np.asarray(indexer, dtype="i4")

    n = len(self.axes[axis])
    # FIX: reject *any* negative index, not just -1; values like -2
    # previously slipped through and would silently wrap around.
    # The message also said "nonzero" where it meant "nonnegative".
    if ((indexer < 0) | (indexer >= n)).any():
        raise Exception("Indices must be nonnegative and less than "
                        "the axis length")

    new_axes = list(self.axes)
    new_axes[axis] = self.axes[axis].take(indexer)
    new_blocks = []
    for blk in self.blocks:
        new_values = com.take_fast(blk.values, indexer,
                                   None, False, axis=axis)
        newb = make_block(new_values, blk.items, self.items)
        new_blocks.append(newb)

    return BlockManager(new_blocks, new_axes)
def _merge_blocks(self, lblk, rblk):
    # Stack the (possibly reindexed) left and right block values into a
    # single output array and wrap it in a new block keyed by
    # self.result_items.
    lidx = self.lindexer
    ridx = self.rindexer

    # length along the merge axis; NOTE(review): when ridx is None this
    # same n is used for the right block's identity arange below —
    # assumes left and right reindexed lengths agree; confirm at callers
    n = lblk.values.shape[self.axis] if lidx is None else len(lidx)
    lk = len(lblk.items)
    rk = len(rblk.items)

    out_shape = list(lblk.shape)
    out_shape[0] = lk + rk          # left items stacked above right items
    out_shape[self.axis] = n

    out = np.empty(out_shape, dtype=lblk.values.dtype)

    # is this really faster than assigning to arr.flat?
    if lidx is None:
        # identity reindex, i.e. out[:lk] = lblk.values
        common.take_fast(lblk.values, np.arange(n, dtype='i4'),
                         None, False,
                         axis=self.axis, out=out[:lk])
    else:
        # write out the values to the result array
        common.take_fast(lblk.values, lidx, None, False,
                         axis=self.axis, out=out[:lk])
    if ridx is None:
        # identity reindex, i.e. out[lk:] = rblk.values
        # (original comment said lblk; the call takes from rblk)
        common.take_fast(rblk.values, np.arange(n, dtype='i4'),
                         None, False,
                         axis=self.axis, out=out[lk:])
    else:
        common.take_fast(rblk.values, ridx, None, False,
                         axis=self.axis, out=out[lk:])

    # does not sort
    new_items = lblk.items.append(rblk.items)
    return make_block(out, new_items, self.result_items)
def take(self, indexer, axis=1):
    """
    Take along `axis` using a strictly in-bounds integer indexer.

    Raises
    ------
    Exception : if any index is negative or >= the axis length.
    """
    if axis == 0:
        raise NotImplementedError
    indexer = np.asarray(indexer, dtype='i4')

    n = len(self.axes[axis])
    # FIX: reject *any* negative index, not just -1; values like -2
    # previously slipped through and would silently wrap around.
    # The message also said "nonzero" where it meant "nonnegative".
    if ((indexer < 0) | (indexer >= n)).any():
        raise Exception('Indices must be nonnegative and less than '
                        'the axis length')

    new_axes = list(self.axes)
    new_axes[axis] = self.axes[axis].take(indexer)
    new_blocks = []
    for blk in self.blocks:
        new_values = com.take_fast(blk.values, indexer,
                                   None, False, axis=axis)
        newb = make_block(new_values, blk.items, self.items)
        new_blocks.append(newb)

    return BlockManager(new_blocks, new_axes)
def _reindex_indexer_items(self, new_items, indexer, fill_value):
    """
    Reindex items using a precomputed integer indexer, building new
    blocks and a NA block for items not found in any existing block.
    """
    # TODO: less efficient than I'd like

    item_order = com.take_1d(self.items.values, indexer)

    # keep track of what items aren't found anywhere
    mask = np.zeros(len(item_order), dtype=bool)

    new_blocks = []
    for blk in self.blocks:
        blk_indexer = blk.items.get_indexer(item_order)
        selector = blk_indexer != -1

        # update with observed items
        mask |= selector

        if not selector.any():
            continue

        new_block_items = new_items.take(selector.nonzero()[0])
        new_values = com.take_fast(blk.values, blk_indexer[selector],
                                   None, False, axis=0)
        new_blocks.append(
            make_block(new_values, new_block_items, new_items))

    if not mask.all():
        # FIX: use ~mask rather than -mask; unary minus on a boolean
        # array is deprecated and removed in modern NumPy.
        na_items = new_items[~mask]
        na_block = self._make_na_block(na_items, new_items,
                                       fill_value=fill_value)
        new_blocks.append(na_block)
        new_blocks = _consolidate(new_blocks, new_items)

    return BlockManager(new_blocks, [new_items] + self.axes[1:])
def take(self, indexer, axis=1, fill_value=np.nan):
    """
    Take values along `axis`, substituting `fill_value` where the
    indexer contains -1.
    """
    # FIX: `assert` is stripped under `python -O`, so it must not be
    # used for input validation; raise explicitly instead (same
    # behavior and exception type as the other take implementation
    # in this file).
    if axis < 1:
        raise AssertionError('axis must be at least 1, got %d' % axis)

    new_values = com.take_fast(self.values, indexer, None, None,
                               axis=axis, fill_value=fill_value)
    return make_block(new_values, self.items, self.ref_items)