def copy_table(tbl, start=0, stop=None, blen=None, storage=None, create='table', **kwargs):
    """Copy `tbl` block-wise into a new table."""
    # resolve table structure, backend storage and block length
    names, columns = _util.check_table_like(tbl)
    storage = _util.get_storage(storage)
    blen = _util.get_blen_table(tbl, blen)
    n_rows = len(columns[0])
    # clip the requested range to the available rows
    stop = n_rows if stop is None else min(stop, n_rows)
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')
    # copy one block of rows at a time
    out = None
    for offset in range(start, stop, blen):
        limit = min(offset + blen, stop)
        block = [np.asanyarray(col[offset:limit]) for col in columns]
        if out is None:
            # first block: create the output table via the storage backend
            out = getattr(storage, create)(block, names=names, expectedlen=length, **kwargs)
        else:
            out.append(block)
    return out
def compress_table(tbl, condition, blen=None, storage=None, create='table', **kwargs):
    """Return selected rows of a table."""
    # setup
    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    blen = _util.get_blen_table(tbl, blen)
    _util.check_equal_length(columns[0], condition)
    length = len(columns[0])
    nnz = count_nonzero(condition)
    # iterate over the condition block-wise
    out = None
    for lo in range(0, length, blen):
        hi = min(lo + blen, length)
        mask = np.asanyarray(condition[lo:hi])
        # skip blocks where nothing is selected, avoiding any column access
        if np.any(mask):
            block = [np.asanyarray(col[lo:hi]) for col in columns]
            selected = [np.compress(mask, col, axis=0) for col in block]
            if out is None:
                out = getattr(storage, create)(selected, names=names, expectedlen=nnz, **kwargs)
            else:
                out.append(selected)
    return out
def concatenate_table(tup, blen=None, storage=None, create='table', **kwargs):
    """Stack tables in sequence vertically (row-wise)."""
    # validate arguments
    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more tables to stack')
    # total rows in the output = sum of rows over all inputs
    expectedlen = sum(len(t) for t in tup)
    out = None
    tnames = None
    for tdata in tup:
        tblen = _util.get_blen_table(tdata, blen)
        # names resolved from the first table constrain all later tables
        tnames, tcolumns = _util.check_table_like(tdata, names=tnames)
        nrows = len(tcolumns[0])
        for lo in range(0, nrows, tblen):
            hi = min(lo + tblen, nrows)
            block = [col[lo:hi] for col in tcolumns]
            if out is None:
                out = getattr(storage, create)(block, names=tnames, expectedlen=expectedlen, **kwargs)
            else:
                out.append(block)
    return out
def table(self, data, names=None, expectedlen=None, **kwargs):
    """Store `data` as one HDF5 dataset per column, returning the group."""
    # normalise input to (names, columns)
    names, columns = _util.check_table_like(data, names=names)
    # locate the HDF5 group that will hold the columns
    h5g = kwargs.pop('group', None)
    if h5g is None:
        # no group supplied; open a file and use its root group
        h5g, kwargs = self.open_file(**kwargs)
    # one dataset per column
    for name, col in zip(names, columns):
        self.create_dataset(h5g, data=col, name=name, expectedlen=expectedlen, **kwargs)
    # make the group behave like a table: append method plus names attribute
    h5g.append = MethodType(_table_append, h5g)
    h5g.names = names
    return h5g
def copy_table(tbl, start=0, stop=None, blen=None, storage=None, create='table', **kwargs):
    """Copy `tbl` block-wise into a new table."""
    # setup
    names, columns = _util.check_table_like(tbl)
    storage = _util.get_storage(storage)
    blen = _util.get_blen_table(tbl, blen)
    total = len(columns[0])
    if stop is None:
        stop = total
    else:
        stop = min(stop, total)
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')
    # copy block-wise
    out = None
    i = start
    while i < stop:
        j = min(i + blen, stop)
        chunk = [col[i:j] for col in columns]
        if out is None:
            # first chunk creates the output via the storage backend
            out = getattr(storage, create)(chunk, names=names, expectedlen=length, **kwargs)
        else:
            out.append(chunk)
        i = j
    return out
def __init__(self, data, names=None):
    # Intentionally no super().__init__() call: this class is more flexible
    # about the type of values than its parent allows.
    resolved_names, resolved_columns = _util.check_table_like(data, names=names)
    self._values = data
    self._names = resolved_names
    self._columns = resolved_columns
    # row objects are named tuples keyed by the column names
    self.rowcls = namedtuple('row', resolved_names)
def vstack_table(tup, blen=None, storage=None, create='table', **kwargs):
    """Stack tables in sequence vertically (row-wise)."""
    # setup
    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more tables to stack')
    # the output holds every row of every input table
    expectedlen = sum(len(t) for t in tup)
    out = None
    tnames = None
    for table_data in tup:
        tblen = _util.get_blen_table(table_data, blen)
        # names from the first table are enforced on the rest
        tnames, tcolumns = _util.check_table_like(table_data, names=tnames)
        nrows = len(tcolumns[0])
        for offset in range(0, nrows, tblen):
            limit = min(offset + tblen, nrows)
            block = [np.asanyarray(col[offset:limit]) for col in tcolumns]
            if out is None:
                out = getattr(storage, create)(block, names=tnames, expectedlen=expectedlen, **kwargs)
            else:
                out.append(block)
    return out
def table(self, data, names=None, expectedlen=None, **kwargs):
    """Store `data` as a group of zarr arrays, one per column."""
    # setup
    names, columns = _util.check_table_like(data, names=names)
    kwargs = self._set_defaults(kwargs)
    chunks = kwargs.pop('chunks', None)
    g = zarr.group(**kwargs)
    # one zarr array per column
    for name, col in zip(names, columns):
        if chunks is None:
            # derived once, from the first column, then shared by all columns
            chunks = default_chunks(col, expectedlen)
        if col.dtype == object:
            # select an object codec by peeking at the first value
            # NOTE(review): assumes the column is non-empty — confirm callers
            first = col[0]
            if isinstance(first, bytes):
                object_codec = numcodecs.VLenBytes()
            elif isinstance(first, str):
                object_codec = numcodecs.VLenUTF8()
            else:
                object_codec = numcodecs.MsgPack()
        else:
            object_codec = None
        g.array(name=name, data=col, chunks=chunks, object_codec=object_codec)
    # wrap the group as a table
    return ZarrTable(g, names=names)
def table(self, data, names=None, expectedlen=None, **kwargs):
    """Store `data` in a bcolz ctable."""
    resolved_names, resolved_columns = _util.check_table_like(data, names=names)
    kwargs = self._set_defaults(kwargs)
    ctbl = bcolz.ctable(resolved_columns, names=resolved_names, expectedlen=expectedlen, **kwargs)
    # keep the native append reachable, then install the table-aware one
    ctbl.append_original = ctbl.append
    ctbl.append = MethodType(_table_append, ctbl)
    return ctbl
def eval_table(tbl, expression, vm='python', blen=None, storage=None, create='array', vm_kwargs=None, **kwargs):
    """Evaluate `expression` against columns of a table."""
    # setup
    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])
    vm_kwargs = dict() if vm_kwargs is None else vm_kwargs
    # choose the virtual machine used to evaluate the expression
    if vm == 'numexpr':
        import numexpr
        evaluate = numexpr.evaluate
    elif vm == 'python':
        # noinspection PyUnusedLocal
        def evaluate(expr, local_dict=None, **kw):
            # extra keyword arguments are accepted but ignored
            return eval(expr, dict(), local_dict)
    else:
        raise ValueError('expected vm either "numexpr" or "python"')
    # restrict work to the columns the expression actually references
    variables = _get_expression_variables(expression, vm)
    required_columns = {v: columns[names.index(v)] for v in variables}
    # block length is derived from the required columns only
    blen = _util.get_blen_table(required_columns, blen=blen)
    # evaluate block-wise, accumulating into the output container
    out = None
    for offset in range(0, length, blen):
        upper = min(offset + blen, length)
        block_locals = {v: col[offset:upper] for v, col in required_columns.items()}
        res = evaluate(expression, local_dict=block_locals, **vm_kwargs)
        if out is None:
            out = getattr(storage, create)(res, expectedlen=length, **kwargs)
        else:
            out.append(res)
    return out
def table(self, data, names=None, expectedlen=None, **kwargs):
    """Store `data` as a zarr group with one array per column.

    Fix: 'chunks' is popped from kwargs BEFORE zarr.group() is called.
    Previously it was read with kwargs.get() after the group was created,
    so a caller-supplied chunks kwarg was also forwarded to zarr.group(),
    which does not accept it. This also matches the sibling zarr backend.
    """
    # setup
    names, columns = _util.check_table_like(data, names=names)
    kwargs = self._set_defaults(kwargs)
    # take chunks out of kwargs so it is not forwarded to zarr.group()
    chunks = kwargs.pop('chunks', None)
    g = zarr.group(**kwargs)
    # create columns
    for n, c in zip(names, columns):
        if chunks is None:
            # derived once from the first column, then reused for all columns
            chunks = default_chunks(c, expectedlen)
        g.array(name=n, data=c, chunks=chunks)
    # create table
    ztbl = ZarrTable(g, names=names)
    return ztbl
def eval_table(tbl, expression, vm='python', blen=None, storage=None, create='array', vm_kwargs=None, **kwargs):
    """Evaluate `expression` against columns of a table."""
    # setup
    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])
    if vm_kwargs is None:
        vm_kwargs = dict()
    # set up the expression evaluator
    if vm == 'numexpr':
        import numexpr
        evaluate = numexpr.evaluate
    elif vm == 'python':
        # noinspection PyUnusedLocal
        def evaluate(expr, local_dict=None, **kw):
            # takes no keyword arguments
            return eval(expr, dict(), local_dict)
    else:
        raise ValueError('expected vm either "numexpr" or "python"')
    # compile the expression and pick out only the columns it needs
    variables = _get_expression_variables(expression, vm)
    required_columns = {name: columns[names.index(name)] for name in variables}
    # block size is based on the required columns only
    blen = _util.get_blen_table(required_columns, blen=blen)
    # evaluate one block at a time
    out = None
    i = 0
    while i < length:
        j = min(i + blen, length)
        block_locals = {name: col[i:j] for name, col in required_columns.items()}
        res = evaluate(expression, local_dict=block_locals, **vm_kwargs)
        if out is None:
            out = getattr(storage, create)(res, expectedlen=length, **kwargs)
        else:
            out.append(res)
        i = j
    return out
def table(self, data, names=None, expectedlen=None, **kwargs):
    """Store `data` as one array per column, each under its own sub-path."""
    # N.B. expectedlen is accepted for API compatibility but not used here
    names, columns = _util.check_table_like(data, names=names)
    kwargs = self._set_defaults(kwargs)
    base_path = kwargs.get('path', None)
    # build one array per column
    zcols = []
    for name, col in zip(names, columns):
        col_kwargs = kwargs.copy()
        if base_path:
            # each column lives at <path>/<column name>
            col_kwargs['path'] = os.path.join(base_path, name)
        zcols.append(self._create_array(col, **col_kwargs))
    # wrap the column arrays as a table
    return ZarrTable(names, zcols)
def take_table(tbl, indices, blen=None, storage=None, create='table', **kwargs):
    """Return selected rows of a table."""
    # setup
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])
    # only strictly increasing index sequences are supported
    indices = np.asanyarray(indices)
    if np.any(indices[1:] <= indices[:-1]):
        raise NotImplementedError('indices must be strictly increasing')
    # reduce the problem to compress() with a boolean mask
    condition = np.zeros((length,), dtype=bool)
    condition[indices] = True
    return compress_table(tbl, condition, blen=blen, storage=storage,
                          create=create, **kwargs)
def compress_table(condition, tbl, axis=None, out=None, blen=None, storage=None, create='table', **kwargs):
    """Return selected rows of a table."""
    # numpy API compatibility arguments: only the defaults are supported
    if axis is not None and axis != 0:
        raise NotImplementedError('only axis 0 is supported')
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    # setup
    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    blen = _util.get_blen_table(tbl, blen)
    _util.check_equal_length(columns[0], condition)
    length = len(columns[0])
    nnz = count_nonzero(condition)
    # block iteration
    out = None
    for lo in range(0, length, blen):
        hi = min(lo + blen, length)
        mask = condition[lo:hi]
        # only touch column data for blocks with at least one selected row
        if np.any(mask):
            block = [col[lo:hi] for col in columns]
            picked = [np.compress(mask, col, axis=0) for col in block]
            if out is None:
                out = getattr(storage, create)(picked, names=names, expectedlen=nnz, **kwargs)
            else:
                out.append(picked)
    return out
def table(self, data, names=None, expectedlen=None, **kwargs):
    """Store `data` as one HDF5 dataset per column, returning the group."""
    # N.B. expectedlen is currently ignored by this backend
    names, columns = _util.check_table_like(data, names=names)
    # find or open the destination HDF5 group
    h5g = kwargs.pop("group", None)
    if h5g is None:
        # no group given: open a file and work in its root group
        h5g, kwargs = self.open_file(**kwargs)
    # write each column to its own dataset
    for name, col in zip(names, columns):
        self.create_dataset(h5g, data=col, name=name, **kwargs)
    # attach table-like behaviour to the group
    h5g.append = MethodType(_table_append, h5g)
    h5g.names = names
    return h5g
def take_table(tbl, indices, axis=None, out=None, mode='raise', blen=None, storage=None, create='table', **kwargs):
    """Return selected rows of a table."""
    # numpy API compatibility arguments: only the defaults are supported
    if axis is not None and axis != 0:
        raise NotImplementedError('only axis 0 is supported')
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    if mode is not None and mode != 'raise':
        raise NotImplementedError('only mode=raise is supported')
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])
    # only strictly increasing index sequences are supported
    idx = np.asanyarray(indices)
    if np.any(idx[1:] <= idx[:-1]):
        raise NotImplementedError('indices must be strictly increasing')
    # delegate to compress() via a boolean mask over all rows
    mask = np.zeros((length, ), dtype=bool)
    mask[idx] = True
    return compress_table(mask, tbl, blen=blen, storage=storage,
                          create=create, **kwargs)
def __init__(self, data, names=None):
    # validate the table-like input and resolve names/columns from it
    resolved_names, resolved_columns = _util.check_table_like(data, names=names)
    self.data = data
    self.names = resolved_names
    self.columns = resolved_columns
    # rows are exposed as named tuples keyed by the column names
    self.rowcls = namedtuple('row', resolved_names)
def append(self, data):
    # validate the incoming data against this table's column names
    _, incoming_columns = _util.check_table_like(data, names=self.names)
    # extend each existing column with its incoming counterpart
    for existing, incoming in zip(self.columns, incoming_columns):
        existing.append(incoming)
def _table_append(h5g, data):
    # validate incoming data against the group's recorded column names
    names, columns = _util.check_table_like(data, names=h5g.names)
    # append each column to the matching dataset within the group
    for name, col in zip(names, columns):
        _dataset_append(h5g[name], col)
def append(self, data):
    # check the incoming data matches this table's column names
    incoming_names, incoming_columns = _util.check_table_like(data, names=self.names)
    # append column-wise into the backing group
    for name, col in zip(incoming_names, incoming_columns):
        self.grp[name].append(col)