import numpy as np
from numpy import count_nonzero

# NB: `_util` (storage and block-length helpers), `text_type`,
# `_get_expression_variables` and `_numpy_subset` are provided elsewhere in
# the package and are assumed to be importable here.


def reduce_axis(data, reducer, block_reducer, mapper=None, axis=None,
                blen=None, storage=None, create='array', **kwargs):
    """Apply an operation to `data` that reduces over one or more axes."""

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    # normalise axis arg
    if isinstance(axis, int):
        axis = (axis,)
    # deal with 'out' kwarg if supplied, can arise if a chunked array is
    # passed as an argument to numpy.sum(), see also
    # https://github.com/cggh/scikit-allel/issues/66
    kwarg_out = kwargs.pop('out', None)
    if kwarg_out is not None:
        raise ValueError('keyword argument "out" is not supported')

    if axis is None or 0 in axis:
        # two-step reduction
        out = None
        for i in range(0, length, blen):
            j = min(i + blen, length)
            block = np.asanyarray(data[i:j])
            if mapper:
                block = mapper(block)
            res = reducer(block, axis=axis)
            if out is None:
                out = res
            else:
                out = block_reducer(out, res)
        if np.isscalar(out):
            return out
        elif len(out.shape) == 0:
            return out[()]
        else:
            return getattr(storage, create)(out, **kwargs)

    else:
        # first dimension is preserved, no need to reduce blocks
        out = None
        for i in range(0, length, blen):
            j = min(i + blen, length)
            block = np.asanyarray(data[i:j])
            if mapper:
                block = mapper(block)
            r = reducer(block, axis=axis)
            if out is None:
                out = getattr(storage, create)(r, expectedlen=length, **kwargs)
            else:
                out.append(r)
        return out
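# ---------------------------------------------------------------------------
# The _demo_* functions below are illustrative usage sketches added for
# documentation only. They are hypothetical helpers, not part of the module's
# API, and assume the default storage backend returns array-likes that numpy
# can consume via np.asarray().
# ---------------------------------------------------------------------------


# Illustrative sketch: a full reduction collapses to a scalar, while reducing
# over a trailing axis preserves the first dimension and returns a new stored
# array.
def _demo_reduce_axis():
    a = np.arange(10).reshape(5, 2)
    # reduce over all axes -> scalar
    assert reduce_axis(a, reducer=np.sum, block_reducer=np.add, blen=2) == 45
    # reduce over axis 1 only -> per-row sums, first dimension preserved
    rowsum = reduce_axis(a, reducer=np.sum, block_reducer=np.add, axis=1,
                         blen=2)
    assert np.asarray(rowsum).tolist() == [1, 5, 9, 13, 17]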
def compress(condition, data, axis=0, out=None, blen=None, storage=None,
             create='array', **kwargs):
    """Return selected slices of an array along given axis."""

    # setup
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    nnz = count_nonzero(condition)

    if axis == 0:
        _util.check_equal_length(data, condition)

        # block iteration
        out = None
        for i in range(0, length, blen):
            j = min(i + blen, length)
            bcond = np.asarray(condition[i:j])
            # don't access any data unless we have to
            if np.any(bcond):
                block = np.asarray(data[i:j])
                res = np.compress(bcond, block, axis=0)
                if out is None:
                    out = getattr(storage, create)(res, expectedlen=nnz,
                                                   **kwargs)
                else:
                    out.append(res)
        return out

    elif axis == 1:

        # block iteration
        out = None
        condition = np.asanyarray(condition)
        for i in range(0, length, blen):
            j = min(i + blen, length)
            block = np.asarray(data[i:j])
            res = np.compress(condition, block, axis=1)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=length,
                                               **kwargs)
            else:
                out.append(res)
        return out

    else:
        raise NotImplementedError('axis not supported: %s' % axis)
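# Illustrative sketch: selecting rows (axis=0) and columns (axis=1) with
# boolean conditions; note that `condition` comes first, matching np.compress.
def _demo_compress():
    a = np.arange(10).reshape(5, 2)
    rows = compress(np.array([True, False, True, False, True]), a, axis=0,
                    blen=2)
    assert np.asarray(rows).tolist() == [[0, 1], [4, 5], [8, 9]]
    cols = compress(np.array([False, True]), a, axis=1, blen=2)
    assert np.asarray(cols).tolist() == [[1], [3], [5], [7], [9]]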
def copy(data, start=0, stop=None, blen=None, storage=None, create='array',
         **kwargs):
    """Copy `data` block-wise into a new array."""

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    if stop is None:
        stop = len(data)
    else:
        stop = min(stop, len(data))
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    # copy block-wise
    out = None
    for i in range(start, stop, blen):
        j = min(i + blen, stop)
        block = np.asanyarray(data[i:j])
        if out is None:
            out = getattr(storage, create)(block, expectedlen=length, **kwargs)
        else:
            out.append(block)

    return out
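# Illustrative sketch: block-wise copy of a slice into a new stored array.
def _demo_copy():
    a = np.arange(10)
    b = copy(a, start=2, stop=8, blen=3)
    assert np.asarray(b).tolist() == [2, 3, 4, 5, 6, 7]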
def concatenate_table(tup, blen=None, storage=None, create='table', **kwargs):
    """Stack tables in sequence vertically (row-wise)."""

    # setup
    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more tables to stack')

    # build output
    expectedlen = sum(len(t) for t in tup)
    out = None
    tnames = None
    for tdata in tup:
        tblen = _util.get_blen_table(tdata, blen)
        tnames, tcolumns = _util.check_table_like(tdata, names=tnames)
        tlen = len(tcolumns[0])
        for i in range(0, tlen, tblen):
            j = min(i + tblen, tlen)
            bcolumns = [c[i:j] for c in tcolumns]
            if out is None:
                out = getattr(storage, create)(bcolumns, names=tnames,
                                               expectedlen=expectedlen,
                                               **kwargs)
            else:
                out.append(bcolumns)

    return out
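# Illustrative sketch (assumes `_util.check_table_like` accepts a plain dict
# of equal-length column arrays; the accepted table-like types are defined by
# that helper, so this input form is an assumption).
def _demo_concatenate_table():
    t1 = {'foo': np.array([1, 2]), 'bar': np.array([3.0, 4.0])}
    t2 = {'foo': np.array([5, 6]), 'bar': np.array([7.0, 8.0])}
    out = concatenate_table([t1, t2])
    names, columns = _util.check_table_like(out)
    assert np.asarray(columns[names.index('foo')]).tolist() == [1, 2, 5, 6]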
def copy_table(tbl, start=0, stop=None, blen=None, storage=None,
               create='table', **kwargs):
    """Copy `tbl` block-wise into a new table."""

    # setup
    names, columns = _util.check_table_like(tbl)
    storage = _util.get_storage(storage)
    blen = _util.get_blen_table(tbl, blen)
    if stop is None:
        stop = len(columns[0])
    else:
        stop = min(stop, len(columns[0]))
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    # copy block-wise
    out = None
    for i in range(start, stop, blen):
        j = min(i + blen, stop)
        res = [np.asanyarray(c[i:j]) for c in columns]
        if out is None:
            out = getattr(storage, create)(res, names=names,
                                           expectedlen=length, **kwargs)
        else:
            out.append(res)

    return out
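# Illustrative sketch (same dict-of-columns assumption as above): copying a
# slice of rows from each column into a new table.
def _demo_copy_table():
    tbl = {'foo': np.arange(5), 'bar': np.arange(5) * 2}
    out = copy_table(tbl, start=1, stop=4)
    names, columns = _util.check_table_like(out)
    assert np.asarray(columns[names.index('bar')]).tolist() == [2, 4, 6]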
def take(data, indices, axis=0, out=None, mode='raise', blen=None,
         storage=None, create='array', **kwargs):
    """Take elements from an array along an axis."""

    # setup
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    length = len(data)

    if axis == 0:
        # check that indices are strictly increasing
        indices = np.asanyarray(indices)
        if np.any(indices[1:] <= indices[:-1]):
            raise NotImplementedError('indices must be strictly increasing')
        # implement via compress()
        condition = np.zeros((length,), dtype=bool)
        condition[indices] = True
        return compress(condition, data, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)

    elif axis == 1:
        # setup
        storage = _util.get_storage(storage)
        blen = _util.get_blen_array(data, blen)

        # block iteration
        out = None
        for i in range(0, length, blen):
            j = min(i + blen, length)
            block = data[i:j]
            res = np.take(block, indices, axis=1, mode=mode)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=length,
                                               **kwargs)
            else:
                out.append(res)
        return out

    else:
        raise NotImplementedError('axis not supported: %s' % axis)
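# Illustrative sketch: axis-0 take requires strictly increasing indices
# (it is routed through compress); axis-1 take delegates to np.take per block.
def _demo_take():
    a = np.arange(10).reshape(5, 2)
    rows = take(a, [0, 2, 4], axis=0, blen=2)
    assert np.asarray(rows).tolist() == [[0, 1], [4, 5], [8, 9]]
    cols = take(a, [1, 0], axis=1, blen=2)
    assert np.asarray(cols).tolist() == [[1, 0], [3, 2], [5, 4], [7, 6], [9, 8]]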
def eval_table(tbl, expression, vm='python', blen=None, storage=None,
               create='array', vm_kwargs=None, **kwargs):
    """Evaluate `expression` against columns of a table."""

    # setup
    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    length = len(columns[0])
    if vm_kwargs is None:
        vm_kwargs = dict()

    # setup vm
    if vm == 'numexpr':
        import numexpr
        evaluate = numexpr.evaluate
    elif vm == 'python':
        # noinspection PyUnusedLocal
        def evaluate(expr, local_dict=None, **kw):
            # takes no keyword arguments
            return eval(expr, dict(), local_dict)
    else:
        raise ValueError('expected vm either "numexpr" or "python"')

    # compile expression and get required columns
    variables = _get_expression_variables(expression, vm)
    required_columns = {v: columns[names.index(v)] for v in variables}

    # determine block size for evaluation
    blen = _util.get_blen_table(required_columns, blen=blen)

    # build output
    out = None
    for i in range(0, length, blen):
        j = min(i + blen, length)
        blocals = {v: c[i:j] for v, c in required_columns.items()}
        res = evaluate(expression, local_dict=blocals, **vm_kwargs)
        if out is None:
            out = getattr(storage, create)(res, expectedlen=length, **kwargs)
        else:
            out.append(res)

    return out
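# Illustrative sketch (same dict-of-columns assumption; also assumes
# `_get_expression_variables` extracts {'foo', 'bar'} from the expression).
def _demo_eval_table():
    tbl = {'foo': np.array([1, 2, 3]), 'bar': np.array([10, 20, 30])}
    out = eval_table(tbl, 'foo + bar', vm='python')
    assert np.asarray(out).tolist() == [11, 22, 33]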
def concatenate(tup, axis=0, blen=None, storage=None, create='array',
                **kwargs):
    """Concatenate arrays."""

    # setup
    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more arrays')
    # normalise to tuple so map_blocks treats multiple inputs as such below
    tup = tuple(tup)

    if axis == 0:

        # build output
        expectedlen = sum(len(a) for a in tup)
        out = None
        for a in tup:
            ablen = _util.get_blen_array(a, blen)
            for i in range(0, len(a), ablen):
                j = min(i + ablen, len(a))
                block = a[i:j]
                if out is None:
                    out = getattr(storage, create)(block,
                                                   expectedlen=expectedlen,
                                                   **kwargs)
                else:
                    out.append(block)

    else:

        def f(*blocks):
            return np.concatenate(blocks, axis=axis)

        out = map_blocks(tup, f, blen=blen, storage=storage, create=create,
                         **kwargs)

    return out
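# Illustrative sketch: axis-0 concatenation streams blocks from each input in
# turn; other axes fall back to map_blocks over aligned blocks of the inputs.
def _demo_concatenate():
    a = np.arange(4).reshape(2, 2)
    b = np.arange(4, 8).reshape(2, 2)
    out0 = concatenate((a, b), axis=0)
    assert np.asarray(out0).tolist() == [[0, 1], [2, 3], [4, 5], [6, 7]]
    out1 = concatenate((a, b), axis=1)
    assert np.asarray(out1).tolist() == [[0, 1, 4, 5], [2, 3, 6, 7]]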
def write_fasta(path, sequences, names, mode='w', width=80):
    """Write nucleotide sequences stored as numpy arrays to a FASTA file.

    Parameters
    ----------
    path : string
        File path.
    sequences : sequence of arrays
        One or more ndarrays of dtype 'S1' containing the sequences.
    names : sequence of strings
        Names of the sequences.
    mode : string, optional
        Use 'a' to append to an existing file.
    width : int, optional
        Maximum line width.

    """

    # check inputs
    if isinstance(sequences, np.ndarray):
        # single sequence
        sequences = [sequences]
        names = [names]
    if len(sequences) != len(names):
        raise ValueError('must provide the same number of sequences and names')
    for sequence in sequences:
        if sequence.dtype != np.dtype('S1'):
            raise ValueError('expected S1 dtype, found %r' % sequence.dtype)

    # force binary mode
    mode = 'ab' if 'a' in mode else 'wb'

    # write to file
    with open(path, mode=mode) as fasta:
        for name, sequence in zip(names, sequences):

            # force bytes
            if isinstance(name, text_type):
                name = name.encode('ascii')

            header = b'>' + name + b'\n'
            fasta.write(header)

            for i in range(0, sequence.size, width):
                # use tobytes() rather than the deprecated tostring()
                line = sequence[i:i + width].tobytes() + b'\n'
                fasta.write(line)
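# Illustrative sketch: round-trip a single sequence through a temporary FASTA
# file; width=4 just exercises line wrapping.
def _demo_write_fasta():
    import os
    import tempfile
    seq = np.frombuffer(b'ACGTACGT', dtype='S1')
    fd, path = tempfile.mkstemp(suffix='.fasta')
    os.close(fd)
    write_fasta(path, [seq], ['seq1'], width=4)
    with open(path, 'rb') as f:
        assert f.read() == b'>seq1\nACGT\nACGT\n'
    os.remove(path)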
def store(data, arr, start=0, stop=None, offset=0, blen=None):
    """Copy `data` block-wise into `arr`."""

    # setup
    blen = _util.get_blen_array(data, blen)
    if stop is None:
        stop = len(data)
    else:
        stop = min(stop, len(data))
    length = stop - start
    if length < 0:
        raise ValueError('invalid stop/start')

    # copy block-wise
    for i in range(start, stop, blen):
        j = min(i + blen, stop)
        n = j - i
        arr[offset:offset + n] = data[i:j]
        offset += n
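# Illustrative sketch: store() writes into a preallocated destination rather
# than creating new storage.
def _demo_store():
    src = np.arange(10)
    dst = np.zeros(6, dtype=src.dtype)
    store(src, dst, start=2, stop=8, blen=3)
    assert dst.tolist() == [2, 3, 4, 5, 6, 7]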
def compress_table(condition, tbl, axis=None, out=None, blen=None,
                   storage=None, create='table', **kwargs):
    """Return selected rows of a table."""

    # setup
    if axis is not None and axis != 0:
        raise NotImplementedError('only axis 0 is supported')
    if out is not None:
        # argument is only there for numpy API compatibility
        raise NotImplementedError('out argument is not supported')
    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    blen = _util.get_blen_table(tbl, blen)
    _util.check_equal_length(columns[0], condition)
    length = len(columns[0])
    nnz = count_nonzero(condition)

    # block iteration
    out = None
    for i in range(0, length, blen):
        j = min(i + blen, length)
        bcond = condition[i:j]
        # don't access any data unless we have to
        if np.any(bcond):
            bcolumns = [c[i:j] for c in columns]
            res = [np.compress(bcond, c, axis=0) for c in bcolumns]
            if out is None:
                out = getattr(storage, create)(res, names=names,
                                               expectedlen=nnz, **kwargs)
            else:
                out.append(res)

    return out
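# Illustrative sketch (same dict-of-columns assumption as above): row
# selection across all columns; `condition` comes first, as in compress().
def _demo_compress_table():
    tbl = {'foo': np.arange(4), 'bar': np.arange(4) * 10}
    cond = np.array([False, True, False, True])
    out = compress_table(cond, tbl)
    names, columns = _util.check_table_like(out)
    assert np.asarray(columns[names.index('foo')]).tolist() == [1, 3]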
def map_blocks(data, f, blen=None, storage=None, create='array', **kwargs):
    """Apply function `f` block-wise over `data`."""

    # setup
    storage = _util.get_storage(storage)
    if isinstance(data, tuple):
        blen = max(_util.get_blen_array(d, blen) for d in data)
    else:
        blen = _util.get_blen_array(data, blen)
    if isinstance(data, tuple):
        _util.check_equal_length(*data)
        length = len(data[0])
    else:
        length = len(data)

    # block-wise iteration
    out = None
    for i in range(0, length, blen):
        j = min(i + blen, length)

        # obtain blocks
        if isinstance(data, tuple):
            blocks = [d[i:j] for d in data]
        else:
            blocks = [data[i:j]]

        # map
        res = f(*blocks)

        # store
        if out is None:
            out = getattr(storage, create)(res, expectedlen=length, **kwargs)
        else:
            out.append(res)

    return out
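# Illustrative sketch: a unary mapping over one array, and a binary mapping
# over a tuple of equal-length arrays.
def _demo_map_blocks():
    a = np.arange(5)
    doubled = map_blocks(a, lambda x: x * 2, blen=2)
    assert np.asarray(doubled).tolist() == [0, 2, 4, 6, 8]
    b = np.arange(5, 10)
    summed = map_blocks((a, b), np.add, blen=2)
    assert np.asarray(summed).tolist() == [5, 7, 9, 11, 13]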
def subset(data, sel0=None, sel1=None, blen=None, storage=None,
           create='array', **kwargs):
    """Return selected rows and columns of an array."""
    # TODO refactor sel0 and sel1 normalization with ndarray.subset

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    if sel0 is not None:
        sel0 = np.asanyarray(sel0)
    if sel1 is not None:
        sel1 = np.asanyarray(sel1)

    # ensure boolean array for dim 0
    if sel0 is not None and sel0.dtype.kind != 'b':
        # assume indices, convert to boolean condition
        tmp = np.zeros(length, dtype=bool)
        tmp[sel0] = True
        sel0 = tmp

    # ensure indices for dim 1
    if sel1 is not None and sel1.dtype.kind == 'b':
        # assume boolean condition, convert to indices
        sel1, = np.nonzero(sel1)

    # shortcuts
    if sel0 is None and sel1 is None:
        return copy(data, blen=blen, storage=storage, create=create, **kwargs)
    elif sel1 is None:
        return compress(sel0, data, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)
    elif sel0 is None:
        return take(data, sel1, axis=1, blen=blen, storage=storage,
                    create=create, **kwargs)

    # build output
    sel0_nnz = count_nonzero(sel0)
    out = None
    for i in range(0, length, blen):
        j = min(i + blen, length)
        bsel0 = sel0[i:j]
        # don't access data unless we have to
        if np.any(bsel0):
            block = data[i:j]
            res = _numpy_subset(block, bsel0, sel1)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=sel0_nnz,
                                               **kwargs)
            else:
                out.append(res)

    return out
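# Illustrative sketch: simultaneous row and column selection; boolean and
# index selections are normalised internally for each dimension. Relies on
# the `_numpy_subset` helper assumed to be defined elsewhere in the package.
def _demo_subset():
    a = np.arange(12).reshape(4, 3)
    out = subset(a, sel0=[0, 2], sel1=np.array([True, False, True]), blen=2)
    assert np.asarray(out).tolist() == [[0, 2], [6, 8]]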