Пример #1
0
def reduce_axis(data, reducer, block_reducer, mapper=None, axis=None,
                blen=None, storage=None, create='array', **kwargs):
    """Reduce `data` over one or more axes, one block of rows at a time.

    Parameters
    ----------
    data : array_like
        Input supporting ``len()`` and slicing along the first dimension.
    reducer : callable
        Called as ``reducer(block, axis=axis)`` on each block.
    block_reducer : callable
        Called as ``block_reducer(acc, res)`` to merge per-block results when
        the first dimension is being reduced.
    mapper : callable, optional
        If truthy, applied to each block before `reducer`.
    axis : int or tuple of int, optional
        Axis or axes to reduce over; an int is wrapped into a 1-tuple.
    blen : int, optional
        Block length, resolved through ``_util.get_blen_array``.
    storage : optional
        Storage, resolved through ``_util.get_storage``.
    create : str, optional
        Name of the `storage` attribute called to build array outputs.
    **kwargs
        Forwarded to the storage factory. ``out`` is rejected.

    Raises
    ------
    ValueError
        If a non-None ``out`` keyword argument is supplied.

    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    n_rows = len(data)

    # a bare int is handled as a one-element tuple of axes
    if isinstance(axis, int):
        axis = (axis,)

    # 'out' can arrive when a chunked array is handed to numpy.sum(), see
    # https://github.com/cggh/scikit-allel/issues/66
    if kwargs.pop('out', None) is not None:
        raise ValueError('keyword argument "out" is not supported')

    def _blocks():
        # lazily yield each (optionally mapped) block of rows
        for lo in range(0, n_rows, blen):
            chunk = data[lo:min(lo + blen, n_rows)]
            yield mapper(chunk) if mapper else chunk

    if axis is not None and 0 not in axis:
        # first dimension survives: per-block results are simply appended
        result = None
        for chunk in _blocks():
            part = reducer(chunk, axis=axis)
            if result is None:
                result = getattr(storage, create)(part, expectedlen=n_rows,
                                                  **kwargs)
            else:
                result.append(part)
        return result

    # first dimension is reduced: fold per-block results together
    acc = None
    for chunk in _blocks():
        part = reducer(chunk, axis=axis)
        acc = part if acc is None else block_reducer(acc, part)

    if np.isscalar(acc):
        return acc
    if len(acc.shape) == 0:
        # unwrap a zero-dimensional array
        return acc[()]
    return getattr(storage, create)(acc, **kwargs)
Пример #2
0
def compress(condition, data, axis=0, out=None, blen=None, storage=None,
             create='array', **kwargs):
    """Select slices of `data` along `axis` where `condition` is true.

    Parameters
    ----------
    condition : array_like
        Selection along `axis`.
    data : array_like
        Input supporting ``len()`` and slicing along the first dimension.
    axis : int, optional
        Only 0 and 1 are supported.
    out : None
        Present only for numpy API compatibility; must be None.
    blen : int, optional
        Block length, resolved through ``_util.get_blen_array``.
    storage : optional
        Storage, resolved through ``_util.get_storage``.
    create : str, optional
        Name of the `storage` attribute called to build the output.
    **kwargs
        Forwarded to the storage factory.

    Returns
    -------
    The object built by the storage factory, or None if no block produced
    any output.

    Raises
    ------
    NotImplementedError
        If `out` is not None or `axis` is neither 0 nor 1.

    """

    # `out` exists purely to mirror the numpy signature
    if out is not None:
        raise NotImplementedError('out argument is not supported')

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    n_rows = len(data)
    n_selected = count_nonzero(condition)

    if axis == 0:
        _util.check_equal_length(data, condition)

        result = None
        for lo in range(0, n_rows, blen):
            hi = min(lo + blen, n_rows)
            mask = np.asarray(condition[lo:hi])
            # skip the data access entirely when nothing is selected
            if not np.any(mask):
                continue
            picked = np.compress(mask, np.asarray(data[lo:hi]), axis=0)
            if result is not None:
                result.append(picked)
            else:
                result = getattr(storage, create)(picked,
                                                  expectedlen=n_selected,
                                                  **kwargs)
        return result

    if axis == 1:
        result = None
        condition = np.asanyarray(condition)
        for lo in range(0, n_rows, blen):
            hi = min(lo + blen, n_rows)
            picked = np.compress(condition, np.asarray(data[lo:hi]), axis=1)
            if result is not None:
                result.append(picked)
            else:
                result = getattr(storage, create)(picked,
                                                  expectedlen=n_rows,
                                                  **kwargs)
        return result

    raise NotImplementedError('axis not supported: %s' % axis)
Пример #3
0
def reduce_axis(data, reducer, block_reducer, mapper=None, axis=None,
                blen=None, storage=None, create='array', **kwargs):
    """Reduce `data` over one or more axes, one block of rows at a time.

    Each block is converted with ``np.asanyarray``, optionally passed through
    `mapper`, then reduced with ``reducer(block, axis=axis)``. When axis 0 is
    reduced (or `axis` is None) the per-block results are folded together
    with `block_reducer`; otherwise they are appended to an output built by
    ``getattr(storage, create)``.

    Raises
    ------
    ValueError
        If a non-None ``out`` keyword argument is supplied.

    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    total = len(data)

    # a bare int is handled as a one-element tuple of axes
    if isinstance(axis, int):
        axis = (axis,)

    # 'out' can arrive when a chunked array is handed to numpy.sum(), see
    # https://github.com/cggh/scikit-allel/issues/66
    if kwargs.pop('out', None) is not None:
        raise ValueError('keyword argument "out" is not supported')

    # True when the first dimension itself is being reduced
    collapse_rows = axis is None or 0 in axis

    acc = None
    for lo in range(0, total, blen):
        hi = min(lo + blen, total)
        chunk = np.asanyarray(data[lo:hi])
        if mapper:
            chunk = mapper(chunk)
        part = reducer(chunk, axis=axis)
        if collapse_rows:
            # two-step reduction: merge this block into the running result
            acc = part if acc is None else block_reducer(acc, part)
        elif acc is None:
            acc = getattr(storage, create)(part, expectedlen=total, **kwargs)
        else:
            acc.append(part)

    if not collapse_rows:
        return acc
    if np.isscalar(acc):
        return acc
    if len(acc.shape) == 0:
        # unwrap a zero-dimensional array
        return acc[()]
    return getattr(storage, create)(acc, **kwargs)
Пример #4
0
def compress(data, condition, axis=0, blen=None, storage=None,
             create='array', **kwargs):
    """Select slices of `data` along `axis` where `condition` is true.

    Works block-wise over the first dimension of `data`. For ``axis=0`` a
    block of `data` is only read when the matching slice of `condition`
    contains a true value. Returns None if no block produced any output.

    Raises
    ------
    NotImplementedError
        If `axis` is neither 0 nor 1.

    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    n_rows = len(data)
    n_true = count_nonzero(condition)

    if axis == 0:
        _util.check_equal_length(data, condition)

        kept = None
        for lo in range(0, n_rows, blen):
            hi = min(lo + blen, n_rows)
            mask = np.asanyarray(condition[lo:hi])
            # skip the data access entirely when nothing is selected
            if not np.any(mask):
                continue
            rows = np.compress(mask, np.asanyarray(data[lo:hi]), axis=0)
            if kept is not None:
                kept.append(rows)
            else:
                kept = getattr(storage, create)(rows, expectedlen=n_true,
                                                **kwargs)
        return kept

    if axis == 1:
        kept = None
        condition = np.asanyarray(condition)
        for lo in range(0, n_rows, blen):
            hi = min(lo + blen, n_rows)
            cols = np.compress(condition, np.asanyarray(data[lo:hi]), axis=1)
            if kept is not None:
                kept.append(cols)
            else:
                kept = getattr(storage, create)(cols, expectedlen=n_rows,
                                                **kwargs)
        return kept

    raise NotImplementedError('axis not supported: %s' % axis)
Пример #5
0
def vstack_table(tup, blen=None, storage=None, create='table', **kwargs):
    """Stack two or more tables row-wise, copying block by block.

    Each table is validated with ``_util.check_table_like`` (the names of the
    previous table are passed along as `names`). Column slices are converted
    with ``np.asanyarray`` before being written to the output.

    Raises
    ------
    ValueError
        If `tup` is not a tuple or list, or holds fewer than two tables.

    """

    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more tables to stack')

    total = sum(map(len, tup))
    stacked = None
    names = None
    for table in tup:
        step = _util.get_blen_table(table, blen)
        names, cols = _util.check_table_like(table, names=names)
        n_rows = len(cols[0])
        for lo in range(0, n_rows, step):
            hi = min(lo + step, n_rows)
            chunk = [np.asanyarray(col[lo:hi]) for col in cols]
            if stacked is not None:
                stacked.append(chunk)
                continue
            # first block creates the output
            stacked = getattr(storage, create)(chunk, names=names,
                                               expectedlen=total, **kwargs)
    return stacked
Пример #6
0
def copy(data, start=0, stop=None, blen=None, storage=None, create='array',
         **kwargs):
    """Copy rows ``start:stop`` of `data` block-wise into a new array.

    `stop` defaults to ``len(data)`` and is clipped to it. Returns None when
    the selected range is empty.

    Raises
    ------
    ValueError
        If the clipped `stop` lies before `start`.

    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    stop = len(data) if stop is None else min(stop, len(data))
    n_rows = stop - start
    if n_rows < 0:
        raise ValueError('invalid stop/start')

    copied = None
    for lo in range(start, stop, blen):
        hi = min(lo + blen, stop)
        chunk = np.asanyarray(data[lo:hi])
        if copied is not None:
            copied.append(chunk)
        else:
            # first block creates the output
            copied = getattr(storage, create)(chunk, expectedlen=n_rows,
                                              **kwargs)

    return copied
Пример #7
0
def compress_table(tbl, condition, blen=None, storage=None, create='table',
                   **kwargs):
    """Select the rows of `tbl` for which `condition` is true.

    Works block-wise; the columns of a block are only read when the matching
    slice of `condition` contains a true value. Returns None if no block
    produced any output.

    """

    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    blen = _util.get_blen_table(tbl, blen)
    _util.check_equal_length(columns[0], condition)
    n_rows = len(columns[0])
    n_true = count_nonzero(condition)

    selected = None
    for lo in range(0, n_rows, blen):
        hi = min(lo + blen, n_rows)
        mask = np.asanyarray(condition[lo:hi])
        # skip the data access entirely when nothing is selected
        if not np.any(mask):
            continue
        loaded = [np.asanyarray(col[lo:hi]) for col in columns]
        kept = [np.compress(mask, col, axis=0) for col in loaded]
        if selected is not None:
            selected.append(kept)
        else:
            selected = getattr(storage, create)(kept, names=names,
                                                expectedlen=n_true, **kwargs)
    return selected
Пример #8
0
def concatenate_table(tup, blen=None, storage=None, create='table', **kwargs):
    """Stack two or more tables row-wise, copying block by block.

    Each table is validated with ``_util.check_table_like`` (the names of the
    previous table are passed along as `names`). Column slices are handed to
    the output as-is.

    Raises
    ------
    ValueError
        If `tup` is not a tuple or list, or holds fewer than two tables.

    """

    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more tables to stack')

    total = sum(map(len, tup))
    joined = None
    names = None
    for table in tup:
        step = _util.get_blen_table(table, blen)
        names, cols = _util.check_table_like(table, names=names)
        n_rows = len(cols[0])
        for lo in range(0, n_rows, step):
            hi = min(lo + step, n_rows)
            chunk = [col[lo:hi] for col in cols]
            if joined is not None:
                joined.append(chunk)
                continue
            # first block creates the output
            joined = getattr(storage, create)(chunk, names=names,
                                              expectedlen=total, **kwargs)
    return joined
Пример #9
0
def copy_table(tbl, start=0, stop=None, blen=None, storage=None,
               create='table', **kwargs):
    """Copy rows ``start:stop`` of `tbl` block-wise into a new table.

    `stop` defaults to the length of the first column and is clipped to it.
    Returns None when the selected range is empty.

    Raises
    ------
    ValueError
        If the clipped `stop` lies before `start`.

    """

    names, columns = _util.check_table_like(tbl)
    storage = _util.get_storage(storage)
    blen = _util.get_blen_table(tbl, blen)
    n_available = len(columns[0])
    stop = n_available if stop is None else min(stop, n_available)
    n_rows = stop - start
    if n_rows < 0:
        raise ValueError('invalid stop/start')

    copied = None
    for lo in range(start, stop, blen):
        hi = min(lo + blen, stop)
        chunk = [np.asanyarray(col[lo:hi]) for col in columns]
        if copied is not None:
            copied.append(chunk)
        else:
            # first block creates the output
            copied = getattr(storage, create)(chunk, names=names,
                                              expectedlen=n_rows, **kwargs)

    return copied
Пример #10
0
def copy_table(tbl, start=0, stop=None, blen=None, storage=None,
               create='table', **kwargs):
    """Copy rows ``start:stop`` of `tbl` block-wise into a new table.

    `stop` defaults to the length of the first column and is clipped to it.
    Column slices are handed to the output as-is. Returns None when the
    selected range is empty.

    Raises
    ------
    ValueError
        If the clipped `stop` lies before `start`.

    """

    names, columns = _util.check_table_like(tbl)
    storage = _util.get_storage(storage)
    blen = _util.get_blen_table(tbl, blen)
    n_available = len(columns[0])
    stop = n_available if stop is None else min(stop, n_available)
    n_rows = stop - start
    if n_rows < 0:
        raise ValueError('invalid stop/start')

    copied = None
    for lo in range(start, stop, blen):
        hi = min(lo + blen, stop)
        chunk = [col[lo:hi] for col in columns]
        if copied is not None:
            copied.append(chunk)
        else:
            # first block creates the output
            copied = getattr(storage, create)(chunk, names=names,
                                              expectedlen=n_rows, **kwargs)

    return copied
Пример #11
0
def compress_table(tbl, condition, blen=None, storage=None, create='table',
                   **kwargs):
    """Select the rows of `tbl` for which `condition` is true.

    The table is walked in blocks of `blen` rows. A block's columns are read
    only if its slice of `condition` has at least one true entry. The result
    is None when nothing was written.

    """

    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    blen = _util.get_blen_table(tbl, blen)
    _util.check_equal_length(columns[0], condition)
    total = len(columns[0])
    expected = count_nonzero(condition)

    result = None
    for begin in range(0, total, blen):
        end = min(begin + blen, total)
        flags = np.asanyarray(condition[begin:end])
        if np.any(flags):
            # only now touch the table data
            raw = [np.asanyarray(column[begin:end]) for column in columns]
            rows = [np.compress(flags, column, axis=0) for column in raw]
            if result is None:
                result = getattr(storage, create)(rows, names=names,
                                                  expectedlen=expected,
                                                  **kwargs)
            else:
                result.append(rows)
    return result
Пример #12
0
def copy(data, start=0, stop=None, blen=None, storage=None, create='array',
         **kwargs):
    """Copy rows ``start:stop`` of `data` block-wise into a new array.

    `stop` defaults to ``len(data)`` and is clipped to it. Slices of `data`
    are handed to the output as-is. Returns None when the selected range is
    empty.

    Raises
    ------
    ValueError
        If the clipped `stop` lies before `start`.

    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    stop = len(data) if stop is None else min(stop, len(data))
    n_rows = stop - start
    if n_rows < 0:
        raise ValueError('invalid stop/start')

    copied = None
    for lo in range(start, stop, blen):
        hi = min(lo + blen, stop)
        chunk = data[lo:hi]
        if copied is not None:
            copied.append(chunk)
        else:
            # first block creates the output
            copied = getattr(storage, create)(chunk, expectedlen=n_rows,
                                              **kwargs)

    return copied
Пример #13
0
def reduce_axis(data, reducer, block_reducer, mapper=None, axis=None,
                blen=None, storage=None, create='array', **kwargs):
    """Apply an operation to `data` that reduces over one or more axes.

    Parameters
    ----------
    data : array_like
        Input supporting ``len()`` and slicing along the first dimension.
    reducer : callable
        Called as ``reducer(block, axis=axis)`` on each block.
    block_reducer : callable
        Called as ``block_reducer(acc, res)`` to merge per-block results when
        the first dimension is being reduced.
    mapper : callable, optional
        If truthy, applied to each block before `reducer`.
    axis : int or tuple of int, optional
        Axis or axes to reduce over; an int is wrapped into a 1-tuple.
    blen : int, optional
        Block length, resolved through ``_util.get_blen_array``.
    storage : optional
        Storage, resolved through ``_util.get_storage``.
    create : str, optional
        Name of the `storage` attribute called to build array outputs.
    **kwargs
        Forwarded to the storage factory. ``out`` is rejected.

    Raises
    ------
    ValueError
        If a non-None ``out`` keyword argument is supplied.

    """

    # An 'out' kwarg can arrive when a chunked array is passed to a numpy
    # reduction such as numpy.sum(). It must never leak into the storage
    # factory through **kwargs, so strip it here and reject real values.
    kwarg_out = kwargs.pop('out', None)
    if kwarg_out is not None:
        raise ValueError('keyword argument "out" is not supported')

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    # normalise axis arg
    if isinstance(axis, int):
        axis = (axis,)

    if axis is None or 0 in axis:
        # two-step reduction: reduce each block, then merge block results
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = np.asanyarray(data[i:j])
            if mapper:
                block = mapper(block)
            res = reducer(block, axis=axis)
            if out is None:
                out = res
            else:
                out = block_reducer(out, res)
        if np.isscalar(out):
            return out
        elif len(out.shape) == 0:
            # unwrap a zero-dimensional array
            return out[()]
        else:
            return getattr(storage, create)(out, **kwargs)

    else:
        # first dimension is preserved, no need to reduce blocks
        out = None
        for i in range(0, length, blen):
            j = min(i+blen, length)
            block = np.asanyarray(data[i:j])
            if mapper:
                block = mapper(block)
            r = reducer(block, axis=axis)
            if out is None:
                out = getattr(storage, create)(r, expectedlen=length, **kwargs)
            else:
                out.append(r)
        return out
Пример #14
0
def take(data, indices, axis=0, out=None, mode='raise', blen=None,
         storage=None, create='array', **kwargs):
    """Take elements from `data` along `axis`, processing block-wise.

    For ``axis=0`` the indices must be strictly increasing; they are turned
    into a boolean condition and the work is delegated to ``compress``. For
    ``axis=1`` each block is passed to ``np.take`` with the given `mode`.

    Parameters
    ----------
    out : None
        Present only for numpy API compatibility; must be None.
    mode : str, optional
        Forwarded to ``np.take`` in the ``axis=1`` case.

    Raises
    ------
    NotImplementedError
        If `out` is not None, if `axis` is neither 0 nor 1, or if ``axis=0``
        and `indices` are not strictly increasing.

    """

    # `out` exists purely to mirror the numpy signature
    if out is not None:
        raise NotImplementedError('out argument is not supported')
    n_rows = len(data)

    if axis == 0:
        # every index must exceed its predecessor
        indices = np.asanyarray(indices)
        if np.any(indices[1:] <= indices[:-1]):
            raise NotImplementedError('indices must be strictly increasing')

        # express the selection as a boolean mask and reuse compress()
        mask = np.zeros((n_rows,), dtype=bool)
        mask[indices] = True
        return compress(mask, data, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)

    if axis == 1:
        storage = _util.get_storage(storage)
        blen = _util.get_blen_array(data, blen)

        taken = None
        for lo in range(0, n_rows, blen):
            hi = min(lo + blen, n_rows)
            picked = np.take(data[lo:hi], indices, axis=1, mode=mode)
            if taken is not None:
                taken.append(picked)
            else:
                taken = getattr(storage, create)(picked, expectedlen=n_rows,
                                                 **kwargs)
        return taken

    raise NotImplementedError('axis not supported: %s' % axis)
Пример #15
0
def subset(data, sel0=None, sel1=None, blen=None, storage=None, create='array',
           **kwargs):
    """Return selected rows (`sel0`) and columns (`sel1`) of an array.

    A non-boolean `sel0` is treated as indices and converted to a boolean
    condition; a boolean `sel1` is converted to indices. When one or both
    selections are None the work is delegated to ``copy``, ``compress`` or
    ``take``. Returns None if no block produced any output.

    """

    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    n_rows = len(data)
    if sel0 is not None:
        sel0 = np.asanyarray(sel0)
    if sel1 is not None:
        sel1 = np.asanyarray(sel1)

    # dimension 0 is always handled as a boolean condition
    if sel0 is not None and sel0.dtype.kind != 'b':
        row_mask = np.zeros(n_rows, dtype=bool)
        row_mask[sel0] = True
        sel0 = row_mask

    # dimension 1 is always handled as indices
    if sel1 is not None and sel1.dtype.kind == 'b':
        sel1 = np.nonzero(sel1)[0]

    # delegate when at most one dimension is actually being selected
    if sel0 is None:
        if sel1 is None:
            return copy(data, blen=blen, storage=storage, create=create,
                        **kwargs)
        return take(data, sel1, axis=1, blen=blen, storage=storage,
                    create=create, **kwargs)
    if sel1 is None:
        return compress(data, sel0, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)

    n_selected = count_nonzero(sel0)
    result = None
    for lo in range(0, n_rows, blen):
        hi = min(lo + blen, n_rows)
        block_mask = sel0[lo:hi]
        # skip the data access entirely when no row is selected
        if not np.any(block_mask):
            continue
        chunk = np.asanyarray(data[lo:hi])
        picked = _ndarray_subset(chunk, block_mask, sel1)
        if result is not None:
            result.append(picked)
        else:
            result = getattr(storage, create)(picked, expectedlen=n_selected,
                                              **kwargs)

    return result
Пример #16
0
def eval_table(tbl, expression, vm='python', blen=None, storage=None,
               create='array', vm_kwargs=None, **kwargs):
    """Evaluate `expression` block-wise against the columns of a table.

    Parameters
    ----------
    tbl : table-like
        Validated with ``_util.check_table_like``.
    expression : str
        Expression referring to column names.
    vm : {'python', 'numexpr'}, optional
        Evaluation backend: the built-in ``eval`` or ``numexpr.evaluate``.
    vm_kwargs : dict, optional
        Extra keyword arguments for the backend; ignored by the 'python' one.

    Raises
    ------
    ValueError
        If `vm` is neither "numexpr" nor "python".

    """

    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    n_rows = len(columns[0])
    if vm_kwargs is None:
        vm_kwargs = {}

    # pick the evaluation backend
    if vm == 'numexpr':
        import numexpr
        evaluate = numexpr.evaluate
    elif vm == 'python':
        # NOTE(review): `expression` goes straight into eval(); do not call
        # this with untrusted input.
        # noinspection PyUnusedLocal
        def evaluate(expr, local_dict=None, **_ignored):
            # extra backend kwargs are accepted but not used
            return eval(expr, {}, local_dict)
    else:
        raise ValueError('expected vm either "numexpr" or "python"')

    # only the columns the expression refers to are ever sliced
    needed = {}
    for var in _get_expression_variables(expression, vm):
        needed[var] = columns[names.index(var)]

    # block size is derived from the required columns only
    blen = _util.get_blen_table(needed, blen=blen)

    result = None
    for lo in range(0, n_rows, blen):
        hi = min(lo + blen, n_rows)
        scope = {var: col[lo:hi] for var, col in needed.items()}
        values = evaluate(expression, local_dict=scope, **vm_kwargs)
        if result is not None:
            result.append(values)
        else:
            result = getattr(storage, create)(values, expectedlen=n_rows,
                                              **kwargs)

    return result
Пример #17
0
def concatenate(tup, axis=0, blen=None, storage=None, create='array',
                **kwargs):
    """Concatenate two or more arrays along `axis`, processing block-wise.

    For ``axis=0`` the arrays are copied one after another into a new output.
    For any other axis the work is delegated to ``map_blocks`` with a
    function that calls ``np.concatenate`` on matching blocks.

    Raises
    ------
    ValueError
        If `tup` is not a tuple or list, or holds fewer than two arrays.

    """

    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more arrays')

    if axis == 0:
        total = sum(map(len, tup))
        joined = None
        for arr in tup:
            step = _util.get_blen_array(arr, blen)
            for lo in range(0, len(arr), step):
                hi = min(lo + step, len(arr))
                chunk = arr[lo:hi]
                if joined is not None:
                    joined.append(chunk)
                    continue
                # first block creates the output
                joined = getattr(storage, create)(chunk, expectedlen=total,
                                                  **kwargs)
        return joined

    def _join(*blocks):
        # glue the matching blocks of every input along the requested axis
        return np.concatenate(blocks, axis=axis)

    return map_blocks(tup, _join, blen=blen, storage=storage, create=create,
                      **kwargs)
Пример #18
0
def write_fasta(path, sequences, names, mode='w', width=80):
    """Write nucleotide sequences stored as numpy arrays to a FASTA file.

    Parameters
    ----------

    path : string
        File path.
    sequences : sequence of arrays
        One or more ndarrays of dtype 'S1' containing the sequences. A single
        ndarray is treated as one sequence, with `names` as its name.
    names : sequence of strings
        Names of the sequences.
    mode : string, optional
        Use 'a' to append to an existing file. The file is always opened in
        binary mode: any mode containing 'a' appends, anything else
        overwrites.
    width : int, optional
        Maximum line width.

    Raises
    ------
    ValueError
        If the numbers of sequences and names differ, or if a sequence does
        not have dtype 'S1'.

    """

    # check inputs
    if isinstance(sequences, np.ndarray):
        # single sequence
        sequences = [sequences]
        names = [names]
    if len(sequences) != len(names):
        raise ValueError('must provide the same number of sequences and names')
    for sequence in sequences:
        if sequence.dtype != np.dtype('S1'):
            raise ValueError('expected S1 dtype, found %r' % sequence.dtype)

    # force binary mode
    mode = 'ab' if 'a' in mode else 'wb'

    # write to file
    with open(path, mode=mode) as fasta:
        for name, sequence in zip(names, sequences):
            # force bytes
            if isinstance(name, text_type):
                name = name.encode('ascii')
            header = b'>' + name + b'\n'
            fasta.write(header)
            for i in range(0, sequence.size, width):
                # tobytes() replaces tostring(), which is deprecated and
                # removed in NumPy 2.0
                line = sequence[i:i + width].tobytes() + b'\n'
                fasta.write(line)
Пример #19
0
def write_fasta(path, sequences, names, mode='w', width=80):
    """Write nucleotide sequences stored as numpy arrays to a FASTA file.

    Parameters
    ----------

    path : string
        File path.
    sequences : sequence of arrays
        One or more ndarrays of dtype 'S1' containing the sequences. A single
        ndarray is treated as one sequence, with `names` as its name.
    names : sequence of strings
        Names of the sequences.
    mode : string, optional
        Use 'a' to append to an existing file. The file is always opened in
        binary mode: any mode containing 'a' appends, anything else
        overwrites.
    width : int, optional
        Maximum line width.

    Raises
    ------
    ValueError
        If the numbers of sequences and names differ, or if a sequence does
        not have dtype 'S1'.

    """

    # check inputs
    if isinstance(sequences, np.ndarray):
        # single sequence
        sequences = [sequences]
        names = [names]
    if len(sequences) != len(names):
        raise ValueError('must provide the same number of sequences and names')
    for sequence in sequences:
        if sequence.dtype != np.dtype('S1'):
            raise ValueError('expected S1 dtype, found %r' % sequence.dtype)

    # force binary mode
    mode = 'ab' if 'a' in mode else 'wb'

    # write to file
    with open(path, mode=mode) as fasta:
        for name, sequence in zip(names, sequences):
            # force bytes
            if isinstance(name, text_type):
                name = name.encode('ascii')
            header = b'>' + name + b'\n'
            fasta.write(header)
            for i in range(0, sequence.size, width):
                # tobytes() replaces tostring(), which is deprecated and
                # removed in NumPy 2.0
                line = sequence[i:i+width].tobytes() + b'\n'
                fasta.write(line)
Пример #20
0
def store(data, arr, start=0, stop=None, offset=0, blen=None):
    """Copy rows ``start:stop`` of `data` block-wise into `arr`.

    Blocks are written to `arr` by slice assignment, beginning at position
    `offset`. `stop` defaults to ``len(data)`` and is clipped to it.

    Raises
    ------
    ValueError
        If the clipped `stop` lies before `start`.

    """

    blen = _util.get_blen_array(data, blen)
    stop = len(data) if stop is None else min(stop, len(data))
    if stop - start < 0:
        raise ValueError('invalid stop/start')

    # `offset` tracks the next write position in `arr`
    for lo in range(start, stop, blen):
        hi = min(lo + blen, stop)
        end = offset + (hi - lo)
        arr[offset:end] = data[lo:hi]
        offset = end
Пример #21
0
def take(data, indices, axis=0, blen=None, storage=None,
         create='array', **kwargs):
    """Take elements from `data` along `axis`, processing block-wise.

    For ``axis=0`` the indices must be strictly increasing; they are turned
    into a boolean condition and the work is delegated to ``compress``. For
    ``axis=1`` each block is passed to ``np.take``.

    Raises
    ------
    NotImplementedError
        If `axis` is neither 0 nor 1, or if ``axis=0`` and `indices` are not
        strictly increasing.

    """

    n_rows = len(data)

    if axis == 0:
        # every index must exceed its predecessor
        indices = np.asanyarray(indices)
        if np.any(indices[1:] <= indices[:-1]):
            raise NotImplementedError(
                'indices must be strictly increasing'
            )

        # express the selection as a boolean mask and reuse compress()
        mask = np.zeros((n_rows,), dtype=bool)
        mask[indices] = True
        return compress(data, mask, axis=0, blen=blen, storage=storage,
                        create=create, **kwargs)

    if axis == 1:
        storage = _util.get_storage(storage)
        blen = _util.get_blen_array(data, blen)

        taken = None
        for lo in range(0, n_rows, blen):
            hi = min(lo + blen, n_rows)
            chunk = np.asanyarray(data[lo:hi])
            picked = np.take(chunk, indices, axis=1)
            if taken is not None:
                taken.append(picked)
            else:
                taken = getattr(storage, create)(picked, expectedlen=n_rows,
                                                 **kwargs)
        return taken

    raise NotImplementedError('axis not supported: %s' % axis)
Пример #22
0
def store(data, arr, start=0, stop=None, offset=0, blen=None):
    """Copy rows ``start:stop`` of `data` block-wise into `arr`.

    Writing into `arr` starts at `offset` and advances by the size of each
    block. A missing `stop` means ``len(data)``; a larger one is clipped.

    Raises
    ------
    ValueError
        If the clipped `stop` lies before `start`.

    """

    blen = _util.get_blen_array(data, blen)
    n_available = len(data)
    if stop is None:
        stop = n_available
    else:
        stop = min(stop, n_available)
    if stop < start:
        raise ValueError('invalid stop/start')

    write_at = offset
    for begin in range(start, stop, blen):
        end = min(begin + blen, stop)
        size = end - begin
        arr[write_at:write_at + size] = data[begin:end]
        write_at += size
Пример #23
0
def eval_table(tbl, expression, vm='python', blen=None, storage=None,
               create='array', vm_kwargs=None, **kwargs):
    """Evaluate `expression` block-wise against the columns of a table.

    `vm` selects the backend: 'python' uses the built-in ``eval`` and
    'numexpr' uses ``numexpr.evaluate``. `vm_kwargs` are passed to the
    backend; the 'python' backend ignores them.

    Raises
    ------
    ValueError
        If `vm` is neither "numexpr" nor "python".

    """

    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    total = len(columns[0])
    vm_kwargs = dict() if vm_kwargs is None else vm_kwargs

    # choose the evaluation backend
    if vm == 'numexpr':
        import numexpr
        evaluate = numexpr.evaluate
    elif vm == 'python':
        # NOTE(review): `expression` goes straight into eval(); do not call
        # this with untrusted input.
        # noinspection PyUnusedLocal
        def evaluate(expr, local_dict=None, **unused):
            # backend kwargs are accepted for signature parity only
            return eval(expr, dict(), local_dict)
    else:
        raise ValueError('expected vm either "numexpr" or "python"')

    # map each variable used by the expression to its column
    variables = _get_expression_variables(expression, vm)
    required = dict((name, columns[names.index(name)]) for name in variables)

    # block size is derived from the required columns only
    blen = _util.get_blen_table(required, blen=blen)

    evaluated = None
    for begin in range(0, total, blen):
        end = min(begin + blen, total)
        local_scope = dict(
            (name, column[begin:end]) for name, column in required.items()
        )
        values = evaluate(expression, local_dict=local_scope, **vm_kwargs)
        if evaluated is None:
            evaluated = getattr(storage, create)(values, expectedlen=total,
                                                 **kwargs)
        else:
            evaluated.append(values)

    return evaluated
Пример #24
0
def subset(data, sel0, sel1, blen=None, storage=None, create='array',
           **kwargs):
    """Return selected rows and columns of an array.

    Parameters
    ----------
    data : array_like
        Input supporting ``len()`` and slicing along the first dimension.
    sel0 : array_like
        Row selection: either a boolean condition or an array of indices.
    sel1 : array_like
        Column selection: either a boolean condition or an array of indices.
    blen : int, optional
        Block length, resolved through ``_util.get_blen_array``.
    storage : optional
        Storage, resolved through ``_util.get_storage``.
    create : str, optional
        Name of the `storage` attribute called to build the output.
    **kwargs
        Forwarded to the storage factory.

    Returns
    -------
    The object built by the storage factory, or None if no block contains a
    selected row.

    Notes
    -----
    Whether a selection is a boolean condition or a set of indices is decided
    by its dtype, not by its length. An index array with as many entries as
    the axis is long is therefore still treated as indices.

    """

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)
    sel0 = np.asanyarray(sel0)
    sel1 = np.asanyarray(sel1)

    # ensure boolean array for dim 0
    if sel0.dtype.kind != 'b':
        # not boolean, so these are indices: convert to boolean condition
        tmp = np.zeros(length, dtype=bool)
        tmp[sel0] = True
        sel0 = tmp

    # ensure indices for dim 1
    if sel1.dtype.kind == 'b':
        # boolean condition: convert to indices
        sel1 = np.nonzero(sel1)[0]

    # build output
    sel0_nnz = count_nonzero(sel0)
    out = None
    for i in range(0, length, blen):
        j = min(i+blen, length)
        bsel0 = sel0[i:j]
        # don't access data unless we have to
        if np.any(bsel0):
            block = np.asanyarray(data[i:j])
            res = _ndarray_subset(block, bsel0, sel1)
            if out is None:
                out = getattr(storage, create)(res, expectedlen=sel0_nnz,
                                               **kwargs)
            else:
                out.append(res)

    return out
Пример #25
0
def compress_table(condition, tbl, axis=None, out=None, blen=None,
                   storage=None, create='table', **kwargs):
    """Select the rows of `tbl` for which `condition` is true.

    Works block-wise; the columns of a block are only read when the matching
    slice of `condition` contains a true value. Returns None if no block
    produced any output.

    Parameters
    ----------
    axis : None or 0
        Only row selection is supported.
    out : None
        Present only for numpy API compatibility; must be None.

    Raises
    ------
    NotImplementedError
        If `axis` is neither None nor 0, or if `out` is not None.

    """

    if axis is not None and axis != 0:
        raise NotImplementedError('only axis 0 is supported')
    # `out` exists purely to mirror the numpy signature
    if out is not None:
        raise NotImplementedError('out argument is not supported')

    storage = _util.get_storage(storage)
    names, columns = _util.check_table_like(tbl)
    blen = _util.get_blen_table(tbl, blen)
    _util.check_equal_length(columns[0], condition)
    n_rows = len(columns[0])
    n_true = count_nonzero(condition)

    selected = None
    for lo in range(0, n_rows, blen):
        hi = min(lo + blen, n_rows)
        mask = condition[lo:hi]
        # skip the data access entirely when nothing is selected
        if not np.any(mask):
            continue
        loaded = [col[lo:hi] for col in columns]
        kept = [np.compress(mask, col, axis=0) for col in loaded]
        if selected is not None:
            selected.append(kept)
        else:
            selected = getattr(storage, create)(kept, names=names,
                                                expectedlen=n_true, **kwargs)
    return selected
Пример #26
0
def vstack(tup, blen=None, storage=None, create='array', **kwargs):
    """Concatenate a sequence of arrays along the first axis, block-wise.

    Parameters
    ----------
    tup : tuple or list
        Two or more array-likes supporting ``len`` and slicing.
    blen : optional
        Block length hint, resolved per input via
        ``_util.get_blen_array``.
    storage : optional
        Storage specifier, resolved via ``_util.get_storage``.
    create : str, optional
        Name of the storage method used to build the output.
    **kwargs
        Forwarded to the storage constructor.

    Returns
    -------
    The stacked output, or None if every input has length zero.

    Raises
    ------
    ValueError
        If `tup` is not a tuple/list or holds fewer than two items.
    """

    # setup
    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more arrays to stack')

    # total number of rows, used as a sizing hint for the output
    total_rows = sum(len(arr) for arr in tup)

    stacked = None
    for arr in tup:
        step = _util.get_blen_array(arr, blen)
        n_rows = len(arr)
        for start in range(0, n_rows, step):
            stop = min(start + step, len(arr))
            chunk = np.asanyarray(arr[start:stop])
            if stacked is not None:
                stacked.append(chunk)
                continue
            # very first block: create the output container from it
            stacked = getattr(storage, create)(chunk,
                                               expectedlen=total_rows,
                                               **kwargs)
    return stacked
Пример #27
0
def vstack(tup, blen=None, storage=None, create='array', **kwargs):
    """Stack the arrays in `tup` row-wise, copying one block at a time.

    `tup` must be a tuple or list of at least two array-likes, otherwise
    ValueError is raised. The block length of each input is resolved via
    ``_util.get_blen_array``; the output is built with the `create` method
    of the resolved `storage`, receiving the summed input length as
    ``expectedlen`` plus any extra `kwargs`. Returns None when all inputs
    are empty, because no block is ever produced.
    """

    # setup
    storage = _util.get_storage(storage)
    if not isinstance(tup, (tuple, list)):
        raise ValueError('expected tuple or list, found %r' % tup)
    if len(tup) < 2:
        raise ValueError('expected two or more arrays to stack')

    def _iter_blocks():
        # lazily yield consecutive blocks of every input, in order
        for arr in tup:
            step = _util.get_blen_array(arr, blen)
            for lo in range(0, len(arr), step):
                hi = min(lo + step, len(arr))
                yield np.asanyarray(arr[lo:hi])

    # build output
    expectedlen = sum(len(arr) for arr in tup)
    result = None
    for piece in _iter_blocks():
        if result is None:
            result = getattr(storage, create)(piece,
                                              expectedlen=expectedlen,
                                              **kwargs)
        else:
            result.append(piece)
    return result
Пример #28
0
def apply(data, f, blen=None, storage=None, create='array', **kwargs):
    """Call `f` on successive blocks of `data` and collect the results.

    Parameters
    ----------
    data : array-like or tuple of array-likes
        A tuple is treated as several equal-length inputs whose aligned
        blocks are passed to `f` as separate positional arguments.
    f : callable
        Receives each block (converted with ``np.asanyarray``) and returns
        the value to store.
    blen : optional
        Block length hint, resolved via ``_util.get_blen_array``; for a
        tuple the largest resolved value is used.
    storage : optional
        Storage specifier, resolved via ``_util.get_storage``.
    create : str, optional
        Name of the storage method used to build the output.
    **kwargs
        Forwarded to the storage constructor.

    Returns
    -------
    The output holding the per-block results, or None for empty input.
    """

    # setup
    storage = _util.get_storage(storage)
    if isinstance(data, tuple):
        blen = max(_util.get_blen_array(d, blen) for d in data)
        _util.check_equal_length(*data)
        length = len(data[0])
        inputs = data
    else:
        blen = _util.get_blen_array(data, blen)
        length = len(data)
        inputs = (data,)

    # block-wise iteration
    result = None
    for start in range(0, length, blen):
        stop = min(start + blen, length)

        # load the aligned block of every input, then map
        args = [np.asanyarray(arr[start:stop]) for arr in inputs]
        mapped = f(*args)

        # store
        if result is not None:
            result.append(mapped)
        else:
            result = getattr(storage, create)(mapped,
                                              expectedlen=length,
                                              **kwargs)

    return result
Пример #29
0
def map_blocks(data, f, blen=None, storage=None, create='array', **kwargs):
    """Call `f` on successive blocks of `data` and collect the results.

    Parameters
    ----------
    data : array-like or tuple of array-likes
        A tuple is treated as several equal-length inputs whose aligned
        blocks are passed to `f` as separate positional arguments.
    f : callable
        Receives each block exactly as produced by slicing the input and
        returns the value to store.
    blen : optional
        Block length hint, resolved via ``_util.get_blen_array``; for a
        tuple the largest resolved value is used.
    storage : optional
        Storage specifier, resolved via ``_util.get_storage``.
    create : str, optional
        Name of the storage method used to build the output.
    **kwargs
        Forwarded to the storage constructor.

    Returns
    -------
    The output holding the per-block results, or None for empty input.
    """

    # setup
    storage = _util.get_storage(storage)
    if isinstance(data, tuple):
        blen = max(_util.get_blen_array(d, blen) for d in data)
        _util.check_equal_length(*data)
        length = len(data[0])
        sources = data
    else:
        blen = _util.get_blen_array(data, blen)
        length = len(data)
        sources = (data,)

    # block-wise iteration
    collected = None
    for lo in range(0, length, blen):
        hi = min(lo + blen, length)

        # slice every input over the same window, then map
        pieces = [src[lo:hi] for src in sources]
        value = f(*pieces)

        # store
        if collected is not None:
            collected.append(value)
        else:
            collected = getattr(storage, create)(value,
                                                 expectedlen=length,
                                                 **kwargs)

    return collected
Пример #30
0
def subset(data,
           sel0=None,
           sel1=None,
           blen=None,
           storage=None,
           create='array',
           **kwargs):
    """Select rows (`sel0`) and/or columns (`sel1`) of `data` block-wise.

    Parameters
    ----------
    data : array-like
        Input supporting ``len`` and slicing along the first dimension.
    sel0 : array-like, optional
        Selection on the first dimension, either a boolean condition or
        indices. Indices are turned into a boolean mask of ``len(data)``
        entries, so their order and any repeats are not preserved.
    sel1 : array-like, optional
        Selection on the second dimension, either indices or a boolean
        condition (converted to indices with ``np.nonzero``).
    blen : optional
        Block length hint, resolved via ``_util.get_blen_array``.
    storage : optional
        Storage specifier, resolved via ``_util.get_storage``.
    create : str, optional
        Name of the storage method used to build the output.
    **kwargs
        Forwarded to the storage constructor (or to the delegate below).

    Returns
    -------
    With no selection the result of ``copy``; with only `sel0` the result
    of ``compress`` on axis 0; with only `sel1` the result of ``take`` on
    axis 1. With both, the block-wise built output, or None if `sel0`
    selects no rows.
    """

    # TODO refactor sel0 and sel1 normalization with ndarray.subset

    # setup
    storage = _util.get_storage(storage)
    blen = _util.get_blen_array(data, blen)
    length = len(data)

    rows_given = sel0 is not None
    cols_given = sel1 is not None
    if rows_given:
        sel0 = np.asanyarray(sel0)
    if cols_given:
        sel1 = np.asanyarray(sel1)

    # dim 0 is always handled as a boolean mask
    if rows_given and sel0.dtype.kind != 'b':
        row_mask = np.zeros(length, dtype=bool)
        row_mask[sel0] = True
        sel0 = row_mask

    # dim 1 is always handled as indices
    if cols_given and sel1.dtype.kind == 'b':
        (sel1,) = np.nonzero(sel1)

    # shortcuts: delegate whenever at most one dimension is selected
    shared = dict(blen=blen, storage=storage, create=create, **kwargs)
    if not rows_given and not cols_given:
        return copy(data, **shared)
    if not cols_given:
        return compress(sel0, data, axis=0, **shared)
    if not rows_given:
        return take(data, sel1, axis=1, **shared)

    # build output
    n_rows_selected = count_nonzero(sel0)
    result = None
    for start in range(0, length, blen):
        stop = min(start + blen, length)
        block_mask = sel0[start:stop]
        if not np.any(block_mask):
            # don't access data unless we have to
            continue
        chunk = data[start:stop]
        picked = _numpy_subset(chunk, block_mask, sel1)
        if result is not None:
            result.append(picked)
        else:
            result = getattr(storage, create)(picked,
                                              expectedlen=n_rows_selected,
                                              **kwargs)

    return result