Exemplo n.º 1
0
def map_signal_to_squiggle(data,
                           sequence,
                           model='squiggle_r94',
                           rate=1.0,
                           back_prob=0.0,
                           local_pen=2.0,
                           skip_pen=5000.0,
                           min_score=5.0):
    """Align raw signal to a base sequence via a simulated squiggle.

    The signal is wrapped in a `RawTable`, trimmed and scaled, and then
    matched with the library's Viterbi squiggle matcher against a squiggle
    simulated from `sequence`.

    :param data: `ndarray` containing raw signal data.
    :param sequence: base sequence to which to align data.
    :param model: model to use in simulating squiggle.
    :param rate: rate of translocation relative to squiggle model.
    :param back_prob: probability of backward movement.
    :param local_pen: penalty for local alignment.
    :param skip_pen: penalty for skipping position in sequence.
    :param min_score: floor on match score.

    :returns: tuple containing (alignment score, alignment path); the path
        is an `int32` array with one entry per sample of the processed
        raw table.
    """
    signal = RawTable(data)
    trimmed = signal.trim()
    trimmed.scale()

    simulated = sequence_to_squiggle(sequence, model=model)

    # Output buffer filled in place by the C routine; `np.zeros` already
    # yields a C-contiguous array so it can be exposed to cffi directly.
    alignment = np.zeros(signal._rt.n, dtype=np.int32)
    alignment_ptr = ffi.cast("int32_t *", ffi.from_buffer(alignment))

    match_score = lib.squiggle_match_viterbi(
        signal.data(), rate, simulated.data(),
        back_prob, local_pen, skip_pen, min_score,
        alignment_ptr)

    return match_score, alignment
Exemplo n.º 2
0
def _decode_post(post,
                 stay_pen=0.0,
                 skip_pen=0.0,
                 local_pen=2.0,
                 use_slip=False):
    """Viterbi-decode transducer posteriors into a basecall.

    :param post: a `ScrappyMatrix` containing transducer posteriors.
    :param stay_pen: penalty for staying.
    :param skip_pen: penalty for skipping a base.
    :param local_pen: penalty for local basecalling.
    :param use_slip: allow slipping (movement more than 2 bases).

    :returns: tuple containing (call, score, call positions per raw block).
    """
    n_blocks, n_states = post.shape
    # Both the state path and the positions array hold one more entry than
    # there are blocks.
    path_len = n_blocks + 1

    state_path = ffi.new("int[]", path_len)
    score = lib.decode_transducer(
        post.data(), stay_pen, skip_pen, local_pen, state_path, use_slip)

    positions = np.zeros(path_len, dtype=np.int32)
    positions_ptr = ffi.cast("int *", positions.ctypes.data)
    # NOTE(review): the returned C string is copied by `ffi.string` but never
    # released here -- confirm who owns it.
    called = lib.overlapper(state_path, path_len, n_states - 1, positions_ptr)

    return ffi.string(called).decode(), score, positions
Exemplo n.º 3
0
def map_signal_to_squiggle(data, sequence, back_prob=0.0, local_pen=2.0, min_score=5.0):
    """Align a squiggle to a sequence using a simulated squiggle.

    :param data: `ndarray` containing raw signal data.
    :param sequence: base sequence to which to align data.
    :param back_prob: probability of backward movement.
    :param local_pen: penalty for local alignment.
    :param min_score: floor on match score.

    :returns: tuple containing (alignment score, alignment path), or `None`
        if `sequence_to_squiggle` returned `None` for the sequence.
    """
    raw = RawTable(data)
    raw.trim().scale()

    squiggle = sequence_to_squiggle(sequence)
    if squiggle is None:
        return None

    # The simulated squiggle is released explicitly with `free_matrix`; the
    # `finally` guarantees that happens even when allocating the path buffer
    # or the library call raises, so the matrix is never leaked.
    try:
        # Output buffer written in place by the C routine.
        path = np.ascontiguousarray(np.zeros(raw._rt.n, dtype=np.int32))
        p_path = ffi.cast("int32_t *", ffi.from_buffer(path))

        score = lib.squiggle_match_viterbi(
            raw.data(), squiggle, back_prob, local_pen, min_score, p_path)
    finally:
        free_matrix(squiggle)

    return score, path
Exemplo n.º 4
0
def _numpy_to_scrappy_matrix(numpy_array):
    """Build a bare `scrappie_matrix` from a two-dimensional `ndarray`.

    The first axis of the array is passed to the library as the column count
    and the second axis as the row count.

    :param numpy_array: `ndarray` with at least two axes.

    :returns: the result of `lib.mat_from_array` for the array contents.
    """
    dims = numpy_array.shape
    n_cols, n_rows = dims[0], dims[1]

    # Convert to the library float type in C order without copying when the
    # input already satisfies both requirements.
    as_float = numpy_array.astype(ftype, order='C', copy=False)
    contiguous = np.ascontiguousarray(as_float)
    ptr = ffi.cast("float *", contiguous.ctypes.data)
    return lib.mat_from_array(ptr, n_rows, n_cols)
Exemplo n.º 5
0
def _decode_post_crf(post):
    """Viterbi-decode conditional random field transitions into a basecall.

    :param post: a `ScrappyMatrix` containing CRF transitions.

    :returns: tuple containing (basecall, score, call positions per raw data block).
    """
    n_blocks, _ = post.shape
    buffer_len = n_blocks + 1

    state_path = ffi.new("int[]", buffer_len)
    score = lib.decode_crf(post.data(), state_path)

    # `np.zeros` already returns a C-contiguous array suitable for cffi.
    positions = np.zeros(buffer_len, dtype=np.int32)
    positions_ptr = ffi.cast("int *", ffi.from_buffer(positions))
    # NOTE(review): the returned C string is copied by `ffi.string` but never
    # released here -- confirm who owns it.
    called = lib.crfpath_to_basecall(state_path, n_blocks, positions_ptr)

    return ffi.string(called).decode(), score, positions
Exemplo n.º 6
0
    def __init__(self, data, start=0, end=None):
        """Representation of a scrappie `raw_table`.

        :param data: `ndarray` containing raw data.
        :param start: value stored in the `start` field of the table.
        :param end: value stored in the `end` field of the table; defaults
            to `len(data)` when `None`.

        ..note:: The instance keeps its own contiguous copy of `data`,
            converted to the float type expected by the extension library,
            so the underlying buffer stays alive independently of the
            caller's array. To obtain an up-to-date (possibly trimmed and
            scaled) copy of the data use `raw_table.data(as_numpy=True)`.
        """
        stop = len(data) if end is None else end

        # Private copy (`copy=True`) in C order; the struct below points
        # into this buffer, so it is held on the instance.
        own_copy = data.astype(ftype, order='C', copy=True)
        self._data = np.ascontiguousarray(own_copy)

        table = ffi.new('raw_table *')
        table.raw = ffi.cast("float *", ffi.from_buffer(self._data))
        table.n = len(self._data)
        table.start = start
        table.end = stop
        self._rt = table[0]
Exemplo n.º 7
0
def map_post_to_sequence(post,
                         sequence,
                         stay_pen=0,
                         skip_pen=0,
                         local_pen=4.0,
                         viterbi=False,
                         path=False,
                         bands=None):
    """Block-based local-global alignment of a squiggle to a sequence using
    either Forward or Viterbi algorithm. For the latter the Viterbi path can
    optionally be calculated.

    :param post: a `ScrappyMatrix` containing log-probabilities (as from
       `calc_post`).
    :param sequence: a base sequence which to map.
    :param stay_pen: penalty for zero-state movement from one block to next.
    :param skip_pen: penalty for two-state movement from one block to next.
    :param local_pen: penalty for local alignment through blocks
    :param viterbi: use Viterbi algorithm rather than forward.
    :param path: calculate alignment path (only valid for `viterbi==True`
        and `bands==None`).
    :param bands: two sequences containing lower and upper extremal allowed
        positions for each block. Should be length corresponding to number
        of blocks of `post`. If a single number is given, a diagonal band with
        width 2 * `bands` * #states / #blocks will be used. If `None` is given
        banding is not used (a full DP matrix is evaluated).

    :returns: (score, path).

    :raises ValueError: if `path` is requested without `viterbi`, if `bands`
        is not `None`, an integer, or of length 2, if a supplied band has
        fewer entries than `post` has blocks, or if the library rejects the
        banding structure.
    :raises TypeError: if `post` is not a `ScrappyMatrix`.
    :raises RuntimeError: if encoding the sequence or the alignment fails.

    ..note:: if `viterbi`==False or `path`==False, the returned path will
        be `None`.

    ..note:: the banded routines are not given the path buffer, so when
        `bands` is supplied together with `path=True` the returned path
        array is left zero-filled.
    """

    if path and not viterbi:
        raise ValueError('Cannot calulate path with `viterbi==False`.')
    if not isinstance(post, ScrappyMatrix):
        raise TypeError('`post` should be a ScrappyMatrix.')

    nblock, nstate = post.shape
    _, kmer_len = guess_state_properties(nstate)

    # Number of k-mer positions in the sequence.
    seq_len = len(sequence) - kmer_len + 1
    # NOTE(review): the encoded sequence is returned by the C library and is
    # not released in this function -- confirm ownership.
    p_seq = _none_if_null(
        lib.encode_bases_to_integers(sequence.encode(), len(sequence),
                                     kmer_len))
    if p_seq is None:
        raise RuntimeError(
            'An unknown error occurred whilst encoding sequence.')

    if viterbi and path:
        # Output buffer, one entry per block, handed to the C routine.
        path_data = np.zeros(nblock, dtype=np.int32)
        p_path = ffi.cast("int *", ffi.from_buffer(path_data))
    else:
        path_data = None
        p_path = ffi.NULL

    if bands is None:
        if viterbi:
            score = lib.map_to_sequence_viterbi(post.data(), stay_pen,
                                                skip_pen, local_pen, p_seq,
                                                seq_len, p_path)
        else:
            score = lib.map_to_sequence_forward(post.data(), stay_pen,
                                                skip_pen, local_pen, p_seq,
                                                seq_len)
    else:
        if isinstance(bands, int):
            # create a monotonic diagonal band
            gradient = seq_len / nblock
            width = 2 * bands * gradient
            hband = width / 2
            lower = [max(0, blk * gradient - hband) for blk in range(nblock)]
            upper = [min(seq_len, blk * gradient + hband)
                     for blk in range(nblock)]
            bands = [
                np.ascontiguousarray(np.array(bound, dtype=np.uintp))
                for bound in (lower, upper)
            ]
        elif len(bands) == 2:
            bands = [np.ascontiguousarray(x, dtype=np.uintp) for x in bands]
        else:
            raise ValueError(
                '`bands` should be `None`, an integer, or length 2.')

        # The C routines are told each band holds `nblock` entries; a shorter
        # buffer would be read past its end, so reject it up front.
        if any(band.size < nblock for band in bands):
            raise ValueError(
                '`bands` sequences should contain an entry for every block.')

        band_low, band_high = bands
        p_poslow = ffi.cast("size_t *", ffi.from_buffer(band_low))
        p_poshigh = ffi.cast("size_t *", ffi.from_buffer(band_high))
        if not lib.are_bounds_sane(p_poslow, p_poshigh, nblock, seq_len):
            raise ValueError('Supplied banding structure is not valid.')

        if viterbi:
            func = lib.map_to_sequence_viterbi_banded
        else:
            func = lib.map_to_sequence_forward_banded
        score = func(post.data(), stay_pen, skip_pen, local_pen, p_seq,
                     seq_len, p_poslow, p_poshigh)

    score = _none_if_null(score)
    if score is None:
        raise RuntimeError('An unknown error occurred during alignment.')

    return score, path_data