示例#1
0
def pw_align(
        seqA,
        seqB,
        gop=-1,
        scale=0.5,
        scorer=False,
        mode='global',
        distance=False,
        **keywords):
    """
    Compute a pairwise alignment of two sequences.

    Parameters
    ----------
    seqA, seqB : {str, list, tuple}
        The two sequences to align. Any of strings, lists, or tuples may be
        passed; both are normalised via ``_as_lists`` before alignment.
    gop : int (default=-1)
        Gap opening penalty.
    scale : float (default=0.5)
        Gap extension scale. Rather than being a separate gap extension
        penalty, this factor "scales" the gap opening penalty.
    scorer : dict (default=False)
        Scoring dictionary keyed by segment pairs. It has to cover all
        segment matches between the two inputs. When a falsy value is
        given, a scorer is created automatically.
    mode : {"global", "local", "dialign", "overlap"} (default="global")
        The alignment mode, see :evobib:`List2012a` for details.
    distance : bool (default=False)
        If ``True``, the distance score following :evobib:`Downey2008` is
        returned, otherwise the basic similarity score.
    **keywords
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    The value returned by ``talign.align_pair``. In the example below this
    is a tuple of the two aligned sequences followed by the score.

    Examples
    --------
    Align two words using the dialign algorithm::

        >>> seqA = 'fat cat'
        >>> seqB = 'catfat'
        >>> pw_align(seqA, seqB, mode='dialign')
        (['f', 'a', 't', ' ', 'c', 'a', 't', '-', '-', '-'],
         ['-', '-', '-', '-', 'c', 'a', 't', 'f', 'a', 't'],
         3.0)

    """
    seqA, seqB = _as_lists(seqA, seqB)

    # The aligner receives the distance switch as an integer (0 or 1).
    distance_flag = int(bool(distance))

    if not scorer:
        if distance_flag:
            # Distance mode: plain identity scorer, +1.0 for identical
            # segments and -1.0 for every other pair, stored symmetrically.
            segments = sorted(set(seqA + seqB))
            scorer = {}
            for left, right in multicombinations2(segments):
                value = 1.0 if left == right else -1.0
                scorer[right, left] = value
                scorer[left, right] = value
        else:
            # Similarity mode: derive the scorer from the two sequences.
            scorer = _get_scorer(seqA, seqB)

    return talign.align_pair(
        seqA, seqB, gop, scale, scorer, mode, distance_flag)
示例#2
0
def pw_align(
        seqA,
        seqB,
        gop=-1,
        scale=0.5,
        scorer=False,
        mode='global',
        distance=False,
        **keywords):
    """
    Align a pair of sequences with the selected alignment mode.

    Parameters
    ----------
    seqA, seqB : {str, list, tuple}
        Input sequences. Strings, lists, and tuples are all accepted; they
        are passed through ``_as_lists`` before any further processing.
    gop : int (default=-1)
        The gap opening penalty.
    scale : float (default=0.5)
        The gap extension scale. Unlike a traditional gap extension
        penalty, it "scales" the gap opening penalty.
    scorer : dict (default=False)
        A dictionary covering all segment matches between the inputs. If
        the value is falsy, a scorer is calculated automatically.
    mode : {"global", "local", "dialign", "overlap"} (default="global")
        One of the four alignment modes, see :evobib:`List2012a`.
    distance : bool (default=False)
        Return the distance score following :evobib:`Downey2008` when
        ``True``; return the basic similarity score otherwise.
    **keywords
        Extra keyword arguments are accepted but ignored here.

    Returns
    -------
    Whatever ``talign.align_pair`` returns for the prepared arguments. The
    example below shows two aligned sequences and a score.

    Examples
    --------
    Align two words using the dialign algorithm::

        >>> seqA = 'fat cat'
        >>> seqB = 'catfat'
        >>> pw_align(seqA, seqB, mode='dialign')
        (['f', 'a', 't', ' ', 'c', 'a', 't', '-', '-', '-'],
         ['-', '-', '-', '-', 'c', 'a', 't', 'f', 'a', 't'],
         3.0)

    """
    seqA, seqB = _as_lists(seqA, seqB)

    # Normalise the switch to the integer form handed to the aligner.
    if distance:
        distance = 1
    else:
        distance = 0

    if not scorer:
        if distance == 0:
            # No scorer given, similarity requested: build one from the data.
            scorer = _get_scorer(seqA, seqB)
        else:
            # No scorer given, distance requested: symmetric identity
            # scorer over every segment occurring in either sequence.
            alphabet = sorted(set(seqA + seqB))
            scorer = {
                key: (1.0 if a == b else -1.0)
                for a, b in multicombinations2(alphabet)
                for key in ((b, a), (a, b))
            }

    # start alignment
    aligned = talign.align_pair(seqA, seqB, gop, scale, scorer, mode, distance)
    return aligned