Exemplo n.º 1
0
def phase_parents_by_transmission(g, window_size):
    """Phase parent genotypes from a trio or cross, given progeny genotypes
    already phased by Mendelian transmission.

    Parameters
    ----------
    g : GenotypeArray
        Genotype array, with parents as first two columns and progeny as
        remaining columns, where progeny genotypes are already phased.
    window_size : int
        Number of previous heterozygous sites to include when phasing each
        parent. A number somewhere between 10 and 100 may be appropriate,
        depending on levels of heterozygosity and quality of data.

    Returns
    -------
    g : GenotypeArray
        Genotype array with parents phased where possible.

    """

    # setup
    check_type(g, GenotypeArray)
    check_dtype(g.values, 'i1')
    check_ploidy(g.ploidy, 2)
    if g.is_phased is None:
        raise ValueError('genotype array must first have progeny phased by transmission')
    check_min_samples(g.n_samples, 3)

    # run the phasing
    is_phased = g.is_phased.view('u1')
    phase_parents_by_transmission_int8(g.values, is_phased, window_size)

    # outputs
    return g
Exemplo n.º 2
0
def phase_by_transmission(g, window_size, copy=True):
    """Phase genotypes in a trio or cross where possible using Mendelian
    transmission.

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, 2)
        Genotype array, with parents as first two columns and progeny as
        remaining columns.
    window_size : int
        Number of previous heterozygous sites to include when phasing each
        parent. A number somewhere between 10 and 100 may be appropriate,
        depending on levels of heterozygosity and quality of data.
    copy : bool, optional
        If False, attempt to phase genotypes in-place. Note that this is
        only possible if the input array has int8 dtype, otherwise a copy is
        always made regardless of this parameter.

    Returns
    -------
    g : GenotypeArray
        Genotype array with progeny phased where possible.

    """

    # setup
    g = np.asarray(g, dtype='i1')
    g = GenotypeArray(g, copy=copy)
    g._values = memoryview_safe(g.values)
    check_ploidy(g.ploidy, 2)
    check_min_samples(g.n_samples, 3)

    # phase the progeny
    is_phased = _opt_phase_progeny_by_transmission(g.values)
    g.is_phased = np.asarray(is_phased).view(bool)

    # phase the parents
    _opt_phase_parents_by_transmission(g.values, is_phased, window_size)

    return g
Exemplo n.º 3
0
def phase_progeny_by_transmission(g):
    """Phase progeny genotypes from a trio or cross using Mendelian
    transmission.

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, 2)
        Genotype array, with parents as first two columns and progeny as
        remaining columns.

    Returns
    -------
    g : ndarray, int8, shape (n_variants, n_samples, 2)
        Genotype array with progeny phased where possible.

    Examples
    --------
    >>> import allel
    >>> g = allel.GenotypeArray([
    ...     [[0, 0], [0, 0], [0, 0]],
    ...     [[1, 1], [1, 1], [1, 1]],
    ...     [[0, 0], [1, 1], [0, 1]],
    ...     [[1, 1], [0, 0], [0, 1]],
    ...     [[0, 0], [0, 1], [0, 0]],
    ...     [[0, 0], [0, 1], [0, 1]],
    ...     [[0, 1], [0, 0], [0, 1]],
    ...     [[0, 1], [0, 1], [0, 1]],
    ...     [[0, 1], [1, 2], [0, 1]],
    ...     [[1, 2], [0, 1], [1, 2]],
    ...     [[0, 1], [2, 3], [0, 2]],
    ...     [[2, 3], [0, 1], [1, 3]],
    ...     [[0, 0], [0, 0], [-1, -1]],
    ...     [[0, 0], [0, 0], [1, 1]],
    ... ], dtype='i1')
    >>> g = allel.phase_progeny_by_transmission(g)
    >>> print(g.to_str(row_threshold=None))
    0/0 0/0 0|0
    1/1 1/1 1|1
    0/0 1/1 0|1
    1/1 0/0 1|0
    0/0 0/1 0|0
    0/0 0/1 0|1
    0/1 0/0 1|0
    0/1 0/1 0/1
    0/1 1/2 0|1
    1/2 0/1 2|1
    0/1 2/3 0|2
    2/3 0/1 3|1
    0/0 0/0 ./.
    0/0 0/0 1/1
    >>> g.is_phased
    array([[False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False, False],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False,  True],
           [False, False, False],
           [False, False, False]])

    """

    # setup
    g = GenotypeArray(g, dtype='i1', copy=True)
    check_ploidy(g.ploidy, 2)
    check_min_samples(g.n_samples, 3)

    # run the phasing
    # N.B., a copy has already been made, so no need to make memoryview safe
    is_phased = _opt_phase_progeny_by_transmission(g.values)
    g.is_phased = np.asarray(is_phased).view(bool)

    # outputs
    return g