示例#1
0
    def test_pairwise_distance_multidim(self):
        """Check pairwise_distance with a callable metric on allele counts.

        The fixture is converted to per-call allele counts, so each item
        handed to the metric is itself two-dimensional. With two entries
        along axis 1 there is a single pair, and the result must equal the
        metric evaluated directly on those two slices.
        """
        calls = [
            [[0, 0], [0, 0]],
            [[1, 1], [1, 1]],
            [[1, 1], [2, 2]],
            [[0, 0], [0, 1]],
            [[0, 0], [0, 2]],
            [[1, 1], [1, 2]],
            [[0, 1], [0, 1]],
            [[0, 1], [1, 2]],
            [[0, 0], [-1, -1]],
            [[0, 1], [-1, -1]],
            [[-1, -1], [-1, -1]],
        ]
        allele_counts = GenotypeArray(calls, dtype='i1').to_allele_counts()

        def summed_mpd(ac1, ac2):
            # Collapse the result of mean_pairwise_difference_between
            # (called with fill=0) into one number by summing it.
            return allel.stats.mean_pairwise_difference_between(
                ac1, ac2, fill=0
            ).sum()

        first = allele_counts[:, 0]
        second = allele_counts[:, 1]
        expect = [summed_mpd(first, second)]
        actual = allel.stats.pairwise_distance(allele_counts, summed_mpd)
        aeq(expect, actual)
示例#2
0
def mendel_errors(parent_genotypes, progeny_genotypes):
    """Count Mendelian transmission errors in progeny genotype calls.

    Every progeny call is compared with the calls of the two parents at the
    same variant, and the number of alleles that cannot be accounted for by
    Mendelian inheritance is reported. Variants where either parent has a
    missing call are reported as 0 for all progeny.

    Parameters
    ----------
    parent_genotypes : array_like, int, shape (n_variants, 2, 2)
        Genotype calls for the two parents.
    progeny_genotypes : array_like, int, shape (n_variants, n_progeny, 2)
        Genotype calls for the progeny.

    Returns
    -------
    me : ndarray, int, shape (n_variants, n_progeny)
        Number of Mendel errors found for each progeny genotype call.

    Examples
    --------
    All of the calls below are compatible with Mendelian transmission. A
    missing progeny call also yields 0::

        >>> import allel
        >>> import numpy as np
        >>> genotypes = np.array([
        ...     # aa x aa -> aa
        ...     [[0, 0], [0, 0], [0, 0], [-1, -1], [-1, -1], [-1, -1]],
        ...     [[1, 1], [1, 1], [1, 1], [-1, -1], [-1, -1], [-1, -1]],
        ...     [[2, 2], [2, 2], [2, 2], [-1, -1], [-1, -1], [-1, -1]],
        ...     # aa x ab -> aa or ab
        ...     [[0, 0], [0, 1], [0, 0], [0, 1], [-1, -1], [-1, -1]],
        ...     [[0, 0], [0, 2], [0, 0], [0, 2], [-1, -1], [-1, -1]],
        ...     [[1, 1], [0, 1], [1, 1], [0, 1], [-1, -1], [-1, -1]],
        ...     # aa x bb -> ab
        ...     [[0, 0], [1, 1], [0, 1], [-1, -1], [-1, -1], [-1, -1]],
        ...     [[0, 0], [2, 2], [0, 2], [-1, -1], [-1, -1], [-1, -1]],
        ...     [[1, 1], [2, 2], [1, 2], [-1, -1], [-1, -1], [-1, -1]],
        ...     # aa x bc -> ab or ac
        ...     [[0, 0], [1, 2], [0, 1], [0, 2], [-1, -1], [-1, -1]],
        ...     [[1, 1], [0, 2], [0, 1], [1, 2], [-1, -1], [-1, -1]],
        ...     # ab x ab -> aa or ab or bb
        ...     [[0, 1], [0, 1], [0, 0], [0, 1], [1, 1], [-1, -1]],
        ...     [[1, 2], [1, 2], [1, 1], [1, 2], [2, 2], [-1, -1]],
        ...     [[0, 2], [0, 2], [0, 0], [0, 2], [2, 2], [-1, -1]],
        ...     # ab x bc -> ab or ac or bb or bc
        ...     [[0, 1], [1, 2], [0, 1], [0, 2], [1, 1], [1, 2]],
        ...     [[0, 1], [0, 2], [0, 0], [0, 1], [0, 1], [1, 2]],
        ...     # ab x cd -> ac or ad or bc or bd
        ...     [[0, 1], [2, 3], [0, 2], [0, 3], [1, 2], [1, 3]],
        ... ])
        >>> me = allel.mendel_errors(genotypes[:, :2], genotypes[:, 2:])
        >>> me
        array([[0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0],
               [0, 0, 0, 0]])

    'Non-parental' inheritance: the progeny carry one or two alleles that
    neither parent has. The error count is 1 or 2, matching the number of
    non-parental alleles::

        >>> genotypes = np.array([
        ...     # aa x aa -> ab or ac or bb or cc
        ...     [[0, 0], [0, 0], [0, 1], [0, 2], [1, 1], [2, 2]],
        ...     [[1, 1], [1, 1], [0, 1], [1, 2], [0, 0], [2, 2]],
        ...     [[2, 2], [2, 2], [0, 2], [1, 2], [0, 0], [1, 1]],
        ...     # aa x ab -> ac or bc or cc
        ...     [[0, 0], [0, 1], [0, 2], [1, 2], [2, 2], [2, 2]],
        ...     [[0, 0], [0, 2], [0, 1], [1, 2], [1, 1], [1, 1]],
        ...     [[1, 1], [0, 1], [1, 2], [0, 2], [2, 2], [2, 2]],
        ...     # aa x bb -> ac or bc or cc
        ...     [[0, 0], [1, 1], [0, 2], [1, 2], [2, 2], [2, 2]],
        ...     [[0, 0], [2, 2], [0, 1], [1, 2], [1, 1], [1, 1]],
        ...     [[1, 1], [2, 2], [0, 1], [0, 2], [0, 0], [0, 0]],
        ...     # ab x ab -> ac or bc or cc
        ...     [[0, 1], [0, 1], [0, 2], [1, 2], [2, 2], [2, 2]],
        ...     [[0, 2], [0, 2], [0, 1], [1, 2], [1, 1], [1, 1]],
        ...     [[1, 2], [1, 2], [0, 1], [0, 2], [0, 0], [0, 0]],
        ...     # ab x bc -> ad or bd or cd or dd
        ...     [[0, 1], [1, 2], [0, 3], [1, 3], [2, 3], [3, 3]],
        ...     [[0, 1], [0, 2], [0, 3], [1, 3], [2, 3], [3, 3]],
        ...     [[0, 2], [1, 2], [0, 3], [1, 3], [2, 3], [3, 3]],
        ...     # ab x cd -> ae or be or ce or de
        ...     [[0, 1], [2, 3], [0, 4], [1, 4], [2, 4], [3, 4]],
        ... ])
        >>> me = allel.mendel_errors(genotypes[:, :2], genotypes[:, 2:])
        >>> me
        array([[1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 2, 2],
               [1, 1, 1, 2],
               [1, 1, 1, 2],
               [1, 1, 1, 2],
               [1, 1, 1, 1]])

    'Hemi-parental' inheritance: the progeny appear to hold two copies of an
    allele that occurs only once, in just one of the parents::

        >>> genotypes = np.array([
        ...     # aa x ab -> bb
        ...     [[0, 0], [0, 1], [1, 1], [-1, -1]],
        ...     [[0, 0], [0, 2], [2, 2], [-1, -1]],
        ...     [[1, 1], [0, 1], [0, 0], [-1, -1]],
        ...     # ab x bc -> aa or cc
        ...     [[0, 1], [1, 2], [0, 0], [2, 2]],
        ...     [[0, 1], [0, 2], [1, 1], [2, 2]],
        ...     [[0, 2], [1, 2], [0, 0], [1, 1]],
        ...     # ab x cd -> aa or bb or cc or dd
        ...     [[0, 1], [2, 3], [0, 0], [1, 1]],
        ...     [[0, 1], [2, 3], [2, 2], [3, 3]],
        ... ])
        >>> me = allel.mendel_errors(genotypes[:, :2], genotypes[:, 2:])
        >>> me
        array([[1, 0],
               [1, 0],
               [1, 0],
               [1, 1],
               [1, 1],
               [1, 1],
               [1, 1],
               [1, 1]])

    'Uni-parental' inheritance: the progeny appear to have received both of
    their alleles from one and the same parent::

        >>> genotypes = np.array([
        ...     # aa x bb -> aa or bb
        ...     [[0, 0], [1, 1], [0, 0], [1, 1]],
        ...     [[0, 0], [2, 2], [0, 0], [2, 2]],
        ...     [[1, 1], [2, 2], [1, 1], [2, 2]],
        ...     # aa x bc -> aa or bc
        ...     [[0, 0], [1, 2], [0, 0], [1, 2]],
        ...     [[1, 1], [0, 2], [1, 1], [0, 2]],
        ...     # ab x cd -> ab or cd
        ...     [[0, 1], [2, 3], [0, 1], [2, 3]],
        ... ])
        >>> me = allel.mendel_errors(genotypes[:, :2], genotypes[:, 2:])
        >>> me
        array([[1, 1],
               [1, 1],
               [1, 1],
               [1, 1],
               [1, 1],
               [1, 1]])

    """

    # Wrap both inputs first, then verify that each is diploid.
    parents = GenotypeArray(parent_genotypes)
    progeny = GenotypeArray(progeny_genotypes)
    check_ploidy(parents.ploidy, 2)
    check_ploidy(progeny.ploidy, 2)

    # Per-call allele counts over a common allele range, so that the last
    # axis of both arrays lines up allele by allele.
    highest_allele = max(parents.max(), progeny.max())
    parent_counts = parents.to_allele_counts(
        max_allele=highest_allele, dtype='i1'
    )
    progeny_counts = progeny.to_allele_counts(
        max_allele=highest_allele, dtype='i1'
    )

    # Non-parental and hemi-parental errors. Each parent can pass on at most
    # one copy of any allele it carries, so capping parental counts at 1 and
    # adding the two parents gives the most copies a child may hold. Any
    # surplus in the progeny counts is an error.
    transmissible = parent_counts.clip(max=1).sum(axis=1)[:, np.newaxis, :]
    surplus = (progeny_counts - transmissible).clip(min=0)
    errors = surplus.sum(axis=2)

    # Uni-parental errors. These are only detectable where the parents have
    # no allele in common; there, a child identical to either parent must
    # have taken both alleles from that parent.
    p1_counts = parent_counts[:, 0, np.newaxis, :]
    p2_counts = parent_counts[:, 1, np.newaxis, :]
    parents_disjoint = ~np.any((p1_counts > 0) & (p2_counts > 0), axis=2)
    same_as_p1 = np.all(progeny_counts == p1_counts, axis=2)
    same_as_p2 = np.all(progeny_counts == p2_counts, axis=2)
    errors[parents_disjoint & (same_as_p1 | same_as_p2)] = 1

    # Nothing can be concluded when a parental call is missing: reset the
    # whole variant row to zero.
    parent_missing = np.any(parents.is_missing(), axis=1)
    errors[parent_missing] = 0

    return errors