示例#1
0
def paint_transmission(parent_haplotypes, progeny_haplotypes):
    """Classify every progeny allele by how it was transmitted from one
    diploid parent.

    Each allele call in `progeny_haplotypes` is compared against the two
    haplotypes of the parent at the same variant and assigned an integer
    inheritance state.

    Parameters
    ----------
    parent_haplotypes : array_like, int, shape (n_variants, 2)
        The two haplotypes carried by a single diploid parent.
    progeny_haplotypes : array_like, int, shape (n_variants, n_progeny)
        Haplotypes observed in the progeny that were transmitted by this
        parent, i.e., haplotypes of the parent's gametes.

    Returns
    -------
    painting : ndarray, uint8, shape (n_variants, n_progeny)
        Inheritance state per progeny allele call, coded as:

        * 0 = undetermined
        * 1 = allele inherited from the first parental haplotype
        * 2 = allele inherited from the second parental haplotype
        * 3 = reference allele, carried by both parental haplotypes
        * 4 = non-reference allele, carried by both parental haplotypes
        * 5 = non-parental allele
        * 6 = either or both parental alleles missing
        * 7 = missing (progeny) allele

    Raises
    ------
    ValueError
        If `parent_haplotypes` does not hold exactly two haplotypes.

    Examples
    --------
    >>> import allel
    >>> haplotypes = allel.HaplotypeArray([
    ...     [0, 0, 0, 1, 2, -1],
    ...     [0, 1, 0, 1, 2, -1],
    ...     [1, 0, 0, 1, 2, -1],
    ...     [1, 1, 0, 1, 2, -1],
    ...     [0, 2, 0, 1, 2, -1],
    ...     [0, -1, 0, 1, 2, -1],
    ...     [-1, 1, 0, 1, 2, -1],
    ...     [-1, -1, 0, 1, 2, -1],
    ... ], dtype='i1')
    >>> painting = allel.paint_transmission(haplotypes[:, :2],
    ...                                     haplotypes[:, 2:])
    >>> painting
    array([[3, 5, 5, 7],
           [1, 2, 5, 7],
           [2, 1, 5, 7],
           [5, 4, 5, 7],
           [1, 5, 2, 7],
           [6, 6, 6, 7],
           [6, 6, 6, 7],
           [6, 6, 6, 7]], dtype=uint8)

    """

    # normalise both inputs, then validate the parental array
    parent_haplotypes = HaplotypeArray(parent_haplotypes)
    progeny_haplotypes = HaplotypeArray(progeny_haplotypes)
    if parent_haplotypes.n_haplotypes != 2:
        raise ValueError('exactly two parental haplotypes should be provided')

    # missingness: negative allele codes are treated as missing calls
    progeny_missing = progeny_haplotypes < 0
    # 1-D, one flag per variant; kept 1-D so it can later select whole rows
    parent_missing_rows = np.any(parent_haplotypes < 0, axis=1)

    # view the two parental haplotypes as one diploid genotype call per
    # variant, so the genotype-class predicates can be reused
    parent_gt = GenotypeArray(parent_haplotypes[:, np.newaxis, :])
    het = parent_gt.is_het()
    hom_ref = parent_gt.is_hom_ref()
    hom_alt = parent_gt.is_hom_alt()

    # each parental allele as a column, compared against every progeny column
    first_allele = parent_haplotypes[:, 0, np.newaxis]
    second_allele = parent_haplotypes[:, 1, np.newaxis]
    matches_first = progeny_haplotypes == first_allele
    matches_second = progeny_haplotypes == second_allele

    # inheritance can only be called where neither the progeny allele nor
    # any parental allele is missing
    callable_calls = ~(progeny_missing | parent_missing_rows[:, np.newaxis])
    segregating_calls = callable_calls & het

    # (mask, state) pairs, applied strictly in this order: later entries
    # overwrite earlier ones, so the missing-data states take precedence
    ordered_states = (
        (segregating_calls & matches_first, INHERIT_PARENT1),
        (segregating_calls & matches_second, INHERIT_PARENT2),
        (callable_calls & hom_ref & matches_first, INHERIT_NONSEG_REF),
        (callable_calls & hom_alt & matches_first, INHERIT_NONSEG_ALT),
        (callable_calls & ~(matches_first | matches_second),
         INHERIT_NONPARENTAL),
        (parent_missing_rows, INHERIT_PARENT_MISSING),
        (progeny_missing, INHERIT_MISSING),
    )

    # anything never matched by a mask stays 0 (undetermined)
    painting = np.zeros(progeny_haplotypes.shape, dtype='u1')
    for mask, state in ordered_states:
        painting[mask] = state

    return painting