Exemplo n.º 1
0
def test_ihh_scan_d():
    """Check ihh_scan when haplotype homozygosity is 0 from the start."""
    hap = np.array([[0, 1], [1, 0]])
    dist = np.array([10], dtype='f8')

    # the same result is expected whether or not edges are included
    for edges in (False, True):
        expect = [0, 0]
        actual = ihh_scan(hap, dist, min_ehh=0, include_edges=edges)
        assert_array_nanclose(expect, actual)
Exemplo n.º 2
0
def test_ihh_scan_c():
    """Check ihh_scan for one haplotype pair whose homozygosity decays."""
    hap = np.array([[0, 1], [0, 0], [0, 0]])
    dist = np.array([10, 10], dtype='f8')

    # first without edges, then with edges; the expectation is identical
    for edges in (False, True):
        expect = [0, 5, 15]
        actual = ihh_scan(hap, dist, min_ehh=0, include_edges=edges)
        assert_array_nanclose(expect, actual)
Exemplo n.º 3
0
def test_ihh_scan_a():
    """Check ihh_scan for one haplotype pair identical at every variant."""
    # 3 variants x 2 haplotypes, all carrying the same allele
    hap = np.zeros((3, 2), dtype=int)
    dist = np.array([10, 10], dtype='f8')

    # (include_edges, expected result)
    cases = [
        # do not include edges
        (False, [np.nan, np.nan, np.nan]),
        # include edges
        (True, [0, 10, 20]),
    ]

    for edges, expect in cases:
        actual = ihh_scan(hap, dist, min_ehh=0, include_edges=edges)
        assert_array_nanclose(expect, actual)
Exemplo n.º 4
0
def test_ihh_scan_b():
    """Check ihh_scan handling of a large gap, which is encoded as -1.

    Uses one haplotype pair that is identical at every variant.
    """
    hap = np.array([[0, 0], [0, 0], [0, 0]])
    dist = np.array([10, -1], dtype='f8')

    # (include_edges, expected result)
    cases = [
        # do not include edges
        (False, [np.nan, np.nan, np.nan]),
        # include edges
        (True, [0, 10, np.nan]),
    ]

    for edges, expect in cases:
        actual = ihh_scan(hap, dist, min_ehh=0, include_edges=edges)
        assert_array_nanclose(expect, actual)
Exemplo n.º 5
0
def test_ihh_scan_e():
    """Check ihh_scan when starting homozygosity is below min_ehh."""
    hap = np.array([[0, 0, 1], [0, 1, 0]])
    dist = np.array([10], dtype='f8')

    # (min_ehh, include_edges, expected result)
    cases = [
        (0, False, [np.nan, 10 / 6]),
        (0, True, [0, 10 / 6]),
        (0.5, False, [0, 0]),
        (0.5, True, [0, 0]),
    ]

    for threshold, edges, expect in cases:
        actual = ihh_scan(hap, dist, min_ehh=threshold, include_edges=edges)
        assert_array_almost_equal(expect, actual)
Exemplo n.º 6
0
def xpehh(h1,
          h2,
          pos,
          map_pos=None,
          min_ehh=0.05,
          include_edges=False,
          gap_scale=20000,
          max_gap=200000,
          is_accessible=None,
          use_threads=True):
    """Compute the unstandardized cross-population extended haplotype
    homozygosity score (XPEHH) for each variant.

    Parameters
    ----------
    h1 : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array for the first population.
    h2 : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array for the second population.
    pos : array_like, int, shape (n_variants,)
        Variant positions on physical or genetic map.
    map_pos : array_like, float, shape (n_variants,), optional
        Variant positions (genetic map distance).
    min_ehh : float, optional
        Minimum EHH beyond which to truncate integrated haplotype
        homozygosity calculation.
    include_edges : bool, optional
        If True, report scores even if EHH does not decay below `min_ehh`
        before reaching the edge of the data.
    gap_scale : int, optional
        Rescale distance between variants if gap is larger than this value.
    max_gap : int, optional
        Do not report scores if EHH spans a gap larger than this number of
        base pairs.
    is_accessible : array_like, bool, optional
        Genome accessibility array. If provided, distance between variants
        will be computed as the number of accessible bases between them.
    use_threads : bool, optional
        If True use multiple threads to compute. Threads are only used when
        more than one CPU is available; at most 4 threads are started.

    Returns
    -------
    score : ndarray, float, shape (n_variants,)
        Unstandardized XPEHH scores.

    Notes
    -----

    This function will calculate XPEHH for all variants. To exclude variants
    below a given minor allele frequency, filter the input haplotype arrays
    before passing to this function.

    This function returns NaN for any EHH calculations where haplotype
    homozygosity does not decay below `min_ehh` before reaching the first or
    last variant. To disable this behaviour, set `include_edges` to True.

    Note that the unstandardized score is returned. Usually these scores are
    then standardized genome-wide.

    Haplotype arrays from the two populations may have different numbers of
    haplotypes.

    See Also
    --------
    standardize

    """

    # check inputs
    h1 = asarray_ndim(h1, 2)
    check_integer_dtype(h1)
    h2 = asarray_ndim(h2, 2)
    check_integer_dtype(h2)
    pos = asarray_ndim(pos, 1)
    check_dim0_aligned(h1, h2, pos)

    # compute gaps between variants for integration
    gaps = compute_ihh_gaps(pos, map_pos, gap_scale, max_gap, is_accessible)

    # setup kwargs
    kwargs = dict(min_ehh=min_ehh, include_edges=include_edges)

    if use_threads and multiprocessing.cpu_count() > 1:
        # use multiple threads

        # setup threadpool
        pool = ThreadPool(min(4, multiprocessing.cpu_count()))

        try:
            # scan forward
            res1_fwd = pool.apply_async(ihh_scan, (h1, gaps), kwargs)
            res2_fwd = pool.apply_async(ihh_scan, (h2, gaps), kwargs)

            # scan backward
            res1_rev = pool.apply_async(ihh_scan, (h1[::-1], gaps[::-1]),
                                        kwargs)
            res2_rev = pool.apply_async(ihh_scan, (h2[::-1], gaps[::-1]),
                                        kwargs)

            # wait for all four scans to finish
            pool.close()
            pool.join()

            # obtain results; get() re-raises any exception from a scan
            ihh1_fwd = res1_fwd.get()
            ihh2_fwd = res2_fwd.get()
            ihh1_rev = res1_rev.get()
            ihh2_rev = res2_rev.get()

        finally:
            # cleanup, also when a scan fails or the wait is interrupted
            pool.terminate()

    else:
        # compute without threads

        # scan forward
        ihh1_fwd = ihh_scan(h1, gaps, **kwargs)
        ihh2_fwd = ihh_scan(h2, gaps, **kwargs)

        # scan backward
        ihh1_rev = ihh_scan(h1[::-1], gaps[::-1], **kwargs)
        ihh2_rev = ihh_scan(h2[::-1], gaps[::-1], **kwargs)

    # handle reverse scans: flip results back into the original variant order
    ihh1_rev = ihh1_rev[::-1]
    ihh2_rev = ihh2_rev[::-1]

    # compute unstandardized score as the log ratio of the summed
    # forward and backward IHH of the two populations
    ihh1 = ihh1_fwd + ihh1_rev
    ihh2 = ihh2_fwd + ihh2_rev
    score = np.log(ihh1 / ihh2)

    return score