def test_ihh01_scan_d():
    """Check ``ihh01_scan`` when both allele classes give the same IHH.

    The same expected values are asserted for ``ihh0`` and ``ihh1`` in every
    scan. Scans without ``include_edges`` are expected to yield NaN for the
    first two variants; with ``include_edges=True`` those become finite.
    """
    h = np.array([[0, 0, 1, 1, 1, 0],
                  [0, 1, 0, 1, 0, 1],
                  [1, 0, 0, 0, 1, 1],
                  [0, 0, 0, 1, 1, 1]])
    gaps = np.array([10, 10, 10], dtype='f8')

    # sum of two trapezoid terms, each spanning a gap of 10
    x = (10 * (1 + 1 / 3) / 2) + (10 * (1 / 3 + 0) / 2)

    # edges excluded (the default): same expectation for both thresholds
    expected_no_edges = [np.nan, np.nan, x, x]
    for threshold in (0.05, 0):
        ihh0, ihh1 = ihh01_scan(h, gaps, min_ehh=threshold)
        assert_array_almost_equal(expected_no_edges, ihh0)
        assert_array_almost_equal(expected_no_edges, ihh1)

    # edges included: the first two variants are now reported
    expected_with_edges = [0, 10 * 2 / 3, x, x]
    ihh0, ihh1 = ihh01_scan(h, gaps, min_ehh=0, include_edges=True)
    assert_array_almost_equal(expected_with_edges, ihh0)
    assert_array_almost_equal(expected_with_edges, ihh1)
def test_ihh01_scan_a():
    """Check ``ihh01_scan`` on a small 4-variant, 3-haplotype array.

    Two scans are run, one with ``min_ehh=0.05`` and edges excluded, one with
    ``min_ehh=0`` and edges included, and the NaN-aware comparison helper is
    used to check ``ihh0`` and ``ihh1`` against hand-computed values.
    """
    nan = np.nan
    h = np.array([[0, 0, 1],
                  [0, 1, 1],
                  [1, 1, 0],
                  [1, 0, 0]])
    gaps = np.array([10, 10, 10], dtype='f8')

    # (scan options, expected ihh0, expected ihh1)
    scenarios = [
        (dict(min_ehh=0.05, include_edges=False),
         [nan, nan, nan, 5],
         [nan, 5, 5, nan]),
        (dict(min_ehh=0, include_edges=True),
         [0, nan, nan, 5],
         [nan, 5, 5, nan]),
    ]

    for options, expect_ihh0, expect_ihh1 in scenarios:
        ihh0, ihh1 = ihh01_scan(h, gaps, **options)
        assert_array_nanclose(expect_ihh0, ihh0)
        assert_array_nanclose(expect_ihh1, ihh1)
def test_ihh01_scan_c():
    """Check ``ihh01_scan`` when every variant has the same haplotype pattern.

    All four variants carry allele 0 on the first three haplotypes and allele
    1 on the last three. With edges excluded every value is expected to be
    NaN; with ``include_edges=True`` and ``min_ehh=0`` the expected values are
    0, 10, 20, 30 for both alleles.
    """
    gaps = np.array([10, 10, 10], dtype='f8')
    pattern = [0, 0, 0, 1, 1, 1]
    h = np.array([pattern] * 4)

    # edges excluded (default): nothing is reported
    all_nan = [np.nan] * 4
    ihh0, ihh1 = ihh01_scan(h, gaps, min_ehh=0.05)
    assert_array_almost_equal(all_nan, ihh0)
    assert_array_almost_equal(all_nan, ihh1)

    # edges included: values grow by one gap (10) per variant
    cumulative = [0, 10, 20, 30]
    ihh0, ihh1 = ihh01_scan(h, gaps, min_ehh=0, include_edges=True)
    assert_array_almost_equal(cumulative, ihh0)
    assert_array_almost_equal(cumulative, ihh1)
def test_ihh01_scan_e():
    """Check the effect of the ``min_maf`` argument of ``ihh01_scan``.

    Every variant has allele 1 on exactly one of three haplotypes. With
    ``min_maf=0`` finite ``ihh0`` values are expected (``ihh1`` stays NaN);
    with ``min_maf=0.4`` both outputs are expected to be all NaN.
    """
    gaps = np.array([10, 10], dtype='f8')
    h = np.array([[0, 0, 1]] * 3)
    all_nan = [np.nan, np.nan, np.nan]

    # (min_maf threshold, expected ihh0); ihh1 is expected to be NaN throughout
    for maf_threshold, expect_ihh0 in ((0, [0, 10, 20]), (0.4, all_nan)):
        ihh0, ihh1 = ihh01_scan(h, gaps, min_ehh=0, min_maf=maf_threshold,
                                include_edges=True)
        assert_array_almost_equal(expect_ihh0, ihh0)
        assert_array_almost_equal(all_nan, ihh1)
def test_ihh01_scan_b():
    """Check ``ihh01_scan`` when allele 1 is a singleton at every variant.

    Each variant has allele 1 on a single, different haplotype. ``ihh1`` is
    expected to be NaN everywhere in all three scans, while ``ihh0`` is
    checked against hand-computed values.
    """
    nan = np.nan
    gaps = np.array([10, 10, 10], dtype='f8')
    h = np.array([[0, 0, 0, 1],
                  [0, 0, 1, 0],
                  [0, 1, 0, 0],
                  [1, 0, 0, 0]])

    # sum of two trapezoid terms, each spanning a gap of 10
    x = (10 * (1 + 1 / 3) / 2) + (10 * (1 / 3 + 0) / 2)
    no_ihh1 = [nan, nan, nan, nan]

    # (scan options, expected ihh0)
    scenarios = (
        (dict(min_ehh=0.05, include_edges=False), [nan, nan, x, x]),
        (dict(min_ehh=0, include_edges=False), [nan, nan, x, x]),
        (dict(min_ehh=0, include_edges=True), [0, 10 * (1 + 1 / 3) / 2, x, x]),
    )

    for options, expect_ihh0 in scenarios:
        ihh0, ihh1 = ihh01_scan(h, gaps, **options)
        assert_array_nanclose(expect_ihh0, ihh0)
        assert_array_nanclose(no_ihh1, ihh1)
def ihs(h, pos, map_pos=None, min_ehh=0.05, min_maf=0.05, include_edges=False,
        gap_scale=20000, max_gap=200000, is_accessible=None, use_threads=True):
    """Compute the unstandardized integrated haplotype score (IHS) for each
    variant, comparing integrated haplotype homozygosity between the
    reference (0) and alternate (1) alleles.

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.
    pos : array_like, int, shape (n_variants,)
        Variant positions (physical distance).
    map_pos : array_like, float, shape (n_variants,)
        Variant positions (genetic map distance).
    min_ehh: float, optional
        Minimum EHH beyond which to truncate integrated haplotype
        homozygosity calculation.
    min_maf : float, optional
        Do not compute integrated haplotype homozygosity for variants with
        minor allele frequency below this value.
    include_edges : bool, optional
        If True, report scores even if EHH does not decay below `min_ehh`
        before reaching the edge of the data.
    gap_scale : int, optional
        Rescale distance between variants if gap is larger than this value.
    max_gap : int, optional
        Do not report scores if EHH spans a gap larger than this number of
        base pairs.
    is_accessible : array_like, bool, optional
        Genome accessibility array. If provided, distance between variants
        will be computed as the number of accessible bases between them.
    use_threads : bool, optional
        If True use multiple threads to compute.

    Returns
    -------
    score : ndarray, float, shape (n_variants,)
        Unstandardized IHS scores, computed as ``log(ihh1 / ihh0)`` where
        each IHH is the sum of a forward and a backward scan.

    Notes
    -----

    Variants with minor allele frequency below `min_maf` are not scored;
    the integrated haplotype homozygosity is not computed for them, so the
    returned score is NaN. Adjust `min_maf` to change which variants are
    scored.

    This function computes IHS comparing the reference and alternate
    alleles. These can be polarised by switching the sign for any variant
    where the reference allele is derived.

    This function returns NaN for any IHS calculations where haplotype
    homozygosity does not decay below `min_ehh` before reaching the first
    or last variant. To disable this behaviour, set `include_edges` to True.

    Note that the unstandardized score is returned. Usually these scores are
    then standardized in different allele frequency bins.

    See Also
    --------
    standardize_by_allele_count

    """

    # check inputs
    h = asarray_ndim(h, 2)
    check_integer_dtype(h)
    pos = asarray_ndim(pos, 1)
    check_dim0_aligned(h, pos)

    # compute gaps between variants for integration
    gaps = compute_ihh_gaps(pos, map_pos, gap_scale, max_gap, is_accessible)

    # setup kwargs shared by the forward and backward scans
    kwargs = dict(min_ehh=min_ehh, min_maf=min_maf,
                  include_edges=include_edges)

    if use_threads and multiprocessing.cpu_count() > 1:
        # run with threads: one worker per scan direction
        pool = ThreadPool(2)
        try:
            # scan forward
            result_fwd = pool.apply_async(ihh01_scan, (h, gaps), kwargs)

            # scan backward over the reversed variants and gaps
            result_rev = pool.apply_async(ihh01_scan,
                                          (h[::-1], gaps[::-1]), kwargs)

            # wait for both to finish
            pool.close()
            pool.join()

            # obtain results; .get() re-raises any exception from a worker
            ihh0_fwd, ihh1_fwd = result_fwd.get()
            ihh0_rev, ihh1_rev = result_rev.get()
        finally:
            # always release the pool, even if a scan failed
            pool.terminate()

    else:
        # run without threads

        # scan forward
        ihh0_fwd, ihh1_fwd = ihh01_scan(h, gaps, **kwargs)

        # scan backward
        ihh0_rev, ihh1_rev = ihh01_scan(h[::-1], gaps[::-1], **kwargs)

    # the backward scan ran over reversed variants; restore original order
    ihh0_rev = ihh0_rev[::-1]
    ihh1_rev = ihh1_rev[::-1]

    # compute unstandardized score
    ihh0 = ihh0_fwd + ihh0_rev
    ihh1 = ihh1_fwd + ihh1_rev
    score = np.log(ihh1 / ihh0)

    return score