def moving_weir_cockerham_fst(g, subpops, size, start=0, stop=None, step=None, max_allele=None):
    """Compute average Fst in moving windows along a single chromosome/contig
    using the method of Weir and Cockerham (1984).

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, ploidy)
        Genotype array.
    subpops : sequence of sequences of ints
        Sample indices for each subpopulation.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.
    max_allele : int, optional
        The highest allele index to consider.

    Returns
    -------
    fst : ndarray, float, shape (n_windows,)
        Average Fst in each window.

    """
    # windowing arguments shared by both moving sums
    windowing = dict(size=size, start=start, stop=stop, step=step)

    # per-variant a, b, c values
    a, b, c = weir_cockerham_fst(g, subpops, max_allele=max_allele)

    # NaN-ignoring sums per window: `a` is the numerator, `a + b + c` the
    # denominator
    a_wsum = moving_statistic(a, statistic=np.nansum, **windowing)
    abc_wsum = moving_statistic(a + b + c, statistic=np.nansum, **windowing)

    # Fst in each window is the ratio of the two windowed sums
    return a_wsum / abc_wsum
def blockwise_weir_cockerham_fst(g, subpops, blen, max_allele=None):
    """Compute average Weir & Cockerham Fst over all variants, together with
    a block-jackknife standard error.

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, ploidy)
        Genotype array.
    subpops : sequence of sequences of ints
        Sample indices for each subpopulation.
    blen : int
        Block size (number of variants).
    max_allele : int, optional
        The highest allele index to consider.

    Returns
    -------
    fst : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    """

    def _ratio_of_sums(n, d):
        # statistic handed to the jackknife: total numerator / total denominator
        return np.sum(n) / np.sum(d)

    # per-variant a, b, c values
    a, b, c = weir_cockerham_fst(g, subpops, max_allele=max_allele)

    # overall estimate from NaN-ignoring totals
    a_total = np.nansum(a)
    b_total = np.nansum(b)
    c_total = np.nansum(c)
    fst = a_total / (a_total + b_total + c_total)

    # numerator and denominator summed within each block of `blen` variants
    block_num = moving_statistic(a, statistic=np.nansum, size=blen)
    block_den = moving_statistic(a + b + c, statistic=np.nansum, size=blen)

    # value of the statistic in each block
    vb = block_num / block_den

    # standard error and resampled values from the block-jackknife
    _, se, vj = jackknife((block_num, block_den), statistic=_ratio_of_sums)

    return fst, se, vb, vj
def average_weir_cockerham_fst(g, subpops, blen, max_allele=None):
    """Average Fst (Weir & Cockerham) with its standard error estimated by
    the block-jackknife.

    Parameters
    ----------
    g : array_like, int, shape (n_variants, n_samples, ploidy)
        Genotype array.
    subpops : sequence of sequences of ints
        Sample indices for each subpopulation.
    blen : int
        Block size (number of variants).
    max_allele : int, optional
        The highest allele index to consider.

    Returns
    -------
    fst : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    """
    # both per-block reductions are NaN-ignoring sums over `blen` variants
    per_block = dict(statistic=np.nansum, size=blen)

    def _pooled_ratio(n, d):
        # sum the block numerators and denominators, then divide
        return np.sum(n) / np.sum(d)

    # per-variant a, b, c values
    a, b, c = weir_cockerham_fst(g, subpops, max_allele=max_allele)

    # estimate using all data
    sum_a = np.nansum(a)
    sum_b = np.nansum(b)
    sum_c = np.nansum(c)
    fst = sum_a / (sum_a + sum_b + sum_c)

    # per-block numerator (`a`) and denominator (`a + b + c`)
    numer_blocks = moving_statistic(a, **per_block)
    denom_blocks = moving_statistic(a + b + c, **per_block)

    # per-block statistic
    vb = numer_blocks / denom_blocks

    # jackknife over blocks; the first returned item is not used here
    _, se, vj = jackknife((numer_blocks, denom_blocks), statistic=_pooled_ratio)

    return fst, se, vb, vj
def moving_patterson_fst(ac1, ac2, size, start=0, stop=None, step=None):
    """Compute average Fst in moving windows along a single chromosome/contig
    using the method of Patterson (2012).

    Parameters
    ----------
    ac1 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the first population.
    ac2 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the second population.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.

    Returns
    -------
    fst : ndarray, float, shape (n_windows,)
        Average Fst in each window.

    """
    # arguments common to both windowed reductions
    reduce_kwargs = dict(statistic=np.nansum, size=size, start=start, stop=stop, step=step)

    # per-variant numerator and denominator
    per_variant_num, per_variant_den = patterson_fst(ac1, ac2)

    # NaN-ignoring sums within each window
    window_num = moving_statistic(per_variant_num, **reduce_kwargs)
    window_den = moving_statistic(per_variant_den, **reduce_kwargs)

    # Fst per window as a ratio of sums
    return window_num / window_den
def moving_haplotype_diversity(h, size, start=0, stop=None, step=None):
    """Compute haplotype diversity in moving windows.

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.

    Returns
    -------
    hd : ndarray, float, shape (n_windows,)
        Haplotype diversity.

    """
    # apply `haplotype_diversity` to each window of `h`
    return moving_statistic(
        values=h,
        statistic=haplotype_diversity,
        size=size,
        start=start,
        stop=stop,
        step=step,
    )
def blockwise_patterson_fst(ac1, ac2, blen):
    """Compute average Fst between two populations, with a standard error
    obtained from the block-jackknife.

    Parameters
    ----------
    ac1 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the first population.
    ac2 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the second population.
    blen : int
        Block size (number of variants).

    Returns
    -------
    fst : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    """

    def _ratio_of_sums(n, d):
        # statistic handed to the jackknife: total numerator / total denominator
        return np.sum(n) / np.sum(d)

    # per-variant numerator and denominator
    num, den = patterson_fst(ac1, ac2)

    # overall estimate from NaN-ignoring totals
    total_num = np.nansum(num)
    total_den = np.nansum(den)
    fst = total_num / total_den

    # sums within each block of `blen` variants
    block_num = moving_statistic(num, statistic=np.nansum, size=blen)
    block_den = moving_statistic(den, statistic=np.nansum, size=blen)

    # value of the statistic in each block
    vb = block_num / block_den

    # standard error and resampled values from the block-jackknife
    _, se, vj = jackknife((block_num, block_den), statistic=_ratio_of_sums)

    return fst, se, vb, vj
def average_patterson_fst(ac1, ac2, blen):
    """Average Fst between two populations with its standard error estimated
    by the block-jackknife.

    Parameters
    ----------
    ac1 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the first population.
    ac2 : array_like, int, shape (n_variants, n_alleles)
        Allele counts array from the second population.
    blen : int
        Block size (number of variants).

    Returns
    -------
    fst : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    """
    # both per-block reductions are NaN-ignoring sums over `blen` variants
    per_block = dict(statistic=np.nansum, size=blen)

    def _pooled_ratio(n, d):
        # sum the block numerators and denominators, then divide
        return np.sum(n) / np.sum(d)

    # per-variant numerator and denominator
    numer, denom = patterson_fst(ac1, ac2)

    # estimate using all data
    fst = np.nansum(numer) / np.nansum(denom)

    # per-block sums
    numer_blocks = moving_statistic(numer, **per_block)
    denom_blocks = moving_statistic(denom, **per_block)

    # per-block statistic
    vb = numer_blocks / denom_blocks

    # jackknife over blocks; the first returned item is not used here
    _, se, vj = jackknife((numer_blocks, denom_blocks), statistic=_pooled_ratio)

    return fst, se, vb, vj
def moving_tajima_d(ac, size, start=0, stop=None, step=None, min_sites=3):
    """Compute Tajima's D in moving windows of `size` variants.

    Parameters
    ----------
    ac : array_like, int, shape (n_variants, n_alleles)
        Allele counts array.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.
    min_sites : int, optional
        Minimum number of segregating sites for which to calculate a value. If
        there are fewer, np.nan is returned. Defaults to 3.

    Returns
    -------
    d : ndarray, float, shape (n_windows,)
        Tajima's D.

    Examples
    --------
    >>> import allel
    >>> g = allel.GenotypeArray([[[0, 0], [0, 0]],
    ...                          [[0, 0], [0, 1]],
    ...                          [[0, 0], [1, 1]],
    ...                          [[0, 1], [1, 1]],
    ...                          [[1, 1], [1, 1]],
    ...                          [[0, 0], [1, 2]],
    ...                          [[0, 1], [1, 2]],
    ...                          [[0, 1], [-1, -1]],
    ...                          [[-1, -1], [-1, -1]]])
    >>> ac = g.count_alleles()
    >>> D = allel.moving_tajima_d(ac, size=4, step=2)
    >>> D
    array([0.1676558 , 2.01186954, 5.70029703])

    """
    # `min_sites` is forwarded through `moving_statistic` as an extra keyword
    # argument alongside the windowing parameters
    return moving_statistic(
        values=ac,
        statistic=tajima_d,
        size=size,
        start=start,
        stop=stop,
        step=step,
        min_sites=min_sites,
    )
def moving_tajima_d(ac, size, start=0, stop=None, step=None):
    """Compute Tajima's D in moving windows of `size` variants.

    Parameters
    ----------
    ac : array_like, int, shape (n_variants, n_alleles)
        Allele counts array.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.

    Returns
    -------
    d : ndarray, float, shape (n_windows,)
        Tajima's D.

    Examples
    --------
    >>> import allel
    >>> g = allel.GenotypeArray([[[0, 0], [0, 0]],
    ...                          [[0, 0], [0, 1]],
    ...                          [[0, 0], [1, 1]],
    ...                          [[0, 1], [1, 1]],
    ...                          [[1, 1], [1, 1]],
    ...                          [[0, 0], [1, 2]],
    ...                          [[0, 1], [1, 2]],
    ...                          [[0, 1], [-1, -1]],
    ...                          [[-1, -1], [-1, -1]]])
    >>> ac = g.count_alleles()
    >>> D = allel.stats.moving_tajima_d(ac, size=3)
    >>> D
    array([ 0.59158014,  1.89305645,  5.79748537])

    """
    # windowing parameters passed straight through to `moving_statistic`
    windowing = dict(size=size, start=start, stop=stop, step=step)

    # evaluate `tajima_d` on each window of allele counts
    return moving_statistic(values=ac, statistic=tajima_d, **windowing)
def moving_garud_h(h, size, start=0, stop=None, step=None):
    """Compute the H1, H12, H123 and H2/H1 statistics for detecting signatures
    of soft sweeps, as defined in Garud et al. (2015), in moving windows.

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.

    Returns
    -------
    h1 : ndarray, float, shape (n_windows,)
        H1 statistics (sum of squares of haplotype frequencies).
    h12 : ndarray, float, shape (n_windows,)
        H12 statistics (sum of squares of haplotype frequencies, combining
        the two most common haplotypes into a single frequency).
    h123 : ndarray, float, shape (n_windows,)
        H123 statistics (sum of squares of haplotype frequencies, combining
        the three most common haplotypes into a single frequency).
    h2_h1 : ndarray, float, shape (n_windows,)
        H2/H1 statistics, indicating the "softness" of a sweep.

    """
    # one row per window; columns 0..3 are read below as H1, H12, H123, H2/H1
    per_window = moving_statistic(
        values=h,
        statistic=garud_h,
        size=size,
        start=start,
        stop=stop,
        step=step,
    )

    # split the columns into the four returned arrays
    return per_window[:, 0], per_window[:, 1], per_window[:, 2], per_window[:, 3]
def moving_tajima_d(ac, size, start=0, stop=None, step=None):
    """Evaluate Tajima's D over moving windows of `size` variants.

    Parameters
    ----------
    ac : array_like, int, shape (n_variants, n_alleles)
        Allele counts array.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.

    Returns
    -------
    D : ndarray, float, shape (n_windows,)
        Tajima's D.

    Examples
    --------
    >>> import allel
    >>> g = allel.GenotypeArray([[[0, 0], [0, 0]],
    ...                          [[0, 0], [0, 1]],
    ...                          [[0, 0], [1, 1]],
    ...                          [[0, 1], [1, 1]],
    ...                          [[1, 1], [1, 1]],
    ...                          [[0, 0], [1, 2]],
    ...                          [[0, 1], [1, 2]],
    ...                          [[0, 1], [-1, -1]],
    ...                          [[-1, -1], [-1, -1]]])
    >>> ac = g.count_alleles()
    >>> D = allel.stats.moving_tajima_d(ac, size=3)
    >>> D
    array([ 0.59158014,  1.89305645,  5.79748537])

    """
    # `tajima_d` is applied to the allele counts of each window in turn
    return moving_statistic(
        values=ac,
        statistic=tajima_d,
        size=size,
        start=start,
        stop=stop,
        step=step,
    )
def average_patterson_d(aca, acb, acc, acd, blen):
    """Compute D(A, B; C, D) over all variants, with a block-jackknife
    standard error and Z-score.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for population C.
    acd : array_like, int, shape (n_variants, 2)
        Allele counts for population D.
    blen : int
        Block size (number of variants).

    Returns
    -------
    d : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    z : float
        Z-score (number of standard errors from zero).
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    See Also
    --------
    allel.stats.admixture.patterson_d

    """

    def _ratio_of_sums(n, d):
        # statistic handed to the jackknife: total numerator / total denominator
        return np.sum(n) / np.sum(d)

    # per-variant numerator and denominator
    num, den = patterson_d(aca, acb, acc, acd)

    # N.B., NaNs can appear when a population has no genotype calls at a
    # variant (allele number of zero); this is assumed rare enough to be
    # negligible, and the NaN-ignoring sums below skip such entries.

    # overall estimate
    estimate = np.nansum(num) / np.nansum(den)

    # sums within each block of `blen` variants
    block_num = moving_statistic(num, statistic=np.nansum, size=blen)
    block_den = moving_statistic(den, statistic=np.nansum, size=blen)

    # value of the statistic in each block
    vb = block_num / block_den

    # standard error and resampled values from the block-jackknife
    _, se, vj = jackknife((block_num, block_den), statistic=_ratio_of_sums)

    # Z-score: the estimate expressed in standard errors
    z = estimate / se

    return estimate, se, z, vb, vj
def average_patterson_f3(acc, aca, acb, blen, normed=True):
    """Compute F3(C; A, B) over all variants, with a block-jackknife standard
    error and Z-score.

    Parameters
    ----------
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for the test population (C).
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for the first source population (A).
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for the second source population (B).
    blen : int
        Block size (number of variants).
    normed : bool, optional
        If False, use un-normalised f3 values.

    Returns
    -------
    f3 : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    z : float
        Z-score (number of standard errors from zero).
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    See Also
    --------
    allel.stats.admixture.patterson_f3

    """

    def _ratio_of_sums(t, b):
        # jackknife statistic for the normalised case
        return np.sum(t) / np.sum(b)

    # per-variant values
    T, B = patterson_f3(acc, aca, acb)

    # N.B., NaNs can appear when a population has no genotype calls at a
    # variant (allele number of zero); this is assumed rare enough to be
    # negligible, and the NaN-aware reductions below skip such entries.

    if normed:
        # overall value: ratio of NaN-ignoring totals
        f3 = np.nansum(T) / np.nansum(B)

        # per-block sums, per-block ratio, then jackknife on the sums
        block_t = moving_statistic(T, statistic=np.nansum, size=blen)
        block_b = moving_statistic(B, statistic=np.nansum, size=blen)
        vb = block_t / block_b
        _, se, vj = jackknife((block_t, block_b), statistic=_ratio_of_sums)
    else:
        # overall value: NaN-ignoring mean of the un-normalised values
        f3 = np.nanmean(T)

        # per-block means, then jackknife on the mean of those
        vb = moving_statistic(T, statistic=np.nanmean, size=blen)
        _, se, vj = jackknife(vb, statistic=np.mean)

    # Z-score: the estimate expressed in standard errors
    z = f3 / se

    return f3, se, z, vb, vj
def moving_patterson_d(aca, acb, acc, acd, size, start=0, stop=None, step=None):
    """Compute D(A, B; C, D) in moving windows.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for population C.
    acd : array_like, int, shape (n_variants, 2)
        Allele counts for population D.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.

    Returns
    -------
    d : ndarray, float, shape (n_windows,)
        Estimated value of the statistic in each window.

    """
    # arguments common to both windowed reductions
    reduce_kwargs = dict(statistic=np.nansum, size=size, start=start, stop=stop, step=step)

    # per-variant numerator and denominator
    per_variant_num, per_variant_den = patterson_d(aca, acb, acc, acd)

    # N.B., NaNs can appear when a population has no genotype calls at a
    # variant (allele number of zero); this is assumed rare enough to be
    # negligible, and the NaN-ignoring sums below skip such entries.

    # sums within each window
    window_num = moving_statistic(per_variant_num, **reduce_kwargs)
    window_den = moving_statistic(per_variant_den, **reduce_kwargs)

    # value of the statistic in each window
    return window_num / window_den
def moving_patterson_f3(acc, aca, acb, size, start=0, stop=None, step=None, normed=True):
    """Compute F3(C; A, B) in moving windows.

    Parameters
    ----------
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for the test population (C).
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for the first source population (A).
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for the second source population (B).
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    step : int, optional
        The number of variants between start positions of windows. If not
        given, defaults to the window size, i.e., non-overlapping windows.
    normed : bool, optional
        If False, use un-normalised f3 values.

    Returns
    -------
    f3 : ndarray, float, shape (n_windows,)
        Estimated value of the statistic in each window.

    """
    # windowing arguments shared by every moving reduction below
    windowing = dict(size=size, start=start, stop=stop, step=step)

    # per-variant values
    T, B = patterson_f3(acc, aca, acb)

    if not normed:
        # un-normalised: NaN-ignoring mean of T within each window
        return moving_statistic(T, statistic=np.nanmean, **windowing)

    # normalised: ratio of the NaN-ignoring windowed sums of T and B
    window_t = moving_statistic(T, statistic=np.nansum, **windowing)
    window_b = moving_statistic(B, statistic=np.nansum, **windowing)
    return window_t / window_b
def blockwise_patterson_f3(acc, aca, acb, blen, normed=True):
    """F3(C; A, B) with its standard error and Z-score estimated by the
    block-jackknife.

    Parameters
    ----------
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for the test population (C).
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for the first source population (A).
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for the second source population (B).
    blen : int
        Block size (number of variants).
    normed : bool, optional
        If False, use un-normalised f3 values.

    Returns
    -------
    f3 : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    z : float
        Z-score (number of standard errors from zero).
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    See Also
    --------
    allel.stats.admixture.patterson_f3

    """

    def _pooled_ratio(t, b):
        # sum the block numerators and denominators, then divide
        return np.sum(t) / np.sum(b)

    # per-variant values
    T, B = patterson_f3(acc, aca, acb)

    # N.B., NaNs can appear when a population has no genotype calls at a
    # variant (allele number of zero); this is assumed rare enough to be
    # negligible, and the NaN-aware reductions below skip such entries.

    if not normed:
        # un-normalised: overall NaN-ignoring mean, per-block means, and a
        # jackknife over the per-block means
        f3 = np.nanmean(T)
        vb = moving_statistic(T, statistic=np.nanmean, size=blen)
        _, se, vj = jackknife(vb, statistic=np.mean)
    else:
        # normalised: overall ratio of totals, per-block ratio of sums, and a
        # jackknife over the per-block sums
        f3 = np.nansum(T) / np.nansum(B)
        t_blocks = moving_statistic(T, statistic=np.nansum, size=blen)
        b_blocks = moving_statistic(B, statistic=np.nansum, size=blen)
        vb = t_blocks / b_blocks
        _, se, vj = jackknife((t_blocks, b_blocks), statistic=_pooled_ratio)

    # Z-score: the estimate expressed in standard errors
    z = f3 / se

    return f3, se, z, vb, vj
def blockwise_patterson_d(aca, acb, acc, acd, blen):
    """D(A, B; C, D) with its standard error and Z-score estimated by the
    block-jackknife.

    Parameters
    ----------
    aca : array_like, int, shape (n_variants, 2)
        Allele counts for population A.
    acb : array_like, int, shape (n_variants, 2)
        Allele counts for population B.
    acc : array_like, int, shape (n_variants, 2)
        Allele counts for population C.
    acd : array_like, int, shape (n_variants, 2)
        Allele counts for population D.
    blen : int
        Block size (number of variants).

    Returns
    -------
    d : float
        Estimated value of the statistic using all data.
    se : float
        Estimated standard error.
    z : float
        Z-score (number of standard errors from zero).
    vb : ndarray, float, shape (n_blocks,)
        Value of the statistic in each block.
    vj : ndarray, float, shape (n_blocks,)
        Values of the statistic from block-jackknife resampling.

    Notes
    -----
    See Patterson (2012), main text and Appendix A.

    See Also
    --------
    allel.stats.admixture.patterson_d

    """
    # both per-block reductions are NaN-ignoring sums over `blen` variants
    per_block = dict(statistic=np.nansum, size=blen)

    def _pooled_ratio(n, d):
        # sum the block numerators and denominators, then divide
        return np.sum(n) / np.sum(d)

    # per-variant numerator and denominator
    numer, denom = patterson_d(aca, acb, acc, acd)

    # N.B., NaNs can appear when a population has no genotype calls at a
    # variant (allele number of zero); this is assumed rare enough to be
    # negligible, and the NaN-ignoring sums below skip such entries.

    # estimate using all data
    d_all = np.nansum(numer) / np.nansum(denom)

    # per-block sums
    numer_blocks = moving_statistic(numer, **per_block)
    denom_blocks = moving_statistic(denom, **per_block)

    # per-block statistic
    vb = numer_blocks / denom_blocks

    # jackknife over blocks; the first returned item is not used here
    _, se, vj = jackknife((numer_blocks, denom_blocks), statistic=_pooled_ratio)

    # Z-score: the estimate expressed in standard errors
    z = d_all / se

    return d_all, se, z, vb, vj
def plot_moving_haplotype_frequencies(pos, h, size, start=0, stop=None, n=None, palette='Paired', singleton_color='w', ax=None):
    """Draw haplotype frequencies in moving windows over the genome.

    Parameters
    ----------
    pos : array_like, int, shape (n_items,)
        Variant positions, using 1-based coordinates, in ascending order.
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    n : int, optional
        Color only the `n` most frequent haplotypes (by default, all
        non-singleton haplotypes are colored).
    palette : string, optional
        A Seaborn palette name.
    singleton_color : string, optional
        Color to paint singleton haplotypes.
    ax : axes, optional
        The axes on which to draw. If not provided, a new figure will be
        created.

    Returns
    -------
    ax : axes

    """
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    import seaborn as sns

    # create a figure only when the caller did not supply axes
    if ax is None:
        _, ax = plt.subplots()

    # haplotype ranks per window; this representation lets us draw with
    # pcolormesh(), which is a lot faster than other plotting functions
    ranks = moving_hfs_rank(h, size=size, start=start, stop=stop)

    # ranks beyond the n most common are reset to 0, the index of
    # singleton_color in the color list built below
    if n:
        ranks[ranks > n] = 0

    # (first position, last position) of each window
    bounds = moving_statistic(
        pos,
        statistic=lambda v: (v[0], v[-1]),
        size=size,
        start=start,
        stop=stop,
    )
    x_min = bounds[0, 0]
    x_max = bounds[-1, -1]

    # color 0 is the singleton color, followed by one palette color per rank
    palette_colors = sns.color_palette(palette, n_colors=ranks.max())
    cmap = mpl.colors.ListedColormap([singleton_color] + palette_colors)

    # cell edges: window start positions plus the final window's end on x,
    # one row per haplotype on y
    n_haplotypes = h.shape[1]
    x_edges = np.append(bounds[:, 0], x_max)
    y_edges = np.arange(n_haplotypes + 1)
    ax.pcolormesh(x_edges, y_edges, ranks.T, cmap=cmap)

    # axis limits and labels
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(0, n_haplotypes)
    ax.set_ylabel('haplotype count')
    ax.set_xlabel('position (bp)')

    return ax
def plot_moving_haplotype_frequencies(
    pos, h, size, start=0, stop=None, n=None, palette="Paired", singleton_color="w", ax=None
):
    """Render haplotype frequencies in moving windows over the genome.

    Parameters
    ----------
    pos : array_like, int, shape (n_items,)
        Variant positions, using 1-based coordinates, in ascending order.
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.
    size : int
        The window size (number of variants).
    start : int, optional
        The index at which to start.
    stop : int, optional
        The index at which to stop.
    n : int, optional
        Color only the `n` most frequent haplotypes (by default, all
        non-singleton haplotypes are colored).
    palette : string, optional
        A Seaborn palette name.
    singleton_color : string, optional
        Color to paint singleton haplotypes.
    ax : axes, optional
        The axes on which to draw. If not provided, a new figure will be
        created.

    Returns
    -------
    ax : axes

    """
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    import seaborn as sns

    def _first_and_last(x):
        # window summary: the first and last position it contains
        return (x[0], x[-1])

    # fall back to fresh axes when none were passed in
    if ax is None:
        _, ax = plt.subplots()

    # N.B., a haplotype-rank array is used so the plot can be drawn with
    # pcolormesh(), which is a lot faster than other plotting functions
    hap_rank = moving_hfs_rank(h, size=size, start=start, stop=stop)

    # keep only the n most common haplotypes; other ranks are set to 0, the
    # index of singleton_color in the color list built below
    if n:
        hap_rank[hap_rank > n] = 0

    # start and stop positions of each window
    win = moving_statistic(pos, statistic=_first_and_last, size=size, start=start, stop=stop)
    first_pos = win[0, 0]
    last_pos = win[-1, -1]

    # singleton color first, then as many palette colors as the highest rank
    color_list = [singleton_color] + sns.color_palette(palette, n_colors=hap_rank.max())
    color_map = mpl.colors.ListedColormap(color_list)

    # x edges are window starts plus the final stop; y edges span haplotypes
    hap_count = h.shape[1]
    ax.pcolormesh(
        np.append(win[:, 0], last_pos),
        np.arange(hap_count + 1),
        hap_rank.T,
        cmap=color_map,
    )

    # limits and labels
    ax.set_xlim(first_pos, last_pos)
    ax.set_ylim(0, hap_count)
    ax.set_ylabel("haplotype count")
    ax.set_xlabel("position (bp)")

    return ax