Пример #1
0
 def test_masked_windowed_divergence(self):
     """Check windowed divergence when an accessibility mask is supplied.

     The mask is five ``True`` entries followed by five ``False`` entries,
     repeated three times.  Divergence computed in windows of 10 with that
     mask is expected to equal every second window of an unmasked run that
     uses windows of 5 over the same ``start``/``stop`` range.
     """
     haplotypes = HaplotypeArray([[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 1],
                                  [0, 1, 1, 1], [1, 1, 1, 1], [0, 0, 1, 2],
                                  [0, 1, 1, 2], [0, 1, -1, -1],
                                  [-1, -1, -1, -1]])
     # columns 0-1 make up the first population, columns 2-3 the second
     counts_a = haplotypes.take([0, 1], axis=1).count_alleles()
     counts_b = haplotypes.take([2, 3], axis=1).count_alleles()
     positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
     accessible = np.tile(np.repeat(np.array([True, False]), 5), 3)
     region = {"start": 1, "stop": 31}

     # reference values: unmasked windows of 5, keeping windows 0, 2, 4, ...
     fine, _windows, _n_bases, _counts = allel.windowed_divergence(
         positions, counts_a, counts_b, size=5, **region)
     expect = fine[::2]

     # same data in windows of 10, restricted by the accessibility mask
     actual, _windows, _n_bases, _counts = allel.windowed_divergence(
         positions, counts_a, counts_b, size=10, is_accessible=accessible,
         **region)
     assert_array_almost_equal(expect, actual)
Пример #2
0
def pairDxy(c, chrsize, ac_subpops, pos, pop2color, plot=False, blenw=10000, nwindow=100):
    """Compute Dxy for every pair of subpopulations.

    For each pair of keys in ``ac_subpops`` the sites are restricted to those
    that are segregating in the pooled allele counts and whose highest
    observed allele index is 1.  Divergence is then computed twice with
    ``allel.windowed_divergence``: once in blocks of ``blenw`` (fed to
    ``jackknife``) and once in ``nwindow`` equal windows (kept for plotting).

    Parameters
    ----------
    c :
        Passed through to ``plot_dxy`` unchanged.
    chrsize : int
        Used as the ``stop`` coordinate and to derive the plotting window
        length ``int(chrsize / nwindow)``.
    ac_subpops : dict
        Maps a subpopulation name to its allele counts.
    pos :
        Variant positions; indexed with the boolean site filter.
    pop2color :
        Passed through to ``plot_dxy`` unchanged.
    plot : bool, optional
        When true, ``plot_dxy`` is called with the collected results.
    blenw : int, optional
        Window size of the blocks handed to ``jackknife``.
    nwindow : int, optional
        Number of windows the chromosome is divided into for plotting.

    Returns
    -------
    dict
        ``"x-y"`` -> ``(dxy_m, dxy_se, (values, windows))`` where the first
        two entries are the leading values returned by ``jackknife`` and the
        tuple holds the first two elements of the per-window result.
    """
    pair_stats = {}
    plot_window = int(chrsize / nwindow)
    for pop_a, pop_b in combinations(ac_subpops.keys(), 2):
        label = "{}-{}".format(pop_a, pop_b)

        # site filter on the pooled counts of both populations
        pooled = ac_subpops[pop_a] + ac_subpops[pop_b]
        keep = pooled.is_segregating() & (pooled.max_allele() == 1)
        print("{} retaining {} SNPs".format(label, np.count_nonzero(keep)))

        kept_pos = pos[keep]
        # only the first two allele columns are kept after filtering
        counts_a = allel.AlleleCountsArray(
            ac_subpops[pop_a].compress(keep, axis=0)[:, :2])
        counts_b = allel.AlleleCountsArray(
            ac_subpops[pop_b].compress(keep, axis=0)[:, :2])

        # block-wise estimates over the whole chromosome for the jackknife
        blocks = allel.windowed_divergence(
            kept_pos, counts_a, counts_b,
            size=blenw, start=1, stop=chrsize)
        dxy_m, dxy_se, *_ = jackknife(blocks[0])

        # coarser windows, kept for plotting
        coarse = allel.windowed_divergence(
            kept_pos, counts_a, counts_b,
            size=plot_window, start=1, stop=chrsize)
        pair_stats[label] = (dxy_m, dxy_se, (coarse[0], coarse[1]))

    if plot:
        plot_dxy(pair_stats, pop2color, list(ac_subpops.keys()), c, chrsize)
    return pair_stats
Пример #3
0
def dxy(p1, pos, gt, win_size, length_bp):
    """Return windowed divergence (Dxy) between two populations.

    ``get_ac_seg`` converts the inputs into two allele-count objects and the
    matching positions; these are handed to ``allel.windowed_divergence``
    using windows of ``win_size`` from coordinate 1 up to ``length_bp``.

    Parameters
    ----------
    p1 : int
        Size of subpop1 (forwarded to ``get_ac_seg``).
    pos :
        Forwarded to ``get_ac_seg``; presumably the variant positions.
    gt :
        Forwarded to ``get_ac_seg``; presumably the genotype data. The
        exact type is defined by that helper.
    win_size : int
        Window size passed as ``size``.
    length_bp : int
        Passed as the ``stop`` coordinate.

    Returns
    -------
    dxy_win
        First element of the ``allel.windowed_divergence`` result.

    """
    counts_a, counts_b, seg_pos = get_ac_seg(p1, pos, gt)
    window_args = {"size": win_size, "start": 1, "stop": length_bp}
    result = allel.windowed_divergence(seg_pos, counts_a, counts_b,
                                       **window_args)
    return result[0]
Пример #4
0
    def test_windowed_divergence(self):
        """Check windowed divergence with two haplotypes per population."""
        haplotypes = HaplotypeArray([[0, 0, 0, 0], [0, 0, 0, 1],
                                     [0, 0, 1, 1], [0, 1, 1, 1],
                                     [1, 1, 1, 1], [0, 0, 1, 2],
                                     [0, 1, 1, 2], [0, 1, -1, -1],
                                     [-1, -1, -1, -1]])
        # columns 0-1 make up the first population, columns 2-3 the second
        counts_a = haplotypes.take([0, 1], axis=1).count_alleles()
        counts_b = haplotypes.take([2, 3], axis=1).count_alleles()
        positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

        # Per-site mean pairwise divergence, in site order:
        #   [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
        # Summed per window of 10 starting at 1 this gives 6/4 (sites 2, 4,
        # 7), 9/4 (sites 14, 15, 18, 19) and 0 (sites 25, 27); each sum is
        # divided by the number of bases expected for that window.
        window_sums = (6 / 4, 9 / 4, 0)
        window_bases = (10, 10, 11)
        expect = [total / n_bases
                  for total, n_bases in zip(window_sums, window_bases)]

        actual, _windows, _n_bases, _counts = allel.windowed_divergence(
            positions, counts_a, counts_b, size=10, start=1, stop=31)
        assert_array_almost_equal(expect, actual)
Пример #5
0
         size=winsize,
         start=start,
         stop=stop,
         step=int(winsize / 2))
     new_dat = format_results(stat=tajD,
                              stat_name="tajD",
                              chrom=chrom,
                              windows=windows,
                              nvar=counts,
                              pop=pop)
     df_list.append(new_dat)
 # Windowed Dxy between the allele counts `ac` and `ac2`. Runs only when
 # 'dxy' is found in args.s and args.p2 differs from the literal string
 # "None" (a string comparison, not a check against the None object).
 if 'dxy' in args.s and args.p2 != "None":
     # step is half the window size, so consecutive windows overlap by half
     dxy, windows, n_bases, counts = allel.windowed_divergence(
         pos,
         ac,
         ac2,
         size=winsize,
         start=start,
         stop=stop,
         step=int(winsize / 2))
     # n_bases is not used in this branch; counts is forwarded as `nvar`
     new_dat = format_results(stat=dxy,
                              stat_name="dxy",
                              chrom=chrom,
                              windows=windows,
                              nvar=counts,
                              pop=pop)
     df_list.append(new_dat)
 if 'FD' in args.s and args.p2 != "None":
     FD, windows, n_bases, counts = allel.windowed_df(
         pos,
         ac,
         ac2,
#
# Maybe if we look at Dxy we see something clearer?

# In[28]:

# Boolean selection of variants around the locus: the interval
# [loc_start, loc_end] widened by 1e5 on each side. The lower bound is
# exclusive (>), the upper bound inclusive (<=).
clu_varbool = np.logical_and(oc_genvars_seg["POS"] > loc_start - 1e5,
                             oc_genvars_seg["POS"] <= loc_end + 1e5)
# Positions of the selected variants.
# NOTE(review): this is the same expression that is passed as pos= to each
# windowed_divergence call below.
clu_ehh_pos = oc_genvars_seg["POS"].subset(sel0=clu_varbool)

# Window size and step for the calls below; step < size, so consecutive
# windows overlap.
size = 5000
step = 1000

# divergence between col-296G and gam-296G on 0 background
# (start/stop are not passed, so the window bounds are left to the defaults
# of windowed_divergence)
dxy_div_col02_gam02 = allel.windowed_divergence(
    ac1=oc_genalco_sps_seg_inv_gty["col_0_2"].subset(sel0=clu_varbool),
    ac2=oc_genalco_sps_seg_inv_gty["gam_0_2"].subset(sel0=clu_varbool),
    pos=oc_genvars_seg["POS"].subset(sel0=clu_varbool),
    size=size,
    step=step)

# divergence between col-296G and gam-wt on 0 background
dxy_div_col02_gam00 = allel.windowed_divergence(
    ac1=oc_genalco_sps_seg_inv_gty["col_0_2"].subset(sel0=clu_varbool),
    ac2=oc_genalco_sps_seg_inv_gty["gam_0_0"].subset(sel0=clu_varbool),
    pos=oc_genvars_seg["POS"].subset(sel0=clu_varbool),
    size=size,
    step=step)

# divergence between col-wt and gam-296G on 0 background
dxy_div_col00_gam02 = allel.windowed_divergence(
    ac1=oc_genalco_sps_seg_inv_gty["col_0_0"].subset(sel0=clu_varbool),
    ac2=oc_genalco_sps_seg_inv_gty["gam_0_2"].subset(sel0=clu_varbool),
Пример #7
0
def win_pi_sims(path, neut_mut, n_pops, n_sims, T, win_size, L, N, plot=False):
    """Compute windowed pi, Tajima's D, Dxy and Fst for a set of simulations.

    For every entry of ``T`` and every simulation index below ``n_sims``,
    exactly one tree-sequence file matching
    ``path + "<T>N_sim_<i>_RAND_*[0-9]_overlaid.trees"`` is loaded. Allele
    counts are obtained with ``ac_from_ts`` and the four statistics are
    computed in windows of ``win_size`` from coordinate 1 to ``L``. The
    resulting arrays are pickled to ``path + <basename> + "_<stat>.pkl"``
    for ``pis``, ``tajd``, ``div`` and ``fst``.

    Parameters
    ----------
    path : str
        Directory prefix. Its last character is dropped to derive the base
        filename and file names are appended by plain concatenation, so it
        is expected to end with a path separator.
    neut_mut :
        Not used by this function; kept for interface compatibility.
    n_pops : int
        Number of populations; all pairs of them are compared.
    n_sims : int
        Number of simulation replicates per entry of ``T``.
    T : sequence
        Time labels as they appear in the file names.
    win_size : int
        Window size.
    L : int
        ``stop`` coordinate; ``int(L / win_size)`` windows are allocated.
    N :
        Forwarded to ``ac_from_ts``.
    plot : bool, optional
        When true, also save a two-panel plot of pi for the first simulation
        at time indices 0 and 9 to ``path + <basename> + "_landscape.pdf"``.
        This needs at least 10 entries in ``T``. Defaults to ``False``,
        which matches the previous behaviour where plotting was disabled.

    Raises
    ------
    AssertionError
        If the glob for a (time, simulation) pair does not match exactly one
        file.
    ValueError
        If ``plot`` is true and ``T`` has fewer than 10 entries.
    """
    if plot and len(T) < 10:
        # fail before the expensive work rather than at the final plot
        raise ValueError("plot=True needs at least 10 time points in T")

    foname = os.path.basename(path[:-1])
    print(("Base filename:" + foname), flush=True)
    combs = list(itertools.combinations(np.arange(n_pops), 2))
    n_windows = int(L / win_size)
    pis = np.zeros((len(T), n_sims, n_pops, n_windows))
    div = np.zeros((len(T), n_sims, len(combs), n_windows))
    fst = np.zeros((len(T), n_sims, len(combs), n_windows))
    tajd = np.zeros((len(T), n_sims, n_pops, n_windows))

    for t, t_label in enumerate(T):
        for i in range(n_sims):
            files = glob(path + str(t_label) + "N_sim_" + str(i) +
                         "_RAND_*[0-9]_overlaid.trees")
            print(files)
            # Explicit raise instead of `assert` so the check survives
            # `python -O`; the exception type is unchanged for callers.
            if len(files) != 1:
                raise AssertionError(
                    str(len(files)) + " file(s) found with glob T: " +
                    str(t_label) + " sim:" + str(i))
            filename = files[0]
            print(filename)
            ts = pyslim.load(filename).simplify()

            s1 = timer()
            acs, pos = ac_from_ts(ts, n_pops, N)
            for j in range(n_pops):
                pi, _windows, _n_bases, _counts = allel.windowed_diversity(
                    pos, acs[j], size=win_size, start=1, stop=L)
                pis[t, i, j, :] = pi
                D, _windows, _counts = allel.windowed_tajima_d(
                    pos, acs[j], size=win_size, start=1, stop=L)
                tajd[t, i, j, :] = D
            s2 = timer()
            print(("Calculating windowed Pi/TajD... Time elapsed (min):" +
                   str(round((s2 - s1) / 60, 3))),
                  flush=True)

            s1 = timer()
            for k, (pop_a, pop_b) in enumerate(combs):
                dxy, _windows, _n_bases, _counts = allel.windowed_divergence(
                    pos, acs[pop_a], acs[pop_b],
                    size=win_size, start=1, stop=L)
                div[t, i, k, :] = dxy
                fstat, _windows, _counts = allel.windowed_hudson_fst(
                    pos, acs[pop_a], acs[pop_b],
                    size=win_size, start=1, stop=L)
                fst[t, i, k, :] = fstat
            s2 = timer()
            print(("Calculating windowed Dxy and Fst... Time elapsed (min):" +
                   str(round((s2 - s1) / 60, 3))),
                  flush=True)

    s1 = timer()
    print((pis.shape), flush=True)
    print((tajd.shape), flush=True)
    print((div.shape), flush=True)
    # `with` closes each file even if pickling fails part-way through
    for stat_name, values in (("pis", pis), ("tajd", tajd),
                              ("div", div), ("fst", fst)):
        with open(path + foname + '_' + stat_name + '.pkl', 'wb') as output:
            pickle.dump(values, output)

    if plot:
        plt.subplot(2, 1, 1)
        plt.plot(np.transpose(pis[0, 0, :]), "-")
        plt.title('0N after split')
        plt.ylabel('Pi')
        plt.subplot(2, 1, 2)
        plt.plot(np.transpose(pis[9, 0, :]), "-")
        plt.title('10N after split')
        plt.xlabel('Window')
        plt.ylabel('Pi')
        plt.tight_layout()
        plt.savefig(path + foname + '_landscape.pdf')
        plt.close()

    s2 = timer()
    print(("Saving stats and plots to file... Time elapsed (min):" +
           str(round((s2 - s1) / 60, 3))),
          flush=True)