示例#1
0
def get_stats(ts):
    """Collect basic summary statistics of a tree sequence.

    The statistics are printed to stdout and also returned.

    Args:
        ts: Object exposing ``num_trees``, ``sequence_length``,
            ``num_samples`` and ``num_mutations`` attributes.

    Returns:
        dict: The four attributes above under keys of the same name, plus
        ``'divergence'``, which is currently always the placeholder ``-1``.
    """
    stats = {
        'num_trees': ts.num_trees,
        'sequence_length': ts.sequence_length,
        'num_samples': ts.num_samples,
        'num_mutations': ts.num_mutations,
        # Placeholder: branch-length divergence is not computed yet, so no
        # statistics calculator is constructed here.
        'divergence': -1,
    }
    print(stats)
    return stats
示例#2
0
def getBranchLengthSumStats(direc,n): #don't use this yet
    """Build one summary-statistic vector per tree file in a directory.

    Files ``0.trees`` .. ``<n-1>.trees`` inside ``direc`` are loaded with
    ``pyslim.load``. For each file the returned vector is the concatenation
    of:

    * pairwise distances between the first two location coordinates of the
      individuals carrying the sample nodes,
    * the non-NaN branch-length divergences between single-node sample sets
      over one window spanning the whole sequence,
    * the first entry of the branch-length site frequency spectrum.

    Args:
        direc: Directory containing the ``.trees`` files.
        n: Number of files to read.

    Returns:
        list: One 1-D numpy array per file, in file order.
    """
    rows = []
    for idx in range(n):
        tree_path = os.path.join(direc, str(idx) + ".trees")
        ts = pyslim.load(tree_path)
        sample_nodes = list(ts.samples())

        # (x, y) of the individual that owns each sample node.
        coords = []
        for node_id in sample_nodes:
            location = ts.individual(ts.node(node_id).individual).location
            coords.append([location[0], location[1]])

        calc = msp.BranchLengthStatCalculator(ts)
        singleton_sets = [[node_id] for node_id in sample_nodes]
        whole_sequence = [0.0, ts.sequence_length]
        genetic = np.array(calc.divergence(singleton_sets,
                                           windows=whole_sequence))
        # Keep only the defined divergences.
        genetic = genetic[~np.isnan(genetic)]

        spatial = np.array(scipy.spatial.distance.pdist(coords))
        sfs = np.array(calc.site_frequency_spectrum(sample_nodes)[0])
        rows.append(np.concatenate((spatial, genetic, sfs)))
    return rows
def compute_heterozygosity(ts, time, num_targets, mutation_rate=1e-8):
    """Compute branch-length heterozygosity for randomly chosen individuals.

    ``num_targets`` individuals flagged by ``ts.individuals_alive(time)`` are
    drawn without replacement. The tree sequence is recapitated and
    simplified down to the nodes of those individuals, and for each one the
    branch-length divergence of its own node set over a single window
    spanning the whole sequence is computed.

    Args:
        ts: Tree sequence providing ``individuals_alive``, ``individual``
            and ``recapitate``.
            NOTE(review): presumably a pyslim / spatial SLiM tree sequence;
            confirm against the caller.
        time: Passed straight to ``ts.individuals_alive``.
        num_targets: Number of individuals to draw.
        mutation_rate: Unused; kept so existing callers keep working.

    Returns:
        tuple: ``(het, targets)`` where ``targets`` holds the chosen
        individual IDs in ``ts`` and ``het`` is a numpy array with one value
        per entry of ``targets``, in the same order.

    Raises:
        ValueError: From ``np.random.choice`` when fewer than ``num_targets``
            individuals are alive.
    """
    alive = ts.individuals_alive(time)
    targets = np.random.choice(np.where(alive)[0], num_targets, replace=False)

    # simplify() renumbers the requested nodes 0..len-1 in the order given,
    # so remember where each individual's nodes start in that list instead
    # of assuming exactly two nodes per individual.
    target_node_list = []
    first_nodes = []
    for ind in targets:
        first_nodes.append(len(target_node_list))
        target_node_list.extend(ts.individual(ind).nodes)

    rts = ts.recapitate(1e-9, Ne=1000)
    sts = rts.simplify(target_node_list)
    bsc = msprime.BranchLengthStatCalculator(sts)

    # Map each starting node back to its individual in the simplified tree
    # sequence; individual IDs may differ from those in ``ts``.
    new_targets = [sts.node(u).individual for u in first_nodes]
    whole_sequence = [0.0, sts.sequence_length]
    het = np.array([
        bsc.divergence([list(sts.individual(u).nodes)], whole_sequence)[0][0]
        for u in new_targets
    ])
    return (het, targets)
示例#4
0
    # Report which tree file is being processed and where the result goes.
    print(treefile + " -> " + outfile)

    # Load and simplify the tree sequence, then recapitate it using the
    # number of samples as Ne.
    ts = pyslim.SlimTreeSequence(pyslim.load(treefile).simplify())
    rts = ts.recapitate(recombination_rate=1e-8, Ne=ts.num_samples)
    # Draw 300 distinct individual IDs at random (needs at least 300
    # individuals) and collect each individual's node IDs.
    samples = np.random.choice(rts.num_individuals, 300, replace=False)
    sample_genomes = [list(rts.individual(ind).nodes) for ind in samples]

    # Pairwise Euclidean distances between the first two location
    # coordinates of the sampled individuals, as a square matrix.
    locs = np.array([rts.individual(i).location[:2] for i in samples])
    dists = np.sqrt((locs[:, 0][:, np.newaxis] - locs[:, 0])**2 +
                    (locs[:, 1][:, np.newaxis] - locs[:, 1])**2)

    # Flatten the per-individual node lists and re-express every node as its
    # position in the flattened list. This relies on simplify() renumbering
    # the requested nodes 0..len-1 in the order they are passed.
    the_genomes = [x for y in sample_genomes for x in y]
    remapped_genomes = [[the_genomes.index(u) for u in x]
                        for x in sample_genomes]
    sub_ts = rts.simplify(the_genomes)
    bc = msprime.BranchLengthStatCalculator(sub_ts)
    # Divergence between every pair of individuals over a single window
    # covering the whole sequence.
    divs = bc.divergence_matrix(remapped_genomes,
                                windows=[0.0, sub_ts.sequence_length])
    # Upper-triangle indices (diagonal included) so each pair is plotted once.
    ut = np.triu_indices(dists.shape[0])

    # Scatter geographic distance against the first window's divergence.
    # NOTE(review): the division by 1000 is an unexplained scale factor;
    # confirm the intended units.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    pts = ax.scatter(dists[ut],
                     divs[0][ut] / 1000,
                     alpha=0.5,
                     s=5,
                     marker='.',
                     linewidths=0)
    plt.xlabel("geographic distance")
    plt.ylabel("divergence")
    fig.set_size_inches([4, 4])
        given by discretizing into an (nxm) grid.
        '''
        samples = [[[] for _ in range(n)] for _ in range(m)]
        locs = np.array(ts.tables.individuals.location)
        locs.resize((int(len(ts.tables.individuals.location,)/3), 3))
        max_x = np.ceil(10*max(locs[:,0]))/10
        max_y = np.ceil(10*max(locs[:,1]))/10
        for ind in ts.individuals():
            if np.random.uniform() < prob:
                i = int(np.floor(ind.location[0] * n / max_x))
                j = int(np.floor(ind.location[1] * m / max_x))
                samples[i][j].extend(ind.nodes)
        return samples


    # Bin a random 5% subsample of individuals into a 4x4 spatial grid, then
    # flatten the grid into a list with one sample set per cell.
    # NOTE(review): a cell that received no individuals yields an empty
    # sample set; confirm the divergence call below tolerates that.
    sample_grid = grid_samples(ts, 4, 4, 0.05)
    samples = [a for b in sample_grid for a in b]
    # 1000 evenly spaced breakpoints, i.e. 999 windows, and their midpoints
    # for use as x coordinates when plotting.
    windows = np.linspace(0.0, ts.sequence_length, 1000)
    win_mids = (windows[1:] + windows[:-1])/2

    bs = msprime.BranchLengthStatCalculator(ts)

    # NOTE(review): assumes the result has one row per window and one column
    # per sample-set pair, as the plotting loop below requires; confirm.
    divs = np.array(bs.divergence(samples, windows=windows))

    fig = plt.figure(figsize=(15,4))

    # One line per column of `divs`, plotted along the genome.
    for i in range(divs.shape[1]):
        plt.plot(win_mids, divs[:,i])

    plt.savefig(outdir + "/divergences_1000.png", dpi=288)