def get_stats(ts):
    """Print and return a few basic summary numbers for a tree sequence.

    Args:
        ts: Object exposing ``num_trees``, ``sequence_length``,
            ``num_samples`` and ``num_mutations`` attributes.

    Returns:
        dict with the keys ``'num_trees'``, ``'sequence_length'``,
        ``'num_samples'``, ``'num_mutations'`` and ``'divergence'``.
        ``'divergence'`` is always the placeholder ``-1``.
    """
    stats = {
        'num_trees': ts.num_trees,
        'sequence_length': ts.sequence_length,
        'num_samples': ts.num_samples,
        'num_mutations': ts.num_mutations,
        # Placeholder: divergence is not computed here. The branch-length
        # calculator that used to be built for it was never used, so it is
        # no longer constructed.
        'divergence': -1,
    }
    print(stats)
    return stats
def getBranchLengthSumStats(direc,n):
    """Gather branch-length summary statistics from numbered tree files.

    Don't use this yet.

    Loads ``0.trees`` through ``(n-1).trees`` from ``direc`` with
    ``pyslim.load`` and, for each file, concatenates into one array:

    * the pairwise distances (``scipy.spatial.distance.pdist``) between the
      first two location coordinates of the individuals carrying each sample,
    * the non-NaN divergence values from the branch-length calculator, with
      every sample node passed as its own sample set over one whole-sequence
      window,
    * the first element of the calculator's ``site_frequency_spectrum``
      result for the samples.

    Returns:
        list with one concatenated numpy array per file, in file order
        (empty when ``n`` is 0).
    """
    summaries = []
    for index in range(n):
        tree_path = os.path.join(direc, str(index) + ".trees")
        ts = pyslim.load(tree_path)
        sample_nodes = list(ts.samples())

        # x/y coordinates of the individual that owns each sample node.
        coords = []
        for node_id in sample_nodes:
            location = ts.individual(ts.node(node_id).individual).location
            coords.append([location[0], location[1]])

        calculator = msp.BranchLengthStatCalculator(ts)
        singleton_sets = [[node_id] for node_id in sample_nodes]
        whole_sequence = [0.0, ts.sequence_length]
        genetic = np.array(calculator.divergence(singleton_sets, windows=whole_sequence))
        genetic = genetic[~np.isnan(genetic)]
        spatial = np.array(scipy.spatial.distance.pdist(coords))
        sfs = np.array(calculator.site_frequency_spectrum(sample_nodes)[0])

        summaries.append(np.concatenate((spatial, genetic, sfs)))
    return summaries
def compute_heterozygosity(ts, time, num_targets, mutation_rate=1e-8):
    """Branch-length heterozygosity of randomly chosen individuals.

    Picks ``num_targets`` individuals (without replacement) among those that
    ``ts.individuals_alive(time)`` flags, recapitates the tree sequence,
    simplifies it down to the chosen individuals' nodes, and for each chosen
    individual computes the branch-length divergence of its own nodes over
    the whole sequence.

    Args:
        ts: Tree sequence providing ``individuals_alive``, ``individual`` and
            ``recapitate`` (project type; not defined in this block).
        time: Passed straight to ``ts.individuals_alive``.
        num_targets: Number of individuals to draw. ``np.random.choice``
            raises ``ValueError`` if fewer candidates are available.
        mutation_rate: Currently unused; kept so existing callers that pass
            it keep working.

    Returns:
        ``(het, targets)`` where ``targets`` is the array of chosen
        individual ids in ``ts`` and ``het`` is a numpy array of the
        corresponding divergence values, in the same order.
    """
    alive = ts.individuals_alive(time)
    targets = np.random.choice(np.where(alive)[0], num_targets, replace=False)

    # Collect every node of every target. Remember where each individual's
    # first node lands in the list: simplify() renumbers the requested nodes
    # 0..len-1 in the order given, so that offset is the node's id afterwards.
    # Using the real node count (instead of assuming two per individual)
    # keeps the mapping right for any ploidy.
    target_node_list = []
    first_node_offsets = []
    for ind in targets:
        first_node_offsets.append(len(target_node_list))
        target_node_list.extend(ts.individual(ind).nodes)

    rts = ts.recapitate(1e-9, Ne=1000)
    sts = rts.simplify(target_node_list)
    bsc = msprime.BranchLengthStatCalculator(sts)

    # Individual ids are renumbered by simplify(); recover them via each
    # target's first node.
    new_targets = [sts.node(offset).individual for offset in first_node_offsets]
    het = np.array([
        bsc.divergence([list(sts.individual(u).nodes)],
                       [0.0, sts.sequence_length])[0][0]
        for u in new_targets
    ])
    return (het, targets)
# Scatter-plot pairwise branch-length divergence against geographic distance
# for a random subset of individuals from one tree sequence file.
print(treefile + " -> " + outfile)
ts = pyslim.SlimTreeSequence(pyslim.load(treefile).simplify())
rts = ts.recapitate(recombination_rate=1e-8, Ne=ts.num_samples)

# Draw 300 distinct individuals and collect their nodes and x/y locations.
samples = np.random.choice(rts.num_individuals, 300, replace=False)
sample_genomes = [list(rts.individual(ind).nodes) for ind in samples]
locs = np.array([rts.individual(i).location[:2] for i in samples])

# Full pairwise Euclidean distance matrix between the sampled individuals.
dists = np.sqrt((locs[:, 0][:, np.newaxis] - locs[:, 0])**2
                + (locs[:, 1][:, np.newaxis] - locs[:, 1])**2)

# Flatten the per-individual node lists, then express each individual's nodes
# as positions in that flat list (the ids they get after simplify()).
# A dict lookup replaces repeated list.index() scans. Node ids are unique
# here because individuals are drawn without replacement, so the positions
# are identical to what list.index() returned.
the_genomes = [x for y in sample_genomes for x in y]
genome_index = {u: k for k, u in enumerate(the_genomes)}
remapped_genomes = [[genome_index[u] for u in x] for x in sample_genomes]

sub_ts = rts.simplify(the_genomes)
bc = msprime.BranchLengthStatCalculator(sub_ts)
divs = bc.divergence_matrix(remapped_genomes,
                            windows=[0.0, sub_ts.sequence_length])

# NOTE(review): triu_indices with the default k=0 includes the diagonal, so
# each individual's zero self-distance is plotted too. Confirm this is wanted.
ut = np.triu_indices(dists.shape[0])

fig = plt.figure()
ax = fig.add_subplot(111)
# divs[0] is the single whole-sequence window; the /1000 rescaling is kept
# as in the original (purpose not evident from this fragment).
pts = ax.scatter(dists[ut], divs[0][ut] / 1000,
                 alpha=0.5, s=5, marker='.', linewidths=0)
plt.xlabel("geographic distance")
plt.ylabel("divergence")
fig.set_size_inches([4, 4])
given by discretizing into an (nxm) grid. ''' samples = [[[] for _ in range(n)] for _ in range(m)] locs = np.array(ts.tables.individuals.location) locs.resize((int(len(ts.tables.individuals.location,)/3), 3)) max_x = np.ceil(10*max(locs[:,0]))/10 max_y = np.ceil(10*max(locs[:,1]))/10 for ind in ts.individuals(): if np.random.uniform() < prob: i = int(np.floor(ind.location[0] * n / max_x)) j = int(np.floor(ind.location[1] * m / max_x)) samples[i][j].extend(ind.nodes) return samples sample_grid = grid_samples(ts, 4, 4, 0.05) samples = [a for b in sample_grid for a in b] windows = np.linspace(0.0, ts.sequence_length, 1000) win_mids = (windows[1:] + windows[:-1])/2 bs = msprime.BranchLengthStatCalculator(ts) divs = np.array(bs.divergence(samples, windows=windows)) fig = plt.figure(figsize=(15,4)) for i in range(divs.shape[1]): plt.plot(win_mids, divs[:,i]) plt.savefig(outdir + "/divergences_1000.png", dpi=288)