def test_legacy_errors(self):
    """Check that each legacy encode/decode helper raises a "legacy" ValueError.

    Every ``pyslim.decode_*`` and ``pyslim.encode_*`` function for the
    mutation, population, individual and node tables is called with the
    corresponding default metadata, and each call is expected to raise a
    ``ValueError`` whose message matches ``"legacy"``.
    """
    defaults = pyslim.default_slim_metadata
    table_kinds = ('mutation', 'population', 'individual', 'node')
    # All decoders are exercised first, then all encoders.
    for action in ('decode', 'encode'):
        for kind in table_kinds:
            with self.assertRaisesRegex(ValueError, "legacy"):
                legacy_function = getattr(pyslim, action + '_' + kind)
                legacy_function(defaults(kind))
def test_mutation_metadata(self):
    """Round-trip lists of MutationMetadata through encode/decode.

    For lists of length 0, 1 and 5, the metadata is encoded with
    ``pyslim.encode_mutation`` and decoded with ``pyslim.decode_mutation``;
    the decoded list must have the same length and equal entries.
    """
    for md_length in [0, 1, 5]:
        md = []
        for j in range(md_length):
            entry = pyslim.MutationMetadata(
                mutation_type=j,
                selection_coeff=0.5,
                population=j,
                slim_time=10 + j,
                # cycles through -1, 0, 1, 2, 3
                nucleotide=(j % 5) - 1)
            md.append(entry)

        encoded = pyslim.encode_mutation(md)
        decoded = pyslim.decode_mutation(encoded)

        self.assertEqual(len(md), len(decoded))
        for expected, actual in zip(md, decoded):
            self.assertEqual(expected, actual)
def test_annotate_mutations(self):
    """Check that re-annotating mutation metadata leaves the tables unchanged.

    For each example tree sequence, every mutation's metadata is decoded and
    re-encoded (which must reproduce the original bytes), and the decoded
    metadata is then written back into a separate copy of the tables with
    ``pyslim.annotate_mutation_metadata``.  The copy must compare equal to
    the original tables.
    """
    for ts in self.get_slim_examples():
        # Reference tables: only read from, never modified.
        tables = ts.tables
        # ``ts.tables`` is intended for read-only access, so take an
        # independent, modifiable copy for the tables that get re-annotated.
        # Otherwise the final comparison could end up comparing an object
        # with itself.
        new_tables = ts.dump_tables()
        metadata = []
        for md in tskit.unpack_bytes(tables.mutations.metadata,
                                     tables.mutations.metadata_offset):
            dm = pyslim.decode_mutation(md)
            edm = pyslim.encode_mutation(dm)
            # decode followed by encode must be lossless
            self.assertEqual(md, edm)
            metadata.append(dm)
        pyslim.annotate_mutation_metadata(new_tables, metadata)
        self.assertEqual(tables, new_tables)
import pyslim
import msprime

# Load the tree sequence and show its tables.
ts = pyslim.load("simple.trees")
tables = ts.tables
print(tables)

# mutations: decode each metadata entry, check that re-encoding reproduces
# the original bytes, then write the decoded metadata back into the tables.
mut_metadata = []
mutation_blobs = msprime.unpack_bytes(tables.mutations.metadata,
                                      tables.mutations.metadata_offset)
for raw_mutation in mutation_blobs:
    decoded_mutation = pyslim.decode_mutation(raw_mutation)
    assert raw_mutation == pyslim.encode_mutation(decoded_mutation)
    mut_metadata.append(decoded_mutation)
pyslim.annotate_mutations(tables, mut_metadata)

# nodes: same round-trip check and re-annotation for the node table.
node_metadata = []
node_blobs = msprime.unpack_bytes(tables.nodes.metadata,
                                  tables.nodes.metadata_offset)
for raw_node in node_blobs:
    decoded_node = pyslim.decode_node(raw_node)
    assert raw_node == pyslim.encode_node(decoded_node)
    node_metadata.append(decoded_node)
pyslim.annotate_nodes(tables, node_metadata)
def throw_mut_on_tree(ts):
    """Throw a single mutation onto an unmutated tree sequence.

    The mutation represents the standing variant in a sweep that starts
    immediately after burn-in.  A tree is drawn with probability
    proportional to its total branch length times the integer sequence
    extent it covers; a branch is then drawn proportionally to its length,
    retrying until the fraction of samples below it is at least ``args.c``;
    finally an integer position inside the tree's interval is drawn.

    Reads the module-level ``args`` namespace: ``af``, ``Ne``, ``c`` and
    ``q`` (when ``q`` is falsy the genotype matrix and count are printed).

    Args:
        ts: a tree sequence with no sites.

    Returns:
        A tuple ``(mut_base, freq, mut_ts)``: the mutation position (a
        length-1 float array), the derived-allele count divided by ``n``,
        and the new tree sequence carrying the mutation.
    """
    # Denominator used for all frequencies below.
    if not args.af:
        n = args.Ne
    else:
        n = 2 * 14474
    c = args.c

    # find total tree length times sequence extent
    tree_sizes = np.array([
        t.total_branch_length
        * (np.ceil(t.interval[1]) - np.ceil(t.interval[0]))
        for t in ts.trees()
    ])
    tree_sizes /= sum(tree_sizes)

    # pick the tree (unwrap the length-1 result to a scalar index)
    tree_index = np.random.choice(ts.num_trees, size=1, p=tree_sizes)[0]
    for i, t in enumerate(ts.trees()):
        if i == tree_index:
            break
    assert t.index == tree_index

    # pick the branch: walk the non-root nodes, subtracting branch lengths
    # from a uniform draw on the total length; redraw until the chosen
    # branch subtends a fraction of at least ``c``
    cpicked = -1
    while cpicked < c:
        treeloc = t.total_branch_length * np.random.uniform()
        for mut_n in t.nodes():
            if mut_n != t.root:
                treeloc -= t.branch_length(mut_n)
                if treeloc <= 0:
                    cpicked = t.num_samples(mut_n) / n
                    break

    # pick the location on the sequence
    mut_base = 0.0 + np.random.randint(
        low=np.ceil(t.interval[0]),
        high=np.ceil(t.interval[1]),
        size=1)

    # the following assumes that there's no other mutations in the tree sequence
    assert ts.num_sites == 0

    # the mutation metadata
    mut_md = pyslim.MutationMetadata(
        mutation_type=1, selection_coeff=0.0, population=1, slim_time=1)

    # ``ts.tables`` is meant for read-only access; take a modifiable copy
    # before adding the new site and mutation rows.
    tables = ts.dump_tables()
    site_id = tables.sites.add_row(position=mut_base, ancestral_state=b'')
    tables.mutations.add_row(
        site=site_id,
        node=mut_n,
        derived_state='1',
        metadata=pyslim.encode_mutation([mut_md]))
    mut_ts = pyslim.load_tables(tables)

    # genotypes: compute the matrix once and reuse it
    genotypes = mut_ts.genotype_matrix()
    derived_count = np.sum(genotypes)
    if not args.q:
        print(genotypes)
        print('%d / %d' % (derived_count, n))
    freq = derived_count / n
    return mut_base, freq, mut_ts