def test_reload_annotate(self):
    """Check that SLiM can reload our files after mutation re-annotation.

    For every restartable SLiM example, the mutation metadata is
    extracted, randomised (nucleotides where the example carries them,
    and selection coefficients always), written back, and the result
    is round-tripped through SLiM.
    """
    for ts, basename in self.get_slim_restarts():
        tables = ts.tables
        mut_metadata = list(pyslim.extract_mutation_metadata(tables))
        # A non-negative nucleotide on the first record is taken to mean
        # that the example stores nucleotides.
        if mut_metadata[0][0].nucleotide >= 0:
            nucs = [random.choice([0, 1, 2, 3]) for _ in mut_metadata]
            refseq = "".join(
                random.choices(pyslim.NUCLEOTIDES,
                               k=int(ts.sequence_length)))
            # Every stacked mutation at one record gets the same nucleotide.
            for nuc, stacked in zip(nucs, mut_metadata):
                for entry in stacked:
                    entry.nucleotide = nuc
        else:
            refseq = None
        for stacked in mut_metadata:
            for entry in stacked:
                entry.selection_coeff = random.random()
        pyslim.annotate_mutation_metadata(tables, mut_metadata)
        in_ts = pyslim.load_tables(tables, reference_sequence=refseq)
        # SLiM only reads the file in and writes it back out.
        out_ts = self.run_slim_restart(in_ts, basename)
        # Everything except the last provenance entry must match.
        self.verify_slim_restart_equality(in_ts, out_ts)
def _update_tables(self):
    """Remove the extra pseudo-population and its unconnected nodes.

    Works on a mutable copy of ``self.tree_sequence``'s tables, assigns
    every node to population 0, simplifies down to the nodes that appear
    in the edge table, and stores the reloaded tree sequence back on
    ``self.tree_sequence``.
    """
    # Mutable tskit.TableCollection copy of the current tree sequence.
    tables = self.tree_sequence.dump_tables()

    # Per the original author: SLiM records a null population (0) that
    # does not really exist next to the actual population (1), so every
    # node is relabelled as population 0.
    tables.nodes.population = np.zeros(
        tables.nodes.time.size, dtype=np.int32)

    # NOTE: halving of the SLiM generation and of node/mutation times
    # was tried here previously and is intentionally left disabled.

    # Only nodes referenced by at least one edge are kept as samples;
    # this drops the pseudo-nodes that are not connected to anything.
    connected_nodes = list(
        set(tables.edges.parent).union(tables.edges.child))

    simplify_options = {
        "keep_input_roots": True,
        "filter_individuals": True,
        "filter_populations": True,
        "filter_sites": False,
    }
    tables.simplify(samples=connected_nodes, **simplify_options)

    # Turn the edited tables back into a tree sequence.
    self.tree_sequence = pyslim.load_tables(tables)
def test_annotate_individuals(self):
    """Randomise individual sexes, re-annotate, and check the round trip."""
    sex_options = [
        pyslim.INDIVIDUAL_TYPE_FEMALE,
        pyslim.INDIVIDUAL_TYPE_MALE,
    ]
    for ts in self.get_msprime_examples():
        slim_ts = pyslim.annotate_defaults(
            ts, model_type="nonWF", slim_generation=1)
        tables = slim_ts.tables
        metadata = list(pyslim.extract_individual_metadata(tables))
        self.assertEqual(len(metadata), slim_ts.num_individuals)

        # One random sex per individual, written into the metadata.
        sexes = [random.choice(sex_options) for _ in metadata]
        for record, sex in zip(metadata, sexes):
            record.sex = sex
        pyslim.annotate_individual_metadata(tables, metadata)

        new_ts = pyslim.load_tables(tables)
        for j, ind in enumerate(new_ts.individuals()):
            self.assertEqual(ind.metadata.sex, sexes[j])

        # Try loading this into SLiM; the result is not inspected here.
        self.run_msprime_restart(new_ts, sex="A")
        self.verify_annotated_tables(new_ts, slim_ts)
        self.verify_annotated_trees(new_ts, slim_ts)
        self.verify_haplotype_equality(new_ts, slim_ts)
def test_load(self, recipe):
    """Load one recipe's tree file by three routes and compare the results.

    The file is read with plain tskit, then converted via
    ``pyslim.load_tables``, via the ``SlimTreeSequence`` constructor, and
    via ``pyslim.load``. All routes must give equal table collections.
    """
    path = recipe["path"]["ts"]

    # Reference: plain tskit load.
    msp_ts = tskit.load(path)
    assert isinstance(msp_ts, tskit.TreeSequence)
    msp_tables = msp_ts.dump_tables()

    def check_conversion(converted):
        # Shared checks for tree sequences converted from ``msp_ts``.
        assert isinstance(converted, pyslim.SlimTreeSequence)
        self.verify_times(msp_ts, converted)
        self.assertTableCollectionsEqual(
            msp_tables, converted.dump_tables())

    # Route 1: transfer the tables.
    check_conversion(pyslim.load_tables(msp_tables))

    # Route 2: convert the tree sequence directly.
    new_ts = pyslim.SlimTreeSequence(msp_ts)
    check_conversion(new_ts)

    # Route 3: load to pyslim straight from the file.
    slim_ts = pyslim.load(path)
    assert isinstance(slim_ts, pyslim.SlimTreeSequence)
    self.assertTableCollectionsEqual(msp_tables, slim_ts.dump_tables())
    slim_generation = slim_ts.metadata['SLiM']['generation']
    assert slim_generation == new_ts.metadata['SLiM']['generation']
def test_load(self):
    """Load each SLiM example by three routes (legacy metadata) and compare."""
    for _, ex in self.get_slim_examples(return_info=True):
        path = ex['basename'] + ".trees"

        # Reference: plain tskit load.
        msp_ts = tskit.load(path)
        self.assertIs(type(msp_ts), msprime.TreeSequence)
        msp_tables = msp_ts.tables

        # Route 1: transfer the tables.
        from_tables = pyslim.load_tables(msp_tables, legacy_metadata=True)
        self.assertIsInstance(from_tables, pyslim.SlimTreeSequence)
        self.verify_times(msp_ts, from_tables)
        self.assertTableCollectionsEqual(msp_tables, from_tables.tables)

        # Route 2: convert the tree sequence directly.
        new_ts = pyslim.SlimTreeSequence(msp_ts)
        self.assertIs(type(new_ts), pyslim.SlimTreeSequence)
        self.verify_times(msp_ts, new_ts)
        self.assertTableCollectionsEqual(msp_tables, new_ts.tables)

        # Route 3: load to pyslim straight from the file.
        slim_ts = pyslim.load(path, legacy_metadata=True)
        self.assertIs(type(slim_ts), pyslim.SlimTreeSequence)
        self.assertTableCollectionsEqual(msp_tables, slim_ts.tables)
        self.assertEqual(slim_ts.slim_generation, new_ts.slim_generation)
def test_reload_annotate(self):
    """Test the ability of SLiM to load our files after annotation.

    Mutation metadata rows are edited as plain dicts, re-encoded with
    the table's own metadata schema, and the result is round-tripped
    through a no-op SLiM restart.
    """
    for ts, basename in self.get_slim_restarts(no_op=True):
        tables = ts.tables
        rows = [mut.metadata for mut in tables.mutations]
        if tables.metadata['SLiM']['nucleotide_based']:
            nucs = [random.choice([0, 1, 2, 3]) for _ in rows]
            refseq = "".join(
                random.choices(pyslim.NUCLEOTIDES,
                               k=int(ts.sequence_length)))
            # Every stacked mutation in one row gets the same nucleotide.
            for nuc, row in zip(nucs, rows):
                for entry in row['mutation_list']:
                    entry["nucleotide"] = nuc
        else:
            refseq = None
        for row in rows:
            for entry in row['mutation_list']:
                entry["selection_coeff"] = random.random()

        schema = tables.mutations.metadata_schema
        encoded = list(map(schema.validate_and_encode_row, rows))
        tables.mutations.packset_metadata(encoded)

        in_ts = pyslim.load_tables(tables, reference_sequence=refseq)
        # SLiM only reads the file in and writes it back out.
        out_ts = self.run_slim_restart(in_ts, basename)
        # Everything except the last provenance entry must match.
        self.verify_slim_restart_equality(in_ts, out_ts)
def test_annotate_individuals(self):
    """Assign random sexes via the metadata schema and reload through SLiM."""
    sex_options = [pyslim.INDIVIDUAL_TYPE_FEMALE,
                   pyslim.INDIVIDUAL_TYPE_MALE]
    for ts in self.get_msprime_examples():
        slim_ts = pyslim.annotate_defaults(
            ts, model_type="nonWF", slim_generation=1)
        tables = slim_ts.tables

        # Flag the model as having separate sexes in top-level metadata.
        top_md = tables.metadata
        top_md['SLiM']['separate_sexes'] = True
        tables.metadata = top_md

        # One random sex per individual, then re-encode the rows.
        ind_rows = [ind.metadata for ind in tables.individuals]
        sexes = [random.choice(sex_options) for _ in ind_rows]
        for row, sex in zip(ind_rows, sexes):
            row["sex"] = sex
        ind_schema = tables.individuals.metadata_schema
        tables.individuals.packset_metadata(
            list(map(ind_schema.validate_and_encode_row, ind_rows)))

        pop_rows = [pop.metadata for pop in tables.populations]
        for row in pop_rows:
            # nonWF models always have this
            row['sex_ratio'] = 0.0
        pop_schema = tables.populations.metadata_schema
        tables.populations.packset_metadata(
            list(map(pop_schema.validate_and_encode_row, pop_rows)))

        new_ts = pyslim.load_tables(tables)
        for j, ind in enumerate(new_ts.individuals()):
            self.assertEqual(ind.metadata["sex"], sexes[j])
        self.verify_annotated_tables(new_ts, slim_ts)
        self.verify_annotated_trees(new_ts, slim_ts)
        self.verify_haplotype_equality(new_ts, slim_ts)

        # Try loading this into SLiM and compare what comes back.
        loaded_ts = self.run_msprime_restart(new_ts, sex="A")
        self.verify_trees_equal(new_ts, loaded_ts)
def test_recover_metadata(self):
    """Top-level metadata must be restored after it has been wiped.

    msprime <=0.7.5 discards metadata, but we can recover it from
    provenance; this wipes the schema and metadata and checks that
    ``pyslim.load_tables`` brings the metadata back.
    """
    for ts in self.get_slim_examples():
        stripped = ts.tables
        stripped.metadata_schema = tskit.MetadataSchema(None)
        stripped.metadata = b''
        recovered = pyslim.load_tables(stripped)
        self.assertEqual(recovered.metadata, ts.metadata)
def test_load_tables(self):
    """Round-trip each SLiM example through ``pyslim.load_tables``."""
    for ts in self.get_slim_examples():
        self.assertIsInstance(ts, pyslim.SlimTreeSequence)
        original_tables = ts.tables
        reloaded = pyslim.load_tables(original_tables)
        self.assertIsInstance(reloaded, pyslim.SlimTreeSequence)
        # Reloading must leave the tables unchanged.
        self.assertEqual(original_tables, reloaded.tables)
def test_load_tables(self, recipe):
    """Round-trip the recipe's tree sequence through ``pyslim.load_tables``."""
    original = recipe["ts"]
    assert isinstance(original, pyslim.SlimTreeSequence)
    original_tables = original.dump_tables()
    reloaded = pyslim.load_tables(original_tables)
    assert isinstance(reloaded, pyslim.SlimTreeSequence)
    # Reloading must leave the tables unchanged.
    reloaded_tables = reloaded.dump_tables()
    assert original_tables == reloaded_tables
def test_recover_metadata(self, recipe):
    """Top-level metadata must be restored after it has been wiped.

    msprime <=0.7.5 discards metadata, but we can recover it from
    provenance; this wipes the schema and metadata and checks that
    ``pyslim.load_tables`` brings the metadata back.
    """
    original = recipe["ts"]
    stripped = original.dump_tables()
    stripped.metadata_schema = tskit.MetadataSchema(None)
    stripped.metadata = b''
    recovered = pyslim.load_tables(stripped)
    assert recovered.metadata == original.metadata
def test_load_tables(self):
    """Round-trip each SLiM example through ``load_tables`` (legacy metadata)."""
    for ts in self.get_slim_examples():
        # Exact type is required here, not merely a subclass.
        self.assertIs(type(ts), pyslim.SlimTreeSequence)
        original_tables = ts.tables
        reloaded = pyslim.load_tables(original_tables, legacy_metadata=True)
        self.assertIs(type(reloaded), pyslim.SlimTreeSequence)
        self.assertEqual(original_tables, reloaded.tables)
def test_annotate_XY(self):
    """Annotate msprime examples as X- or Y-chromosome models and reload.

    For each example and each modelled chromosome ("X" or "Y"),
    individuals get random sexes and node metadata is set so that a
    genome is null when its type is not the modelled chromosome. The
    result is checked against the default annotation and against what
    SLiM returns.
    """
    random.seed(8)
    sex_options = [
        pyslim.INDIVIDUAL_TYPE_FEMALE,
        pyslim.INDIVIDUAL_TYPE_MALE,
    ]
    for ts in self.get_msprime_examples():
        for genome_type in ["X", "Y"]:
            slim_ts = pyslim.annotate_defaults(
                ts, model_type="nonWF", slim_generation=1)
            tables = slim_ts.tables

            # Flag the model as having separate sexes.
            top_md = tables.metadata
            top_md['SLiM']['separate_sexes'] = True
            tables.metadata = top_md

            # Random sex per individual.
            ind_rows = [ind.metadata for ind in tables.individuals]
            sexes = [random.choice(sex_options) for _ in ind_rows]
            for row, sex in zip(ind_rows, sexes):
                row["sex"] = sex
            ind_schema = tables.individuals.metadata_schema
            tables.individuals.packset_metadata(
                list(map(ind_schema.validate_and_encode_row, ind_rows)))

            # A genome is null when it is not the modelled chromosome.
            x_is_null = (genome_type != "X")
            y_is_null = (genome_type != "Y")
            node_rows = [node.metadata for node in tables.nodes]
            for j in range(slim_ts.num_individuals):
                nodes = slim_ts.individual(j).nodes
                # First genome is always an X.
                first = node_rows[nodes[0]]
                first["genome_type"] = pyslim.GENOME_TYPE_X
                first["is_null"] = x_is_null
                # Second genome is a Y in males, an X otherwise.
                second = node_rows[nodes[1]]
                if sexes[j] == pyslim.INDIVIDUAL_TYPE_MALE:
                    second["genome_type"] = pyslim.GENOME_TYPE_Y
                    second["is_null"] = y_is_null
                else:
                    second["genome_type"] = pyslim.GENOME_TYPE_X
                    second["is_null"] = x_is_null
            node_schema = tables.nodes.metadata_schema
            tables.nodes.packset_metadata(
                list(map(node_schema.validate_and_encode_row, node_rows)))

            pop_rows = [pop.metadata for pop in tables.populations]
            for row in pop_rows:
                # nonWF models always have this
                row['sex_ratio'] = 0.0
            pop_schema = tables.populations.metadata_schema
            tables.populations.packset_metadata(
                list(map(pop_schema.validate_and_encode_row, pop_rows)))

            new_ts = pyslim.load_tables(tables)
            self.verify_annotated_tables(new_ts, slim_ts)
            self.verify_annotated_trees(new_ts, slim_ts)
            self.verify_haplotype_equality(new_ts, slim_ts)

            # Try loading this into SLiM and compare what comes back.
            loaded_ts = self.run_msprime_restart(new_ts, sex=genome_type)
            self.verify_trees_equal(new_ts, loaded_ts)
def __init__(
    self,
    files: list = None,      # paths of SLiM .trees files to load
    simlength: int = None,   # length in generations
    popsize: int = None,     # initial size of each population
    recomb: float = None,    # recombination rate
    mutrate: float = None,   # mutation rate
    chromosome=None,         # 'shadie.chromosome.ChromosomeBase'
    altgen: bool = True,     # is the model altgen or not?
):
    """Load SLiM .trees files and collapse each to a single population.

    Every file in ``files`` is loaded with ``pyslim.load``, all of its
    nodes are assigned to population 0, and the result is simplified
    (keeping input roots and unary nodes in individuals). The simplified
    tree sequences are stored on ``self.onepop_tslist``.

    Parameters
    ----------
    files:
        Paths of the .trees files to read. The original note says
        "exactly two files (for now)", but this is not enforced here.
        ``None`` is treated as an empty list.
    simlength, popsize, recomb, chromosome:
        Stored unchanged on the instance.
    mutrate:
        Mutation rate. Stored halved when ``altgen is True``; ``None``
        is stored as ``None``.
    altgen:
        Whether the model alternates generations.
    """
    # Halve the rate for altgen models. Leave an unset rate alone:
    # previously the default arguments raised TypeError here.
    if altgen is True and mutrate is not None:
        self.mutrate = mutrate / 2
    else:
        self.mutrate = mutrate

    self.chromosome = chromosome
    self.simlength = simlength
    self.recomb = recomb
    self.popsize = popsize
    self.pops = None
    # NOTE(review): self.species is initialised empty and is not filled
    # in here; the loaded sequences end up in onepop_tslist only.
    self.species = []

    onepop_tslist = []
    for path in ([] if files is None else files):
        ts = pyslim.load(path)

        # Merge the p0 and p1 populations: label every node population 0.
        tables = ts.tables
        tables.nodes.population = np.zeros(
            tables.nodes.num_rows, dtype=np.int32)
        merged_ts = pyslim.load_tables(tables)

        # Simplifying removes the now-unused extra population.
        onepop_tslist.append(
            merged_ts.simplify(keep_input_roots=True,
                               keep_unary_in_individuals=True))
    self.onepop_tslist = onepop_tslist
def test_annotate_mutations(self):
    """Randomise selection coefficients and check they survive re-annotation."""
    for ts in get_msprime_examples():
        slim_ts = pyslim.annotate_defaults(
            ts, model_type="nonWF", slim_generation=1)
        tables = slim_ts.tables
        metadata = list(pyslim.extract_mutation_metadata(tables))
        self.assertEqual(len(metadata), slim_ts.num_mutations)

        # One random coefficient per mutation record.
        selcoefs = [random.uniform(0, 1) for _ in metadata]
        for record, coef in zip(metadata, selcoefs):
            record.selection_coeff = coef
        pyslim.annotate_mutation_metadata(tables, metadata)

        new_ts = pyslim.load_tables(tables)
        for j, mut in enumerate(new_ts.mutations()):
            decoded = pyslim.decode_mutation(mut.metadata)
            self.assertEqual(decoded.selection_coeff, selcoefs[j])
def test_annotate_individuals(self):
    """Randomise individual sexes and check they survive re-annotation."""
    sex_options = [pyslim.INDIVIDUAL_TYPE_FEMALE,
                   pyslim.INDIVIDUAL_TYPE_MALE]
    for ts in get_msprime_examples():
        slim_ts = pyslim.annotate_defaults(
            ts, model_type="nonWF", slim_generation=1)
        tables = slim_ts.tables
        metadata = list(pyslim.extract_individual_metadata(tables))
        self.assertEqual(len(metadata), slim_ts.num_individuals)

        # One random sex per individual.
        sexes = [random.choice(sex_options) for _ in metadata]
        for record, sex in zip(metadata, sexes):
            record.sex = sex
        pyslim.annotate_individual_metadata(tables, metadata)

        new_ts = pyslim.load_tables(tables)
        for j, ind in enumerate(new_ts.individuals()):
            decoded = pyslim.decode_individual(ind.metadata)
            self.assertEqual(decoded.sex, sexes[j])
def test_annotate_mutations(self):
    """Set random selection coefficients via the schema and read them back."""
    for ts in self.get_msprime_examples():
        slim_ts = pyslim.annotate_defaults(
            ts, model_type="nonWF", slim_generation=1)
        tables = slim_ts.tables
        rows = [mut.metadata for mut in tables.mutations]

        # Only the first entry of each row's mutation list is edited.
        selcoefs = [random.uniform(0, 1) for _ in rows]
        for row, coef in zip(rows, selcoefs):
            row['mutation_list'][0]["selection_coeff"] = coef

        schema = tables.mutations.metadata_schema
        tables.mutations.packset_metadata(
            list(map(schema.validate_and_encode_row, rows)))

        new_ts = pyslim.load_tables(tables)
        for j, mut in enumerate(new_ts.mutations()):
            first_entry = mut.metadata['mutation_list'][0]
            self.assertEqual(first_entry["selection_coeff"], selcoefs[j])
def test_annotate_nodes(self):
    """Randomise node genome types and check they survive re-annotation."""
    type_options = [pyslim.GENOME_TYPE_X, pyslim.GENOME_TYPE_Y]
    for ts in get_msprime_examples():
        slim_ts = pyslim.annotate_defaults(
            ts, model_type="nonWF", slim_generation=1)
        tables = slim_ts.tables
        metadata = list(pyslim.extract_node_metadata(tables))
        self.assertEqual(len(metadata), slim_ts.num_nodes)

        # A type is drawn for every node, but only nodes that carry
        # metadata receive it.
        gtypes = [random.choice(type_options) for _ in metadata]
        for record, gtype in zip(metadata, gtypes):
            if record is None:
                continue
            record.genome_type = gtype
        pyslim.annotate_node_metadata(tables, metadata)

        new_ts = pyslim.load_tables(tables)
        for j, node in enumerate(new_ts.nodes()):
            decoded = pyslim.decode_node(node.metadata)
            if decoded is not None:
                self.assertEqual(decoded.genome_type, gtypes[j])
def test_annotate_nodes(self):
    """Set random node genome types via the schema and read them back."""
    type_options = [pyslim.GENOME_TYPE_X, pyslim.GENOME_TYPE_Y]
    for ts in self.get_msprime_examples():
        slim_ts = pyslim.annotate_defaults(
            ts, model_type="nonWF", slim_generation=1)
        tables = slim_ts.tables
        rows = [node.metadata for node in tables.nodes]

        # A type is drawn for every node, but only nodes that carry
        # metadata receive it.
        gtypes = [random.choice(type_options) for _ in rows]
        for row, gtype in zip(rows, gtypes):
            if row is None:
                continue
            row["genome_type"] = gtype

        schema = tables.nodes.metadata_schema
        tables.nodes.packset_metadata(
            list(map(schema.validate_and_encode_row, rows)))

        new_ts = pyslim.load_tables(tables)
        for node, gtype in zip(new_ts.nodes(), gtypes):
            if node.metadata is None:
                continue
            self.assertEqual(node.metadata["genome_type"], gtype)
def _remove_null_population_and_nodes(self):
    """Collapse every stored tree sequence onto population 0 and simplify.

    Per the original author, shadie simulations carry a null population
    because alternation of generations is modelled with two alternating
    subpopulations. Here every node of every tree sequence in
    ``self._tree_sequences`` is assigned to population 0. The sequence is
    then simplified with the nodes that appear in the edge table as
    samples, and the result replaces the entry in place.
    """
    for idx, treeseq in enumerate(self._tree_sequences):
        # Work on the tables and rebuild a tree sequence from them
        # afterwards with load_tables.
        tables = treeseq.tables

        # Nodes from the diploid sub-generation are labelled
        # population=1 (per the original comment); relabel all to 0.
        tables.nodes.population = np.zeros(
            tables.nodes.num_rows, dtype=np.int32)

        # NOTE: rescaling of the SLiM generation and of node/mutation
        # times was tried here previously and is left disabled.

        # Nodes referenced by at least one edge become the samples
        # passed to simplify.
        connected_nodes = list(
            set(tables.edges.parent).union(tables.edges.child))

        # Reload a tree sequence from the modified tables, then
        # simplify; with no nodes left in population 1, simplify is
        # expected to drop that population.
        # https://tskit.dev/tskit/docs/stable/_modules/tskit/tables.html
        relabelled = pyslim.load_tables(tables)
        self._tree_sequences[idx] = relabelled.simplify(
            samples=connected_nodes,
            keep_input_roots=True,
            keep_unary_in_individuals=True,
        )
def test_load(self):
    """Load each SLiM example file by three routes and compare the results."""
    for path in self.get_slim_example_files():
        # Reference: plain tskit load.
        msp_ts = tskit.load(path)
        self.assertIs(type(msp_ts), msprime.TreeSequence)
        msp_tables = msp_ts.tables

        # Route 1: transfer the tables.
        from_tables = pyslim.load_tables(msp_tables)
        self.assertIs(type(from_tables), pyslim.SlimTreeSequence)
        self.verify_times(msp_ts, from_tables)
        self.assertEqual(msp_tables, from_tables.tables)

        # Route 2: convert the tree sequence directly.
        new_ts = pyslim.SlimTreeSequence(msp_ts)
        self.assertIs(type(new_ts), pyslim.SlimTreeSequence)
        self.verify_times(msp_ts, new_ts)
        self.assertEqual(msp_tables, new_ts.tables)

        # Route 3: load to pyslim straight from the file.
        slim_ts = pyslim.load(path)
        self.assertIs(type(slim_ts), pyslim.SlimTreeSequence)
        self.assertEqual(msp_tables, slim_ts.tables)
        self.assertEqual(slim_ts.slim_generation, new_ts.slim_generation)
def _update_tables(self):
    """DEPRECATED. Alternative way to remove the null pop and halve time.

    The original author notes that this did not work ("still squishes
    edges") and points to the pyslim "continuing the simulation"
    vignette as the next thing to try:
    https://github.com/tskit-dev/pyslim/blob/625295ba6b4ae8e8400953be65b03b3630c1430f/docs/vignette_continuing.md#continuing-the-simulation

    For each entry of ``self._tree_sequences`` this assigns every node
    to population 0 and halves the SLiM generation and the node and
    mutation times. It then simplifies with the nodes that appear in
    the edge table as samples and stores the result back in place.
    """
    for idx, treeseq in enumerate(self._tree_sequences):
        # Mutable tskit.TableCollection copy.
        tables = treeseq.dump_tables()

        # Per the original comment there is a null SLiM population that
        # does not really exist; all nodes are relabelled to 0.
        tables.nodes.population = np.zeros(
            tables.nodes.time.size, dtype=np.int32)

        # Halve the recorded generation (truncated to int) and times.
        meta = tables.metadata
        meta["SLiM"]["generation"] = int(meta["SLiM"]["generation"] / 2.)
        tables.metadata = meta
        tables.nodes.time = tables.nodes.time / 2
        tables.mutations.time = tables.mutations.time / 2.

        # Turn the edited tables back into a tree sequence.
        rescaled = pyslim.load_tables(tables)

        # Nodes referenced by at least one edge become the samples
        # passed to simplify.
        # https://tskit.dev/tskit/docs/stable/_modules/tskit/tables.html
        connected_nodes = list(
            set(tables.edges.parent).union(tables.edges.child))
        self._tree_sequences[idx] = rescaled.simplify(
            samples=connected_nodes,
            keep_input_roots=True,
            keep_unary_in_individuals=True,
        )
def test_annotate_XY(self):
    """Annotate msprime examples as X- or Y-chromosome models and reload.

    Uses the legacy extract/annotate helpers. Individuals get random
    sexes and node metadata is set so that a genome is null when its
    type is not the modelled chromosome. The result is loaded into SLiM
    and compared with the default annotation.
    """
    sex_options = [
        pyslim.INDIVIDUAL_TYPE_FEMALE,
        pyslim.INDIVIDUAL_TYPE_MALE,
    ]
    for ts in self.get_msprime_examples():
        for genome_type in ["X", "Y"]:
            slim_ts = pyslim.annotate_defaults(
                ts, model_type="nonWF", slim_generation=1)
            tables = slim_ts.tables

            # Random sex per individual.
            metadata = list(pyslim.extract_individual_metadata(tables))
            self.assertEqual(len(metadata), slim_ts.num_individuals)
            sexes = [random.choice(sex_options) for _ in metadata]
            for record, sex in zip(metadata, sexes):
                record.sex = sex
            pyslim.annotate_individual_metadata(tables, metadata)

            # A genome is null when it is not the modelled chromosome.
            x_is_null = (genome_type != "X")
            y_is_null = (genome_type != "Y")
            node_metadata = list(pyslim.extract_node_metadata(tables))
            self.assertEqual(len(node_metadata), slim_ts.num_nodes)
            for j in range(slim_ts.num_individuals):
                nodes = slim_ts.individual(j).nodes
                # First genome is always an X.
                first = node_metadata[nodes[0]]
                first.genome_type = pyslim.GENOME_TYPE_X
                first.is_null = x_is_null
                # Second genome is a Y in males, an X otherwise.
                second = node_metadata[nodes[1]]
                if sexes[j] == pyslim.INDIVIDUAL_TYPE_MALE:
                    second.genome_type = pyslim.GENOME_TYPE_Y
                    second.is_null = y_is_null
                else:
                    second.genome_type = pyslim.GENOME_TYPE_X
                    second.is_null = x_is_null
            pyslim.annotate_node_metadata(tables, node_metadata)

            new_ts = pyslim.load_tables(tables)
            # Try loading this into SLiM; the result is not inspected.
            self.run_msprime_restart(new_ts, sex=genome_type)
            self.verify_annotated_tables(new_ts, slim_ts)
            self.verify_annotated_trees(new_ts, slim_ts)
            self.verify_haplotype_equality(new_ts, slim_ts)
def throw_mut_on_tree(ts):
    """Place a single mutation on a randomly chosen branch of ``ts``.

    Takes an unmutated tree sequence and "throws" one mutation onto it,
    representing (per the original author) the standing variant in a
    sweep that starts immediately after burn-in.

    Reads the module-level ``args`` namespace: ``af``, ``Ne``, ``l``,
    ``r``, ``q`` and ``c``. Draws from NumPy's global random state, so
    results depend on the order of the draws below.

    Parameters
    ----------
    ts:
        Tree sequence with no sites (asserted below).

    Returns
    -------
    tuple
        ``(mut_base, freq, mut_ts)``: the chosen position (a length-1
        float array), the sum of the genotype matrix divided by ``n``,
        and the tree sequence reloaded with the new mutation.
    """
    global args
    # Denominator used for frequencies: args.Ne, or a hard-coded
    # 2 * 14474 when args.af is set.
    # NOTE(review): meaning of 14474 is not visible here — confirm.
    if not args.af:
        n = args.Ne
    else:
        n = 2 * 14474
    # NOTE(review): l, r and q are bound but not used below (the final
    # check reads args.q directly).
    l = args.l
    r = args.r
    q = args.q
    c = args.c
    # find total tree length times sequence extent
    # (the extent is measured between the ceilings of the interval ends)
    tree_sizes = np.array([
        t.total_branch_length *
        (np.ceil(t.interval[1]) - np.ceil(t.interval[0]))
        for t in ts.trees()
    ])
    # Normalise to probabilities for the weighted draw.
    tree_sizes /= sum(tree_sizes)
    # pick the tree (tree_index is a length-1 array)
    tree_index = np.random.choice(ts.num_trees, size=1, p=tree_sizes)
    t = ts.first()
    # Walk the trees until the chosen index is reached.
    for (i, t) in enumerate(ts.trees()):
        if i == tree_index:
            break
    assert (t.index == tree_index)
    # pick the branch: redraw until the picked node's sample count
    # divided by n is at least c
    cpicked = -1
    while cpicked < c:
        # Uniform point along the tree's total branch length.
        treeloc = t.total_branch_length * np.random.uniform()
        for mut_n in t.nodes():
            if mut_n != t.root:
                # Subtract branch lengths until the drawn point is passed.
                treeloc -= t.branch_length(mut_n)
                # NOTE(review): this check is assumed to sit inside the
                # non-root branch — confirm against the original layout.
                if treeloc <= 0:
                    cpicked = t.num_samples(mut_n) / (n)
                    break
    # pick the location on the sequence: a random integer position in
    # [ceil(left), ceil(right)), kept as a length-1 float array
    mut_base = 0.0 + np.random.randint(
        low=np.ceil(t.interval[0]), high=np.ceil(t.interval[1]), size=1)
    # the following assumes that there's no other mutations in the tree sequence
    assert (ts.num_sites == 0)
    # the mutation metadata
    mut_md = pyslim.MutationMetadata(mutation_type=1,
                                     selection_coeff=0.0,
                                     population=1,
                                     slim_time=1)
    tables = ts.tables
    site_id = tables.sites.add_row(position=mut_base, ancestral_state=b'')
    tables.mutations.add_row(site=site_id,
                             node=mut_n,
                             derived_state='1',
                             metadata=pyslim.encode_mutation([mut_md]))
    mut_ts = pyslim.load_tables(tables)
    # (Disabled in the original: writing the indices of carrier genotypes
    # to a '<out>.slim.targets' file.)
    if not args.q:
        # Print the genotypes and the carrier count over n unless quiet.
        print(mut_ts.genotype_matrix())
        print('%d / %d' % (np.sum(mut_ts.genotype_matrix()), n))
    freq = np.sum(mut_ts.genotype_matrix()) / (n)
    return mut_base, freq, mut_ts
[ims.validate_and_encode_row(md) for md in individual_metadata]) # add selected mutation mut_ind_id = random.choice(range(tables.individuals.num_rows)) mut_node_id = random.choice(np.where(tables.nodes.individual == mut_ind_id)[0]) mut_node = tables.nodes[mut_node_id] mut_metadata = { "mutation_list": [{ "mutation_type": 2, "selection_coeff": 0.1, "subpopulation": mut_node.population, "slim_time": int(tables.metadata['SLiM']['generation'] - mut_node.time), "nucleotide": -1 }] } site_num = tables.sites.add_row(position=5000, ancestral_state='') tables.mutations.add_row(node=mut_node_id, site=site_num, derived_state='1', time=mut_node.time, metadata=mut_metadata) slim_ts = pyslim.load_tables(tables) slim_ts.dump("recipe_17.9.trees")
# Keywords: Python, nonWF, non-Wright-Fisher, tree-sequence recording, tree sequence recording

import random

import msprime
import pyslim

# Simulate a coalescent history without mutations.
ts = msprime.simulate(
    sample_size=10000,
    Ne=5000,
    length=1e8,
    mutation_rate=0.0,
    recombination_rate=1e-8,
)

# Add default SLiM annotation for a nonWF model to a mutable copy of
# the tables.
tables = ts.dump_tables()
pyslim.annotate_defaults_tables(tables, model_type="nonWF", slim_generation=1)

# Give every individual a random sex and a random age in 0..4.
individual_metadata = list(pyslim.extract_individual_metadata(tables))
for ind_md in individual_metadata:
    ind_md.sex = random.choice([pyslim.INDIVIDUAL_TYPE_FEMALE,
                                pyslim.INDIVIDUAL_TYPE_MALE])
    ind_md.age = random.choice([0, 1, 2, 3, 4])
pyslim.annotate_individual_metadata(tables, individual_metadata)

# Write the annotated tree sequence to disk.
slim_ts = pyslim.load_tables(tables)
slim_ts.dump("recipe_17.9.trees")