def write_vcf(chrom):
    """
    Add mutations to the tree sequence stored for ``chrom`` and write a VCF.

    Reads the tree file, mutation rate and output path for ``chrom`` from the
    module-level ``args``, and the random seed from the module-level ``seeds``.
    Progress messages are appended to the module-level ``logfile``.

    :param chrom: Key used to index ``args.tree_file``, ``args.vcffile``,
        ``args.mut_rate`` and ``seeds``.
    :return: Always ``True`` once the VCF has been written.
    """
    treefile = args.tree_file[chrom]
    vcf_path = args.vcffile[chrom]
    mut_rate = args.mut_rate[chrom]
    seed = seeds[chrom]

    logfile.write("Simulating mutations on " + treefile + "\n")
    ts = msprime.load(treefile)
    rng = msprime.RandomGenerator(seed)

    nodes = msprime.NodeTable()
    edgesets = msprime.EdgesetTable()
    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    migrations = msprime.MigrationTable()
    # Only the genealogy is copied out of the loaded tree sequence; the site
    # and mutation tables start empty and are filled by the generator below.
    ts.dump_tables(nodes=nodes, edgesets=edgesets, migrations=migrations)

    mutgen = msprime.MutationGenerator(rng, mut_rate)
    mutgen.generate(nodes, edgesets, sites, mutations)

    logfile.write("Saving to " + vcf_path + "\n")
    mutated_ts = msprime.load_tables(
        nodes=nodes, edgesets=edgesets, sites=sites, mutations=mutations)

    # Open the output only once there is something to write, and make sure
    # the handle is flushed and closed even if writing fails.
    with open(vcf_path, "w") as vcf:
        mutated_ts.write_vcf(vcf, ploidy=1)
    return True
def run(self, ngens):
    """
    Simulate ``ngens`` generations of a population of ``self.N`` individuals
    and record the resulting genealogy in msprime tables.

    The starting individuals are either ``self.N`` bare nodes at time
    ``ngens``, or (when ``self.deep_history`` is true) the output of
    ``msprime.simulate`` with its node times shifted back by ``ngens``.
    In each generation, every individual is replaced with probability
    ``1 - self.survival``; a replacement inherits ``[0, bp)`` from one
    randomly chosen parent and ``[bp, 1.0)`` from another, where ``bp`` comes
    from ``self.random_breakpoint()``.

    :param ngens: Number of generations to simulate; time counts down from
        ``ngens - 1`` to ``0``.
    :return: An ``msprime.TableCollection`` in which the members of the final
        population are flagged as samples.
    """
    nodes = msprime.NodeTable()
    edges = msprime.EdgeTable()
    migrations = msprime.MigrationTable()
    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    provenances = msprime.ProvenanceTable()

    if self.deep_history:
        # initial population
        init_ts = msprime.simulate(self.N, recombination_rate=1.0)
        init_ts.dump_tables(nodes=nodes, edges=edges)
        nodes.set_columns(time=nodes.time + ngens, flags=nodes.flags)
    else:
        for _ in range(self.N):
            nodes.add_row(time=ngens)

    pop = list(range(self.N))
    for t in range(ngens - 1, -1, -1):
        if self.debug:
            print("t:", t)
            print("pop:", pop)

        dead = [random.random() > self.survival for _ in pop]
        num_dead = sum(dead)
        # sample these first so that all parents are from the previous gen
        new_parents = [
            (random.choice(pop), random.choice(pop)) for _ in range(num_dead)]
        k = 0
        if self.debug:
            print("Replacing", num_dead, "individuals.")

        for j in range(self.N):
            if dead[j]:
                # this is: offspring ID, lparent, rparent, breakpoint
                offspring = nodes.num_rows
                nodes.add_row(time=t)
                lparent, rparent = new_parents[k]
                k += 1
                bp = self.random_breakpoint()
                if self.debug:
                    print("--->", offspring, lparent, rparent, bp)
                pop[j] = offspring
                # Skip zero-width intervals at either end of the genome.
                if bp > 0.0:
                    edges.add_row(
                        left=0.0, right=bp, parent=lparent, child=offspring)
                if bp < 1.0:
                    edges.add_row(
                        left=bp, right=1.0, parent=rparent, child=offspring)

    if self.debug:
        print("Done! Final pop:")
        print(pop)

    # Build the membership set once: testing against the list for every node
    # row would cost O(N) per lookup.
    samples = set(pop)
    flags = [
        (msprime.NODE_IS_SAMPLE if u in samples else 0)
        for u in range(nodes.num_rows)]
    nodes.set_columns(time=nodes.time, flags=flags)

    if self.debug:
        print("Done.")
        print("Nodes:")
        print(nodes)
        print("Edges:")
        print(edges)

    return msprime.TableCollection(
        nodes, edges, migrations, sites, mutations, provenances)
def _load_legacy_hdf5_v10(root, remove_duplicate_positions=False):
    """
    Rebuild a tree sequence from the groups of a version-10 legacy file.

    Each of the ``nodes``, ``edges``, ``migrations``, ``sites``,
    ``mutations`` and ``provenances`` groups under ``root`` is copied into
    the matching msprime table. Optional columns and empty tables are
    detected by checking whether the relevant key is present in the group.
    A provenance row produced by ``_get_upgrade_provenance(root)`` is
    appended before the tables are loaded.

    :param root: Mapping-like object indexed by group name (presumably an
        open HDF5 file; confirm against the caller).
    :param remove_duplicate_positions: Ignored; duplicate positions cannot
        occur in v10.
    :return: The result of ``msprime.load_tables`` on the populated tables.
    """
    def optional_metadata(group):
        # Returns (metadata, metadata_offset), or (None, None) when the
        # group carries no metadata column.
        if "metadata" not in group:
            return None, None
        return group["metadata"], group["metadata_offset"]

    # Nodes
    node_grp = root["nodes"]
    nodes = msprime.NodeTable()
    node_md, node_md_offset = optional_metadata(node_grp)
    nodes.set_columns(
        flags=node_grp["flags"],
        population=node_grp["population"],
        time=node_grp["time"],
        metadata=node_md,
        metadata_offset=node_md_offset)

    # Edges
    edge_grp = root["edges"]
    edges = msprime.EdgeTable()
    edges.set_columns(
        left=edge_grp["left"],
        right=edge_grp["right"],
        parent=edge_grp["parent"],
        child=edge_grp["child"])

    # Migrations (columns are only read when "left" is present)
    migration_grp = root["migrations"]
    migrations = msprime.MigrationTable()
    if "left" in migration_grp:
        migrations.set_columns(
            left=migration_grp["left"],
            right=migration_grp["right"],
            node=migration_grp["node"],
            source=migration_grp["source"],
            dest=migration_grp["dest"],
            time=migration_grp["time"])

    # Sites (columns are only read when "position" is present)
    site_grp = root["sites"]
    sites = msprime.SiteTable()
    if "position" in site_grp:
        site_md, site_md_offset = optional_metadata(site_grp)
        sites.set_columns(
            position=site_grp["position"],
            ancestral_state=site_grp["ancestral_state"],
            ancestral_state_offset=site_grp["ancestral_state_offset"],
            metadata=site_md,
            metadata_offset=site_md_offset)

    # Mutations (columns are only read when "site" is present)
    mutation_grp = root["mutations"]
    mutations = msprime.MutationTable()
    if "site" in mutation_grp:
        mut_md, mut_md_offset = optional_metadata(mutation_grp)
        mutations.set_columns(
            site=mutation_grp["site"],
            node=mutation_grp["node"],
            parent=mutation_grp["parent"],
            derived_state=mutation_grp["derived_state"],
            derived_state_offset=mutation_grp["derived_state_offset"],
            metadata=mut_md,
            metadata_offset=mut_md_offset)

    # Provenances (columns are only read when "timestamp" is present)
    prov_grp = root["provenances"]
    provenances = msprime.ProvenanceTable()
    if "timestamp" in prov_grp:
        timestamp = prov_grp["timestamp"]
        timestamp_offset = prov_grp["timestamp_offset"]
        if "record" not in prov_grp:
            # No stored records: substitute placeholder arrays shaped like
            # the timestamp columns.
            record = np.empty_like(timestamp)
            record_offset = np.zeros_like(timestamp_offset)
        else:
            record = prov_grp["record"]
            record_offset = prov_grp["record_offset"]
        provenances.set_columns(
            timestamp=timestamp,
            timestamp_offset=timestamp_offset,
            record=record,
            record_offset=record_offset)
    provenances.add_row(_get_upgrade_provenance(root))

    return msprime.load_tables(
        nodes=nodes,
        edges=edges,
        migrations=migrations,
        sites=sites,
        mutations=mutations,
        provenances=provenances)