def run_dump_provenances(args):
    tree_sequence = tskit.load(args.tree_sequence)
    if args.human:
        for provenance in tree_sequence.provenances():
            d = json.loads(provenance.record)
            print(
                "id={}, timestamp={}, record={}".format(
                    provenance.id, provenance.timestamp, json.dumps(d, indent=4)
                )
            )
    else:
        tree_sequence.dump_text(provenances=sys.stdout)

def test_fileobj_multi(self, replicate_ts_fixture, fileobj):
    file_offsets = []
    for ts in replicate_ts_fixture:
        ts.dump(fileobj)
        file_offsets.append(fileobj.tell())
    fileobj.close()
    with open(fileobj.name, "rb") as f:
        for ts, file_offset in zip(replicate_ts_fixture, file_offsets):
            ts2 = tskit.load(f)
            file_offset2 = f.tell()
            assert ts.tables == ts2.tables
            assert file_offset == file_offset2

def run_mutate(args):
    tree_sequence = tskit.load(args.tree_sequence)
    tree_sequence = msprime.mutate(
        tree_sequence=tree_sequence,
        rate=args.mutation_rate,
        random_seed=args.random_seed,
        keep=args.keep,
        start_time=args.start_time,
        end_time=args.end_time,
        discrete=args.discrete,
    )
    tree_sequence.dump(args.output_tree_sequence)

def run_date(args):
    try:
        ts = tskit.load(args.tree_sequence)
    except tskit.FileFormatError as ffe:
        exit("Error loading '{}': {}".format(args.tree_sequence, ffe))
    dated_ts = tsdate.date(
        ts,
        args.Ne,
        mutation_rate=args.mutation_rate,
        recombination_rate=args.recombination_rate,
        probability_space=args.probability_space,
        method=args.method,
        eps=args.epsilon,
        num_threads=args.num_threads,
        ignore_oldest_root=args.ignore_oldest,
        progress=args.progress,
    )
    dated_ts.dump(args.output)

def ts_to_stairway(self, ts_path, num_bootstraps=1, mask_file=None):
    """
    Converts the specified tskit tree sequence to text files used by
    stairway plot.
    """
    derived_counts_all = [[] for _ in range(num_bootstraps + 1)]
    total_length = 0
    num_samples = 0
    for i, ts_p in enumerate(ts_path):
        ts = tskit.load(ts_p)
        total_length += ts.sequence_length
        num_samples = ts.num_samples
        haps = ts.genotype_matrix()
        SFSs = []
        # Masking
        retain = np.full(ts.get_num_mutations(), False)
        if mask_file:
            mask_table = pd.read_csv(mask_file, sep="\t", header=None)
            chrom = ts_p.split("/")[-1].split(".")[0]
            sub = mask_table[mask_table[0] == chrom]
            mask_ints = pd.IntervalIndex.from_arrays(sub[1], sub[2])
            snp_locs = [int(x.site.position) for x in ts.variants()]
            tmp_bool = [mask_ints.contains(x) for x in snp_locs]
            retain = np.logical_or(retain, tmp_bool)
            total_length -= np.sum(mask_ints.length)
        retain = np.logical_not(retain)
        # append unmasked SFS
        SFSs.append(allel.sfs(allel.HaplotypeArray(haps).count_alleles()[:, 1])[1:])
        # get masked allele counts and append SFS
        allele_counts = allel.HaplotypeArray(haps[retain, :]).count_alleles()
        SFSs.append(allel.sfs(allele_counts[:, 1])[1:])
        sfs_path = ts_p + ".sfs.pdf"
        plots.plot_sfs(SFSs, sfs_path)
        # Bootstrap allele counts
        derived_counts_all[0].extend(allele_counts[:, 1])
        for j in range(1, num_bootstraps + 1):
            nsites = np.shape(allele_counts)[0]
            bootset = np.random.choice(np.arange(0, nsites, 1), nsites, replace=True)
            bootac = allele_counts[bootset, :]
            der_bootac = bootac[:, 1]
            derived_counts_all[j].extend(der_bootac)
    # Get the SFS minus the 0 bin and write output
    stairway_files = []
    for l in range(len(derived_counts_all)):
        sfs = allel.sfs(derived_counts_all[l])[1:]
        filename = self.workdir / "sfs_{}.txt".format(l)
        write_stairway_sfs(total_length, num_samples, sfs, filename)
        stairway_files.append(filename)
    return stairway_files

def main(): description = """Simple CLI wrapper for tsinfer tskit version: {} tsinfer version: {}""".format(tskit.__version__, tsinfer.__version__) parser = argparse.ArgumentParser( description=description, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('--verbosity', '-v', action='count', default=0) parser.add_argument( "samples", help="The samples file name, as saved by tsinfer.SampleData.initialise()") parser.add_argument( "output", help="The path to write the output file to") parser.add_argument( "-l", "--length", default=None, type=int, help="The total sequence length") parser.add_argument( "-t", "--threads", default=1, type=int, help="The number of worker threads to use") parser.add_argument( "-m", "--method", default="C", choices=['C','P'], help="Which implementation to use, [C] (faster) or [P]ython (more debuggable)") parser.add_argument( "--inject-real-ancestors-from-ts", default=None, help="Instead of inferring ancestors, construct known ones from this tree sequence file path") parser.add_argument( "-V", "--version", action='version', version=description) args = parser.parse_args() engine = tsinfer.PY_ENGINE if args.method == "P" else tsinfer.C_ENGINE if not os.path.isfile(args.samples): raise ValueError("No samples file") sample_data = tsinfer.load(args.samples) if all(False for _ in sample_data.genotypes(inference_sites=True)): raise ValueError("No inference sites") if args.inject_real_ancestors_from_ts is not None: ancestor_data = tsinfer.AncestorData.initialise(sample_data, compressor=None) orig_ts = tskit.load(args.inject_real_ancestors_from_ts) eval_util.build_simulated_ancestors(sample_data, ancestor_data, orig_ts) ancestor_data.finalise() ancestors_ts = tsinfer.match_ancestors( sample_data, ancestor_data, engine=engine) ts = tsinfer.match_samples( sample_data, ancestors_ts, engine=engine, simplify=True) else: ts = tsinfer.infer( sample_data, num_threads=args.threads, engine=engine) ts.dump(args.output)
def verify(self, cmd, num_samples):
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = pathlib.Path(tmpdir) / "output.trees"
        full_cmd = "python3 -m stdpopsim -q " + cmd + f" {filename}"
        subprocess.run(full_cmd, shell=True, check=True)
        # TODO converting to str isn't necessary in tskit 0.1.5. Remove.
        ts = tskit.load(str(filename))
        self.assertEqual(ts.num_samples, num_samples)
        provenance = json.loads(ts.provenance(ts.num_provenances - 1).record)
        tskit.validate_provenance(provenance)
        stored_cmd = provenance["parameters"]["args"]
        self.assertEqual(stored_cmd[0], "-q")
        self.assertEqual(stored_cmd[1:-1], cmd.split())

def compare_python_api(self, input_ts, cmd, Ne, mutation_rate, method):
    with tempfile.TemporaryDirectory() as tmpdir:
        input_filename = pathlib.Path(tmpdir) / "input.trees"
        input_ts.dump(input_filename)
        output_filename = pathlib.Path(tmpdir) / "output.trees"
        full_cmd = "date " + str(input_filename) + f" {output_filename} " + cmd
        cli.tsdate_main(full_cmd.split())
        output_ts = tskit.load(output_filename)
        dated_ts = tsdate.date(
            input_ts, Ne=Ne, mutation_rate=mutation_rate, method=method)
        print(dated_ts.tables.nodes.time, output_ts.tables.nodes.time)
        self.assertTrue(
            np.array_equal(dated_ts.tables.nodes.time, output_ts.tables.nodes.time))

def run_mutate(args):
    tree_sequence = tskit.load(args.tree_sequence)
    tree_sequence = msprime.sim_mutations(
        tree_sequence=tree_sequence,
        rate=args.mutation_rate,
        random_seed=args.random_seed,
        keep=True,
        start_time=args.start_time,
        end_time=args.end_time,
        discrete_genome=True,
        model=args.model,
    )
    tree_sequence.dump(args.output_tree_sequence)

def combined_ts_constrained_samples(args):
    high_cov_samples = tsinfer.load(args.high_cov)
    dated_hgdp_1kg_sgdp_ts = tskit.load(args.dated_ts)
    sites_time = tsdate.sites_time_from_ts(dated_hgdp_1kg_sgdp_ts)
    dated_samples = tsdate.add_sampledata_times(high_cov_samples, sites_time)
    # Record number of constrained sites
    print("Total number of sites: ", sites_time.shape[0])
    print(
        "Number of ancient lower bounds: ",
        np.sum(high_cov_samples.min_site_times(individuals_only=True) != 0),
    )
    print(
        "Number of corrected times: ",
        np.sum(dated_samples.sites_time[:] != sites_time),
    )
    high_cov_samples_copy = dated_samples.copy(args.output)
    high_cov_samples_copy.finalise()

def run_infer(args):
    setup_logging(args)
    try:
        sample_data = tsinfer.SampleData.load(args.samples)
    except exceptions.FileFormatError as e:
        # Check if the user has tried to infer a tree sequence, a common basic mistake
        try:
            tskit.load(args.samples)
        except tskit.FileFormatError:
            raise e  # Re-raise the original error
        raise exceptions.FileFormatError(
            "Expecting a sample data file, not a tree sequence (you can create one "
            "via the Python function `tsinfer.SampleData.from_tree_sequence()`)."
        )
    sample_data = tsinfer.SampleData.load(args.samples)
    ts = tsinfer.infer(
        sample_data, progress_monitor=args.progress, num_threads=args.num_threads)
    output_trees = get_output_trees_path(args.output_trees, args.samples)
    logger.info("Writing output tree sequence to {}".format(output_trees))
    ts.dump(output_trees)
    summarise_usage()

def test_index_columns(self):
    ts = migration_example()
    ts.dump(self.temp_file)
    with kastore.load(self.temp_file) as store:
        all_data = dict(store)
    edge_removal_order = "indexes/edge_removal_order"
    edge_insertion_order = "indexes/edge_insertion_order"

    data = dict(all_data)
    del data[edge_removal_order]
    del data[edge_insertion_order]
    kastore.dump(data, self.temp_file)
    with pytest.raises(exceptions.LibraryError):
        tskit.load(self.temp_file)

    data = dict(all_data)
    del data[edge_removal_order]
    kastore.dump(data, self.temp_file)
    with pytest.raises(exceptions.LibraryError):
        tskit.load(self.temp_file)

    data = dict(all_data)
    del data[edge_insertion_order]
    kastore.dump(data, self.temp_file)
    with pytest.raises(exceptions.LibraryError):
        tskit.load(self.temp_file)

    data = dict(all_data)
    data[edge_insertion_order] = data[edge_insertion_order][:1]
    kastore.dump(data, self.temp_file)
    with pytest.raises(exceptions.FileFormatError):
        tskit.load(self.temp_file)

    data = dict(all_data)
    data[edge_removal_order] = data[edge_removal_order][:1]
    kastore.dump(data, self.temp_file)
    with pytest.raises(exceptions.FileFormatError):
        tskit.load(self.temp_file)

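# A minimal sketch (not part of the test above) showing how the edge index columns that
# the test deletes can be regenerated in user code via the public TableCollection API,
# rather than triggering a load error. The "example.trees" path is a hypothetical
# placeholder for any valid tree sequence file.
import tskit

tables = tskit.load("example.trees").dump_tables()
tables.drop_index()   # removes indexes/edge_insertion_order and indexes/edge_removal_order
assert not tables.has_index()
tables.build_index()  # recomputes both index columns from the (sorted) edge table
ts_rebuilt = tables.tree_sequence()
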
def verify(self, cmd, num_samples, seed=1): with tempfile.TemporaryDirectory() as tmpdir: filename = pathlib.Path(tmpdir) / "output.trees" full_cmd = cmd + f" -q -o {filename} --seed={seed}" with mock.patch("stdpopsim.cli.setup_logging"): stdout, stderr = capture_output(cli.stdpopsim_main, full_cmd.split()) self.assertEqual(len(stderr), 0) self.assertEqual(len(stdout), 0) ts = tskit.load(str(filename)) self.assertEqual(ts.num_samples, num_samples) provenance = json.loads(ts.provenance(0).record) prov_seed = provenance["parameters"]["random_seed"] self.assertEqual(prov_seed, seed)
def run_dump_macs(args): """ Write a macs formatted file so we can import into pbwt. """ tree_sequence = tskit.load(args.tree_sequence) n = tree_sequence.get_sample_size() m = tree_sequence.get_sequence_length() print("COMMAND:\tnot_macs {} {}".format(n, m)) print("SEED:\tASEED") for variant in tree_sequence.variants(as_bytes=True): print( "SITE:", variant.index, variant.position / m, 0.0, "{}".format(variant.genotypes.decode()), sep="\t")
def compute_stats(ts_file):
    st = dict()
    ts = tskit.load(ts_file)
    for key, func in stats_functions.items():
        try:
            res = func(ts)
        except Exception:
            # Print the filename so it's easier to trace problems.
            warning(f"{ts_file} triggered exception")
            raise
        if res is not None:
            st[key] = res
    return st

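# compute_stats() above assumes a module-level `stats_functions` mapping and a
# `warning()` helper that are not shown in this snippet. A minimal sketch of what such
# a mapping could look like, using only standard tskit TreeSequence methods; the
# statistic names chosen here are illustrative, not prescribed by the snippet above.
stats_functions = {
    "num_trees": lambda ts: ts.num_trees,
    "num_sites": lambda ts: ts.num_sites,
    "num_samples": lambda ts: ts.num_samples,
    # Site diversity (pi), averaged over the whole sequence by default.
    "diversity": lambda ts: float(ts.diversity()),
    # Density of segregating sites (span-normalised by default).
    "segregating_sites": lambda ts: float(ts.segregating_sites()),
}
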
def main(): """Run main function.""" args = parse_args(sys.argv[1:]) # ========================================================================= # Gather args # ========================================================================= tree = args.tree outfile = args.outfile if outfile is None: outfile = path.split(tree)[-1] ref_set = args.ref tar_set = args.tar gnn_win = args.gnn_windows pop_ids = args.pop_ids[0] # ========================================================================= # Loading and Checks # ========================================================================= ts = tskit.load(tree) # load tree print("tree loaded") # set refernce for comparison if ref_set: # custom ref sets for comparison ref_nodes = [] with open(ref_set) as f: for line in f: x = line.split(",") assert len(x) > 1, "recheck delimiter should be ," ref_nodes.append(list(map(int, x))) else: # all populations ref_nodes = [ ts.samples(population=i) for i in range(ts.num_populations) ] # set target population if tar_set is None: tar_nodes = ts.samples() elif tar_set.isnumeric(): tar_nodes = ts.samples(population=int(tar_set)) else: tar_nodes = [] with open(tar_set) as f: for line in f: x = line.split(",") assert len(x) > 1, "recheck delimiter should be ," tar_nodes.extend(list(map(int, x))) # ========================================================================= # Main executions # ========================================================================= if gnn_win: gnn_windows_fx(outfile, ts, ref_nodes, tar_nodes, pop_ids) else: gnn_fx(outfile, ts, ref_nodes, tar_nodes, pop_ids)
def test_simulate(self): saved_slim_env = os.environ.get("SLIM") with tempfile.NamedTemporaryFile(mode="w") as f: self.docmd(f"--slim-path slim HomSap -o {f.name}") ts = tskit.load(f.name) self.assertEqual(ts.num_samples, 10) self.assertTrue(all(tree.num_roots == 1 for tree in ts.trees())) if saved_slim_env is None: del os.environ["SLIM"] else: os.environ["SLIM"] = saved_slim_env with tempfile.NamedTemporaryFile(mode="w") as f: self.docmd(f"--slim-no-recapitation HomSap -o {f.name}") ts = tskit.load(f.name) self.assertEqual(ts.num_samples, 10) with tempfile.NamedTemporaryFile(mode="w") as f: self.docmd( f"--slim-no-recapitation --slim-no-burnin HomSap -o {f.name}") ts = tskit.load(f.name) self.assertEqual(ts.num_samples, 10) # verify sample counts for a multipopulation demographic model with tempfile.NamedTemporaryFile(mode="w") as f: cmd = (f"-e slim HomSap -o {f.name} -l 0.00001 -c chr1 -s 1234 -q " "-d OutOfAfrica_3G09 0 0 8").split() capture_output(stdpopsim.cli.stdpopsim_main, cmd) ts = tskit.load(f.name) self.assertEqual(ts.num_populations, 3) observed_counts = [0, 0, 0] for sample in ts.samples(): observed_counts[ts.get_population(sample)] += 1 self.assertEqual(observed_counts[0], 0) self.assertEqual(observed_counts[1], 0) self.assertEqual(observed_counts[2], 8) self.assertTrue(all(tree.num_roots == 1 for tree in ts.trees()))
def run_compress(args):
    setup_logging(args)
    for file_arg in args.files:
        logger.info("Compressing {}".format(file_arg))
        try:
            ts = tskit.load(file_arg)
        except tskit.FileFormatError as ffe:
            exit("Error loading '{}': {}".format(file_arg, ffe))
        logger.debug("Loaded tree sequence")
        infile = pathlib.Path(file_arg)
        outfile = pathlib.Path(file_arg + args.suffix)
        check_output(outfile, args)
        tszip.compress(ts, outfile, variants_only=args.variants_only)
        remove_input(infile, args)

def test_accuracy(reps):
    n = 100
    Ne = 10000
    mut_rate = 1e-8
    rec_rate = 1e-8
    theta = 4 * 10000 * mut_rate
    rho = 4 * 10000 * rec_rate
    length = 1e5
    compare_df_master = pd.DataFrame(
        columns=['truth', 'tsdate', 'tsdate_inferred', 'relate', 'geva'])
    for rep in range(reps):
        vanilla_ts = msprime.simulate(
            sample_size=n, Ne=Ne, mutation_rate=mut_rate,
            recombination_rate=rec_rate, length=length)
        ts, dated_ts, dated_inferred_ts_mut = run_simulation(
            vanilla_ts, n, Ne, theta, rho)
        # Run GEVA
        samples = generate_samples(ts, 'testing')
        ages = samplesdata_to_ages(
            samples, Ne=Ne, length=length, mut_rate=mut_rate, rec_rate=rec_rate,
            filename=str("test"))
        # Run Relate on simulated data
        relate_path = (
            "/Users/anthonywohns/Documents/mcvean_group/software/"
            "relate_v1.0.13_MacOSX/")

        def run_relate(ts, relate_path):
            subprocess.check_output([
                relate_path + "bin/RelateFileFormats", "--mode", "ConvertFromVcf",
                "--haps", relate_path + "age_compare/compare.haps",
                "--sample", relate_path + "age_compare/compare.sample",
                "-i", path + "tmp/test"])
            subprocess.check_output([
                relate_path + "bin/Relate", "--mode", "All", "-m", str(mut_rate),
                "-N", "20000",
                "--haps", relate_path + "age_compare/compare.haps",
                "--sample", relate_path + "age_compare/compare.sample",
                "--seed", "1", "-o", "compare",
                "--map", relate_path + "genetic_map.txt"])
            subprocess.check_output([
                relate_path + "bin/RelateFileFormats", "--mode",
                "ConvertToTreeSequence", "-i", "compare", "-o", "compare"])

        run_relate(ts, relate_path)
        relate_ts = tskit.load('compare.trees')
        # Mark the first n nodes as samples. NodeTable.flags returns a copy, so the
        # flags column must be rebuilt with set_columns() rather than assigned in place.
        table_collection = relate_ts.dump_tables()
        table_collection.nodes.set_columns(
            flags=np.array(
                np.concatenate(
                    [np.repeat(1, n), np.repeat(0, relate_ts.num_nodes - n)]),
                dtype='uint32'),
            time=relate_ts.tables.nodes.time)
        relate_ts_fixed = table_collection.tree_sequence()
        ts.dump('true_ts_' + str(rep) + '.trees')
        dated_ts.dump('dated_ts_' + str(rep) + '.trees')
        dated_inferred_ts_mut.dump('dated_inferred_ts_' + str(rep) + '.trees')
        relate_ts_fixed.dump('relate_ts_' + str(rep) + '.trees')
        compare_dict = compare_muts(n, ts, dated_ts, dated_inferred_ts_mut, ages)
        compare_df_master = pd.concat([compare_df_master, compare_dict])
    compare_df_master.to_csv("compare_df")

def run_tsinfer_mismatch(
    sample_fn,
    length,
    num_threads=1,
    inject_real_ancestors_from_ts_fn=None,
    rho=None,
    error_probability=None,
):
    with tempfile.NamedTemporaryFile("w+") as ts_out:
        cmd = [tsinfer_executable, ts_out.name, "-s", "infer", sample_fn]
        # cmd += ["--threads", str(num_threads), ts_out.name]
        cpu_time, memory_use = time_cmd(cmd)
        ts_simplified = tskit.load(ts_out.name)
        return ts_simplified, cpu_time, memory_use

def test_duplicate_positions(self):
    ts = msprime.simulate(10, mutation_rate=10)
    for version in [2, 3]:
        tskit.dump_legacy(ts, self.legacy_file_name, version=version)
        root = h5py.File(self.legacy_file_name, "r+")
        root['mutations/position'][:] = 0
        root.close()
        stdout, stderr = capture_output(
            cli.tskit_main,
            ["upgrade", "-d", self.legacy_file_name, self.current_file_name])
        self.assertEqual(stdout, "")
        tsp = tskit.load(self.current_file_name)
        self.assertEqual(tsp.sample_size, ts.sample_size)
        self.assertEqual(tsp.num_sites, 1)

def verify(self, cmd, num_samples, seed=1): with tempfile.TemporaryDirectory() as tmpdir: filename = pathlib.Path(tmpdir) / "output.trees" full_cmd = f"{sys.executable} -m stdpopsim -q {cmd} -o {filename} -s {seed}" subprocess.run(full_cmd, shell=True, check=True) ts = tskit.load(str(filename)) assert ts.num_samples == num_samples provenance = json.loads(ts.provenance(ts.num_provenances - 1).record) tskit.validate_provenance(provenance) stored_cmd = provenance["parameters"]["args"] assert stored_cmd[0] == "-q" assert stored_cmd[-1] == str(seed) assert stored_cmd[-2] == "-s" assert stored_cmd[1:-4] == cmd.split()
def verify_equal_length_columns(self, ts, table):
    ts.dump(self.temp_file)
    with kastore.load(self.temp_file) as store:
        all_data = dict(store)
    table_cols = [
        colname for colname in all_data.keys() if colname.startswith(table)
    ]
    # Remove all the 'offset' columns
    for col in list(table_cols):
        if col.endswith("_offset"):
            main_col = col[:col.index("_offset")]
            table_cols.remove(main_col)
            table_cols.remove(col)
        if "metadata_schema" in col:
            table_cols.remove(col)
    # Remaining columns should all be the same length
    for col in table_cols:
        for bad_val in [[], all_data[col][:-1]]:
            data = dict(all_data)
            data[col] = bad_val
            kastore.dump(data, self.temp_file)
            with pytest.raises(exceptions.FileFormatError):
                tskit.load(self.temp_file)

def load_tree(tree_file):
    """Reads a tree sequence from disk.

    Parameters
    ----------
    tree_file : str
        file path to tree sequence

    Returns
    -------
    tskit tree sequence object

    """
    return tskit.load(tree_file)

def test_conversion(self):
    ts1 = msprime.simulate(10)
    for version in [2, 3]:
        tskit.dump_legacy(ts1, self.legacy_file_name, version=version)
        stdout, stderr = capture_output(
            cli.tskit_main,
            ["upgrade", self.legacy_file_name, self.current_file_name])
        ts2 = tskit.load(self.current_file_name)
        self.assertEqual(stdout, "")
        self.assertEqual(stderr, "")
        # Quick checks to ensure we have the right tree sequence.
        # More thorough checks are done elsewhere.
        self.assertEqual(ts1.get_sample_size(), ts2.get_sample_size())
        self.assertEqual(ts1.num_edges, ts2.num_edges)
        self.assertEqual(ts1.get_num_trees(), ts2.get_num_trees())

def ts_to_seg(path, n=None):
    """
    Converts a tree sequence into a seg file for use by
    :code:`smcsmc.run_smcsmcs()`. This is especially useful if you are
    simulating data from :code:`msprime` and would like to directly use it
    in :code:`smcsmc`. For details of how to do this, please see the
    tutorial on simulation using :code:`msprime`.

    Provide the path to the tree sequence, and the suffix will be replaced
    by :code:`.seg`.

    This code is adapted from PopSim.

    :param str path: Full file path to the tree sequence created by
        :code:`ts.dump`.
    :param list n: If more than one sample of haplotypes is being analysed
        simultaneously, provide it here as a list. Otherwise, simply provide
        the number of haplotypes as a single-element list.
    """
    if n is None:
        ts = tskit.load(path)
        dirr = os.path.dirname(path)
        filen = os.path.basename(path)
        sep = filen.split(".")
        output = os.path.join(dirr, ".".join(sep) + ".seg")
        fi = open(output, "w")
        prev = 1
        cur = 0
        for var in ts.variants():
            cur = int(var.site.position)
            if cur > prev:
                geno = ''.join(map(str, var.genotypes))
                fi.write(f"{prev}\t{cur - prev}\t{geno}\n")
            prev = cur
        fi.close()
    else:
        for sample_size in n:
            ts = smcsmc.utils.prune_tree_sequence(path, sample_size)
            dirr = os.path.dirname(path)
            filen = os.path.basename(path)
            sep = filen.split(".")
            chrom = sep[0]
            sep.insert(0, str(sample_size))
            output = os.path.join(dirr, ".".join(sep) + ".seg")
            fi = open(output, "w")
            prev = 1
            cur = 0
            for var in ts.variants():
                cur = int(var.site.position)
                if cur > prev:
                    geno = ''.join(map(str, var.genotypes))
                    fi.write(f"{prev}\t{cur - prev}\t{geno}\n")
                prev = cur
            fi.close()
    return None

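# A minimal usage sketch for ts_to_seg() above, assuming a tree sequence has already
# been written to disk with ts.dump(). The simulation parameters and the
# "example.trees" path are illustrative placeholders, not values from the snippet.
import msprime

ts = msprime.simulate(
    sample_size=4, Ne=10000, length=1e5,
    mutation_rate=1e-8, recombination_rate=1e-8, random_seed=1)
ts.dump("example.trees")
# Writes a .seg file next to the input, one line per interval between variant positions.
ts_to_seg("example.trees")
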
def run_list(args):
    setup_logging(args)
    # First try to load with tskit.
    ts = None
    try:
        ts = tskit.load(args.path)
    except tskit.FileFormatError:
        pass
    if ts is None:
        tsinfer_file = tsinfer.load(args.path)
        if args.storage:
            print(tsinfer_file.info)
        else:
            print(tsinfer_file)
    else:
        summarise_tree_sequence(args.path, ts)

def run_tsdate(input_fn, Ne, mut_rate, timepoints, method):
    with tempfile.NamedTemporaryFile("w+") as ts_out:
        cmd = [
            sys.executable,
            tsdate_executable,
            input_fn,
            ts_out.name,
            str(Ne),
            "--mutation-rate",
            str(mut_rate),
        ]
        # cmd = ["tsdate", "date", input_fn, ts_out.name, str(Ne)]
        # cmd += ["--mutation-rate", str(mut_rate), "--timepoints", str(timepoints),
        #         "--method", str(method)]
        cpu_time, memory_use = time_cmd(cmd)
        dated_ts = tskit.load(ts_out.name)
        return dated_ts, cpu_time, memory_use

def load(cls, path):
    '''
    Load a :class:`SlimTreeSequence` from a .trees file on disk.

    :param string path: The path to a .trees file.
    :rtype SlimTreeSequence:
    '''
    ts = tskit.load(path)
    # extract the reference sequence from the kastore
    kas = kastore.load(path)
    if 'reference_sequence/data' in kas:
        int_rs = kas['reference_sequence/data']
        reference_sequence = int_rs.tostring().decode('ascii')
    else:
        reference_sequence = None
    return cls(ts, reference_sequence)

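# A usage sketch for the load() classmethod above; "slim_output.trees" is a hypothetical
# path to a file written by SLiM. The same reference-sequence check the method performs
# can also be done directly against the kastore, as shown here.
import kastore

kas = kastore.load("slim_output.trees")
has_reference = 'reference_sequence/data' in kas
sts = SlimTreeSequence.load("slim_output.trees")
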
def run_dump_macs(args): """ Write a macs formatted file so we can import into pbwt. """ tree_sequence = tskit.load(args.tree_sequence) n = tree_sequence.get_sample_size() m = tree_sequence.get_sequence_length() print("COMMAND:\tnot_macs {} {}".format(n, m)) print("SEED:\tASEED") for variant in tree_sequence.variants(as_bytes=True): print("SITE:", variant.index, variant.position / m, 0.0, "{}".format(variant.genotypes.decode()), sep="\t")
def load_from_stream(q_err, q_out, file_in):
    """
    tskit.load() tree sequences from `file_in` and put them onto `q_out`.
    Uncaught exceptions are placed onto the `q_err` queue.
    """
    try:
        with open(file_in, "rb") as f:
            while True:
                try:
                    ts = tskit.load(f)
                except EOFError:
                    break
                q_out.put(ts)
    except Exception as exc:
        tb = traceback.format_exc()
        q_err.put((exc, tb))

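# A minimal sketch of the stream pattern that load_from_stream() (and test_fileobj_multi
# above) rely on: several tree sequences appended to one file with ts.dump(), then read
# back with tskit.load() until EOFError. The file name and simulation parameters are
# placeholders.
import msprime
import tskit

with open("replicates.trees", "wb") as f:
    for seed in (1, 2, 3):
        msprime.simulate(10, random_seed=seed).dump(f)

loaded = []
with open("replicates.trees", "rb") as f:
    while True:
        try:
            loaded.append(tskit.load(f))
        except EOFError:
            break
assert len(loaded) == 3
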
def run_dump_mutations(args):
    tree_sequence = tskit.load(args.tree_sequence)
    tree_sequence.dump_text(mutations=sys.stdout, precision=args.precision)

def run_dump_vcf(args):
    tree_sequence = tskit.load(args.tree_sequence)
    tree_sequence.write_vcf(sys.stdout, args.ploidy)

def run_dump_variants(args):
    tree_sequence = tskit.load(args.tree_sequence)
    for variant in tree_sequence.variants(as_bytes=True):
        print(variant.position, end="\t")
        print("{}".format(variant.genotypes.decode()))

def run_dump_haplotypes(args):
    tree_sequence = tskit.load(args.tree_sequence)
    for h in tree_sequence.haplotypes():
        print(h)

def run_dump_newick(args):
    tree_sequence = tskit.load(args.tree_sequence)
    for tree in tree_sequence.trees():
        newick = tree.newick(precision=args.precision)
        print(newick)

def run_dump_sites(args):
    tree_sequence = tskit.load(args.tree_sequence)
    tree_sequence.dump_text(sites=sys.stdout, precision=args.precision)