def run_augment_ancestors(args):
    """Augment an ancestors tree sequence using a subset of the samples.

    Loads the sample data and the ancestors tree sequence, picks evenly
    spaced sample indexes, runs ``tsinfer.augment_ancestors`` on them and
    dumps the resulting tree sequence to ``args.augmented_ancestors``.

    Attributes of ``args`` read directly by this function: ``samples``,
    ``ancestors_trees``, ``augmented_ancestors``, ``progress``,
    ``num_samples``, ``num_threads`` and ``no_path_compression``.
    ``args`` is also handed to ``setup_logging``.

    When ``args.num_samples`` is ``None``, 10% of the samples (rounded up)
    are used.

    Raises:
        ValueError: if ``args.num_samples`` is given but is not between 1
            and the number of samples in the sample data file.
    """
    setup_logging(args)

    sample_data = tsinfer.SampleData.load(args.samples)
    num_available = sample_data.num_samples
    num_requested = args.num_samples
    if num_requested is None:
        # Default: 10% of the available samples, rounded up.
        num_requested = int(math.ceil(10 * num_available / 100))
    elif not 1 <= num_requested <= num_available:
        # Fail fast, before the (potentially large) tree sequence is loaded.
        # Asking for more indexes than there are samples would make the
        # truncated linspace below yield duplicate indexes.
        raise ValueError(
            "num_samples must be between 1 and {} (got {})".format(
                num_available, num_requested
            )
        )

    ancestors_path = get_ancestors_trees_path(args.ancestors_trees, args.samples)
    output_path = args.augmented_ancestors
    logger.info("Loading ancestral genealogies from {}".format(ancestors_path))
    ancestors_ts = tskit.load(ancestors_path)
    progress_monitor = ProgressMonitor(enabled=args.progress, augment_ancestors=True)

    # Evenly spaced indexes over [0, num_available - 1], truncated to ints.
    sample_indexes = np.linspace(
        0, num_available - 1, num=num_requested
    ).astype(int)

    ts = tsinfer.augment_ancestors(
        sample_data,
        ancestors_ts,
        sample_indexes,
        num_threads=args.num_threads,
        path_compression=not args.no_path_compression,
        progress_monitor=progress_monitor,
    )
    logger.info("Writing output tree sequence to {}".format(output_path))
    ts.dump(output_path)
    summarise_usage()
def run_augment(sample_data, ancestors_ts, subset, num_threads):
    """Run ``tsinfer.augment_ancestors`` with an enabled progress monitor.

    Args:
        sample_data: passed through as the first positional argument.
        ancestors_ts: passed through as the second positional argument.
        subset: passed through as the third positional argument
            (presumably the sample indexes to augment with -- confirm
            against ``tsinfer.augment_ancestors``).
        num_threads: forwarded as the ``num_threads`` keyword argument.

    Returns:
        Whatever ``tsinfer.augment_ancestors`` returns.
    """
    monitor = tsinfer.cli.ProgressMonitor(enabled=True, augment_ancestors=True)
    augmented = tsinfer.augment_ancestors(
        sample_data,
        ancestors_ts,
        subset,
        num_threads=num_threads,
        progress_monitor=monitor,
    )
    return augmented
def tsinfer_dev(n, L, seed, num_threads=1, recombination_rate=1e-8,
                error_rate=0, engine="C", log_level="WARNING", debug=True,
                progress=False, path_compression=True):
    """Simulate a tree sequence and run it through the inference pipeline.

    Steps: seed the RNGs, simulate with msprime, build sample data from the
    simulation, generate ancestors, match ancestors, augment the ancestors
    with the fixed sample indexes ``[5, 6, 7]``, match samples, and finally
    call ``tsinfer.verify`` on the result. Nothing is returned.

    Args:
        n: sample size passed to ``msprime.simulate``.
        L: sequence length; multiplied by 10**6 and truncated to an int
            before being passed as ``length``.
        seed: seeds ``np.random``, ``random`` and the msprime simulation.
        num_threads: forwarded to ``tsinfer.generate_ancestors`` only.
        recombination_rate: forwarded to ``msprime.simulate``.
        engine: forwarded to every tsinfer inference call.
        debug: when true, prints the number of simulated sites.

    NOTE: ``error_rate``, ``log_level``, ``progress`` and
    ``path_compression`` are accepted but never read by this body; path
    compression is hard-coded to ``True`` for ancestor matching and
    ``False`` for sample matching.
    """
    np.random.seed(seed)
    random.seed(seed)

    sequence_length = int(L * 10**6)
    # Logging setup (daiquiri) is intentionally left disabled here.
    simulation_options = dict(
        Ne=10**4,
        length=sequence_length,
        recombination_rate=recombination_rate,
        mutation_rate=1e-8,
        random_seed=seed,
    )
    ts = msprime.simulate(n, **simulation_options)
    if debug:
        print("num_sites = ", ts.num_sites)
    assert ts.num_sites > 0

    samples = tsinfer.SampleData.from_tree_sequence(ts)
    ancestor_data = tsinfer.generate_ancestors(
        samples, engine=engine, num_threads=num_threads
    )

    matched_ancestors = tsinfer.match_ancestors(
        samples,
        ancestor_data,
        engine=engine,
        path_compression=True,
        extended_checks=False,
    )
    augmented_ancestors = tsinfer.augment_ancestors(
        samples, matched_ancestors, [5, 6, 7], engine=engine
    )
    inferred_ts = tsinfer.match_samples(
        samples,
        augmented_ancestors,
        path_compression=False,
        engine=engine,
        simplify=True,
    )
    # An experimental re-match against insert_srb_ancestors(ts) plus assorted
    # table/tree printing used to live here as commented-out debugging code.

    tsinfer.verify(samples, inferred_ts)