示例#1
0
def run_augment_ancestors(args):
    """Augment an ancestors tree sequence using a subset of the samples.

    Loads the sample data named by ``args.samples`` and the ancestors tree
    sequence resolved from ``args.ancestors_trees``, chooses evenly spaced
    sample indexes, passes them to ``tsinfer.augment_ancestors`` and dumps
    the resulting tree sequence to ``args.augmented_ancestors``.

    Attributes read from ``args`` here: ``samples``, ``ancestors_trees``,
    ``augmented_ancestors``, ``progress``, ``num_samples``, ``num_threads``
    and ``no_path_compression``. ``args`` is also handed to
    ``setup_logging``, which may read further attributes.

    Raises:
        ValueError: If ``args.num_samples`` is given but does not lie
            between 1 and the number of samples in the sample data.
    """
    setup_logging(args)

    sample_data = tsinfer.SampleData.load(args.samples)

    # Work out how many samples to augment with before doing the (possibly
    # expensive) tree sequence load, so a bad request fails early.
    num_available = sample_data.num_samples
    num_to_augment = args.num_samples
    if num_to_augment is None:
        # Default: 10% of the available samples, rounded up.
        num_to_augment = int(math.ceil(10 * num_available / 100))
    elif not 1 <= num_to_augment <= num_available:
        # Asking for more points than there are integer positions makes the
        # truncated linspace below repeat indexes; a negative count makes
        # linspace itself fail with an unrelated-looking message.
        raise ValueError(
            "num_samples must be between 1 and the number of samples "
            "({}); got {}".format(num_available, num_to_augment))

    ancestors_trees = get_ancestors_trees_path(args.ancestors_trees,
                                               args.samples)
    output_path = args.augmented_ancestors
    logger.info(
        "Loading ancestral genealogies from {}".format(ancestors_trees))
    ancestors_trees = tskit.load(ancestors_trees)
    progress_monitor = ProgressMonitor(enabled=args.progress,
                                       augment_ancestors=True)

    # Spread the chosen samples evenly over [0, num_available - 1];
    # astype(int) truncates the fractional positions to valid indexes.
    sample_indexes = np.linspace(
        0, num_available - 1, num=num_to_augment).astype(int)
    ts = tsinfer.augment_ancestors(
        sample_data,
        ancestors_trees,
        sample_indexes,
        num_threads=args.num_threads,
        path_compression=not args.no_path_compression,
        progress_monitor=progress_monitor,
    )
    logger.info("Writing output tree sequence to {}".format(output_path))
    ts.dump(output_path)
    summarise_usage()
示例#2
0
def run_augment(sample_data, ancestors_ts, subset, num_threads):
    """Call ``tsinfer.augment_ancestors`` for ``subset`` with progress on.

    ``sample_data``, ``ancestors_ts`` and ``subset`` are forwarded
    positionally and ``num_threads`` by keyword. A
    ``tsinfer.cli.ProgressMonitor`` that is always enabled and configured
    for the augment-ancestors step is supplied as the progress monitor.

    Returns whatever ``tsinfer.augment_ancestors`` returns.
    """
    augment_options = {
        "num_threads": num_threads,
        "progress_monitor": tsinfer.cli.ProgressMonitor(
            enabled=True, augment_ancestors=True),
    }
    return tsinfer.augment_ancestors(
        sample_data, ancestors_ts, subset, **augment_options)
示例#3
0
def tsinfer_dev(n,
                L,
                seed,
                num_threads=1,
                recombination_rate=1e-8,
                error_rate=0,
                engine="C",
                log_level="WARNING",
                debug=True,
                progress=False,
                path_compression=True):
    """Simulate a tree sequence, run the tsinfer pipeline on it and verify.

    Steps: seed the numpy and stdlib RNGs, simulate ``n`` samples with
    msprime, build sample data from the simulation, generate and match
    ancestors, augment the ancestors with sample indexes 5, 6 and 7, match
    the samples, then call ``tsinfer.verify`` on the result. Nothing is
    returned.

    ``L`` is multiplied by 10**6 before being used as the simulated
    sequence length (presumably ``L`` is in megabases -- confirm with
    callers).

    NOTE(review): ``error_rate``, ``log_level``, ``progress`` and
    ``path_compression`` are accepted but never read here; the path
    compression flags passed to tsinfer are hard-coded. ``num_threads``
    is only forwarded to ``tsinfer.generate_ancestors``.

    Raises:
        AssertionError: If the simulation produced no sites.
    """
    np.random.seed(seed)
    random.seed(seed)

    sequence_length = int(L * 10**6)
    simulated_ts = msprime.simulate(
        n,
        Ne=10**4,
        length=sequence_length,
        recombination_rate=recombination_rate,
        mutation_rate=1e-8,
        random_seed=seed,
    )
    if debug:
        print("num_sites = ", simulated_ts.num_sites)
    assert simulated_ts.num_sites > 0

    samples = tsinfer.SampleData.from_tree_sequence(simulated_ts)

    ancestor_data = tsinfer.generate_ancestors(
        samples, engine=engine, num_threads=num_threads)
    matched_ancestors_ts = tsinfer.match_ancestors(
        samples,
        ancestor_data,
        engine=engine,
        path_compression=True,
        extended_checks=False,
    )

    # The sample indexes used for augmentation are fixed at 5, 6 and 7.
    augmented_ancestors_ts = tsinfer.augment_ancestors(
        samples, matched_ancestors_ts, [5, 6, 7], engine=engine)

    inferred_ts = tsinfer.match_samples(
        samples,
        augmented_ancestors_ts,
        path_compression=False,
        engine=engine,
        simplify=True,
    )

    tsinfer.verify(samples, inferred_ts)