示例#1
0
 def test_linear_space(self):
     """Date a large simulated tree sequence using linear probability space.

     The same prior grid (10 timepoints) is used for two ``tsdate.date``
     runs: one without a ``method`` argument and one with
     ``method="maximization"``. Each result is then handed to
     ``self.ts_equal_except_times`` together with the input.
     """
     # This makes ~1700 trees, and previously caused a failure
     pop_size = 10000
     rate = 1e-8
     ts = msprime.simulate(
         sample_size=10,
         length=2e6,
         Ne=pop_size,
         mutation_rate=rate,
         recombination_rate=rate,
         random_seed=11,
     )
     priors = tsdate.build_prior_grid(
         ts, timepoints=10, approximate_priors=None)
     # Keyword arguments common to both dating runs.
     shared_kwargs = dict(
         Ne=pop_size,
         mutation_rate=rate,
         priors=priors,
         probability_space=LIN,
     )
     dated_ts = tsdate.date(ts, **shared_kwargs)
     maximized_ts = tsdate.date(ts, method="maximization", **shared_kwargs)
     for result_ts in (dated_ts, maximized_ts):
         self.ts_equal_except_times(ts, result_ts)
示例#2
0
def main():
    """Command-line entry point: date a tree sequence file with tsdate.

    Parses the positional ``input``, ``output`` and ``Ne`` arguments (plus the
    optional ``--mutation-rate``, ``--verbosity`` and ``--version`` flags),
    loads the input with ``tskit.load``, dates it using a prior grid built
    with ``approximate_priors=True`` and dumps the result to ``output``.

    Raises:
        ValueError: if ``input`` does not point to an existing file.
    """
    description = """Simple CLI wrapper for tsdate
        tskit version: {}
        tsdate version: {}""".format(tskit.__version__, tsdate.__version__)

    # RawDescriptionHelpFormatter keeps the line breaks of the description.
    cli_parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli_parser.add_argument("--verbosity", "-v", action="count", default=0)
    cli_parser.add_argument(
        "input", help="The input tree sequence file name")
    cli_parser.add_argument(
        "output",
        help="The path to write the output tree sequence file to")
    cli_parser.add_argument(
        "Ne", type=float, help="Effective population size")
    cli_parser.add_argument(
        "--mutation-rate", default=1e-8, type=float, help="Mutation rate")
    cli_parser.add_argument(
        "-V", "--version", action="version", version=description)

    options = cli_parser.parse_args()

    input_exists = os.path.isfile(options.input)
    if not input_exists:
        raise ValueError("No input tree sequence file")

    source_ts = tskit.load(options.input)
    prior_grid = tsdate.build_prior_grid(source_ts, approximate_priors=True)
    dated = tsdate.date(
        source_ts,
        options.Ne,
        mutation_rate=options.mutation_rate,
        priors=prior_grid,
    )
    dated.dump(options.output)
示例#3
0
 def test_simple_sim_1_tree(self):
     """Infer and date a small simulation, then compare haplotypes.

     Runs once with ``use_times=True`` and once with ``use_times=False``
     when building the tsinfer sample data, asserting each time that the
     dated tree sequence yields the same haplotypes as the simulation.
     """
     ts = msprime.simulate(8, mutation_rate=5, random_seed=2)
     for use_times in (True, False):
         samples = tsinfer.SampleData.from_tree_sequence(
             ts, use_times=use_times)
         dated_ts = tsdate.date(tsinfer.infer(samples), Ne=1, mutation_rate=5)
         haplotype_pairs = zip(ts.haplotypes(), dated_ts.haplotypes())
         matches = [original == dated for original, dated in haplotype_pairs]
         self.assertTrue(all(matches))
示例#4
0
def run_date(args):
    """Load a tree sequence, date it with tsdate and write out the result.

    :param args: Parsed command-line namespace. The attributes read here are
        ``tree_sequence`` (input path), ``output`` (output path), ``Ne``,
        ``mutation_rate``, ``recombination_rate``, ``probability_space``,
        ``method``, ``epsilon``, ``num_threads``, ``ignore_oldest`` and
        ``progress``; all but the two paths are forwarded to ``tsdate.date``.
    :raises SystemExit: with an error message if ``tskit.load`` raises
        ``tskit.FileFormatError`` for the input file.
    """
    # Imported locally so the block does not depend on a module-level import;
    # sys.exit is used instead of the interactive-only ``exit`` helper that
    # the ``site`` module injects (absent under ``python -S``).
    import sys

    try:
        ts = tskit.load(args.tree_sequence)
    except tskit.FileFormatError as ffe:
        # The file name is now wrapped in a balanced pair of quotes.
        sys.exit("Error loading '{}': {}".format(args.tree_sequence, ffe))
    dated_ts = tsdate.date(
        ts, args.Ne, mutation_rate=args.mutation_rate,
        recombination_rate=args.recombination_rate,
        probability_space=args.probability_space, method=args.method,
        eps=args.epsilon, num_threads=args.num_threads,
        ignore_oldest_root=args.ignore_oldest, progress=args.progress)
    dated_ts.dump(args.output)
示例#5
0
 def compare_python_api(self, input_ts, cmd, Ne, mutation_rate, method):
     """Check that the CLI and the Python API produce the same node times.

     ``input_ts`` is dumped to a temporary directory, dated through
     ``cli.tsdate_main`` using the ``date`` subcommand plus the extra
     arguments in ``cmd``, and the output file is loaded back. The same
     input is then dated with ``tsdate.date`` and the node time arrays of
     both results are asserted to be equal.

     :param input_ts: Tree sequence to date.
     :param cmd: Additional command-line arguments as a single
         whitespace-separated string.
     :param Ne: Passed as ``Ne`` to ``tsdate.date``.
     :param mutation_rate: Passed as ``mutation_rate`` to ``tsdate.date``.
     :param method: Passed as ``method`` to ``tsdate.date``.
     """
     with tempfile.TemporaryDirectory() as tmpdir:
         input_filename = pathlib.Path(tmpdir) / "input.trees"
         input_ts.dump(input_filename)
         output_filename = pathlib.Path(tmpdir) / "output.trees"
         # Build argv as a list: only ``cmd`` is split, so a temporary
         # directory whose path contains whitespace is passed through intact.
         argv = ["date", str(input_filename), str(output_filename)]
         argv.extend(cmd.split())
         cli.tsdate_main(argv)
         output_ts = tskit.load(output_filename)
     dated_ts = tsdate.date(input_ts, Ne=Ne, mutation_rate=mutation_rate,
                            method=method)
     api_times = dated_ts.tables.nodes.time
     cli_times = output_ts.tables.nodes.time
     # Report both arrays only on failure rather than printing on every run.
     self.assertTrue(
         np.array_equal(api_times, cli_times),
         msg="API node times {} != CLI node times {}".format(
             api_times, cli_times))
示例#6
0
 def test_truncated_ts(self):
     """Date a tree sequence whose samples have been truncated.

     A simulation is passed through
     ``utility_functions.truncate_ts_samples`` and dated; the truncated
     input and the dated output are then given to
     ``self.ts_equal_except_times``.
     """
     pop_size = 1e2
     mut_rate = 2e-4
     full_ts = msprime.simulate(
         10,
         Ne=pop_size,
         length=400,
         recombination_rate=1e-4,
         mutation_rate=mut_rate,
         random_seed=12,
     )
     truncated_ts = utility_functions.truncate_ts_samples(
         full_ts, average_span=200, random_seed=123)
     dated_ts = tsdate.date(truncated_ts, Ne=pop_size, mutation_rate=mut_rate)
     # Ideally *haplotypes* would be compared here, in case the allele
     # encoding has changed, but haplotypes() doesn't currently deal with
     # missing data.
     self.ts_equal_except_times(truncated_ts, dated_ts)
示例#7
0
def _mutation_node_times(tree_seq):
    """Return the time of the node above each mutation in ``tree_seq``."""
    tables = tree_seq.tables
    return tables.nodes.time[tables.mutations.node]


def evaluate_tsdate_accuracy(parameter,
                             parameters_arr,
                             node_mut=False,
                             inferred=True,
                             prior_distr='lognorm',
                             progress=True):
    """Collect true and tsdate-estimated times while varying one parameter.

    For every value in ``parameters_arr`` and each random seed in 1..5, a
    tree sequence is simulated with msprime, mutations are placed with
    ``msprime.mutate``, the topology is optionally re-inferred with tsinfer,
    and the result is dated with both the 'inside_outside' and the
    'maximization' methods of ``tsdate.date``.

    :param parameter: Name of the quantity being varied. 'sample_size',
        'length', 'mutation_rate' and 'timepoints' change the simulation or
        dating accordingly; any other value leaves every run at the defaults
        (sample size 100, length 1e6, rates 1e-8, Ne 10000).
    :param parameters_arr: Sequence of values to try for ``parameter``.
    :param node_mut: If true, record times of all nodes from index
        ``ts.num_samples`` onwards; otherwise record the time of the node
        above each mutation.
    :param inferred: If true, date a tree sequence inferred by tsinfer from
        the simulated mutations rather than the simulated one.
    :param prior_distr: Passed as ``prior_distribution`` to
        ``tsdate.build_prior_grid`` (only used when ``parameter`` is
        'timepoints').
    :param progress: Whether to show the tqdm progress bar.
    :return: Tuple ``(all_results, prior_distr, inferred, node_mut)`` where
        ``all_results`` maps ``str(param)`` to a dict with keys 'io', 'max'
        and 'true_times', each holding one array per random seed.
    :raises ValueError: if both ``node_mut`` and ``inferred`` are true.
    """
    if node_mut and inferred:
        raise ValueError(
            "cannot evaluate node accuracy on inferred tree sequence")

    Ne = 10000
    mutation_rate = 1e-8
    recombination_rate = 1e-8
    random_seeds = range(1, 6)

    all_results = {
        str(param): {key: [] for key in ('io', 'max', 'true_times')}
        for param in parameters_arr
    }

    inferred_progress = 'using tsinfer' if inferred else 'true topology'
    if node_mut:
        node_mut_progress = 'comparing true and estimated node times'
    else:
        node_mut_progress = 'comparing true and estimated mutation times'
    description = ('Testing ' + parameter + " " + inferred_progress +
                   ". Evaluation by " + node_mut_progress)

    for param in tqdm(parameters_arr,
                      desc=description,
                      total=len(parameters_arr),
                      disable=not progress):
        # Settings depend only on ``param``, so resolve them once per value
        # instead of once per seed.
        sample_size = param if parameter == 'sample_size' else 100
        # Simulate directly at the requested length rather than simulating at
        # the default length first and throwing that result away.
        length = param if parameter == 'length' else 1e6
        run_mutation_rate = (
            param if parameter == 'mutation_rate' else mutation_rate)
        results = all_results[str(param)]

        for random_seed in random_seeds:
            ts = msprime.simulate(sample_size=sample_size,
                                  Ne=Ne,
                                  length=length,
                                  mutation_rate=mutation_rate,
                                  recombination_rate=recombination_rate,
                                  random_seed=random_seed)
            mutated_ts = msprime.mutate(ts,
                                        rate=run_mutation_rate,
                                        random_seed=random_seed)
            if inferred:
                sample_data = tsinfer.formats.SampleData.from_tree_sequence(
                    mutated_ts, use_times=False)
                target_ts = tsinfer.infer(sample_data).simplify()
            else:
                target_ts = mutated_ts

            # Arguments shared by both dating methods; a custom prior grid is
            # only supplied when the number of timepoints is being varied.
            date_kwargs = {
                'mutation_rate': run_mutation_rate,
                'Ne': Ne,
                'progress': False,
            }
            if parameter == 'timepoints':
                date_kwargs['prior'] = tsdate.build_prior_grid(
                    target_ts,
                    timepoints=param,
                    approximate_prior=True,
                    prior_distribution=prior_distr,
                    progress=False)
            io_dated = tsdate.date(target_ts,
                                   method='inside_outside',
                                   **date_kwargs)
            max_dated = tsdate.date(target_ts,
                                    method='maximization',
                                    **date_kwargs)

            # The guard at the top guarantees ``inferred`` is false whenever
            # ``node_mut`` is true, so testing ``node_mut`` alone suffices.
            if node_mut:
                first = ts.num_samples
                true_times = mutated_ts.tables.nodes.time[first:]
                io_times = io_dated.tables.nodes.time[first:]
                max_times = max_dated.tables.nodes.time[first:]
            else:
                true_times = _mutation_node_times(mutated_ts)
                io_times = _mutation_node_times(io_dated)
                max_times = _mutation_node_times(max_dated)
            results['true_times'].append(true_times)
            results['io'].append(io_times)
            results['max'].append(max_times)

    return all_results, prior_distr, inferred, node_mut
示例#8
0
 def test_simple_sim_1_tree(self):
     """Date a small simulation and compare it with the input.

     The simulated and dated tree sequences are passed to
     ``self.ts_equal_except_times``.
     """
     rate = 5
     simulated = msprime.simulate(8, mutation_rate=rate, random_seed=2)
     dated = tsdate.date(simulated, Ne=1, mutation_rate=rate)
     self.ts_equal_except_times(simulated, dated)