Python simplify_tables示例，msprime.simplify_tables Python示例

示例#1

0

显示文件

 def test_one_generation_no_deep_history(self):
     """Simplify one generation simulated with ``deep_history=False``.

     Checks the raw table sizes, sorts and simplifies with respect to the
     nodes flagged as samples, then verifies for every tree that the roots
     partition the sample set: no sample is found below two roots, and the
     union over all roots equals the tree sequence's samples.
     """
     pop_size = 20
     tables = wf_sim(
         N=pop_size, ngens=1, deep_history=False, seed=self.random_seed)
     self.assertEqual(tables.nodes.num_rows, 2 * pop_size)
     self.assertGreater(tables.edges.num_rows, 0)
     for unused_table in (tables.sites, tables.mutations, tables.migrations):
         self.assertEqual(unused_table.num_rows, 0)

     node_table, edge_table = tables.nodes, tables.edges
     is_sample = node_table.flags == msprime.NODE_IS_SAMPLE
     sample_ids = np.where(is_sample)[0].astype(np.int32)
     msprime.sort_tables(nodes=node_table, edges=edge_table)
     msprime.simplify_tables(
         samples=sample_ids, nodes=node_table, edges=edge_table)
     self.assertGreater(tables.nodes.num_rows, 0)
     self.assertGreater(tables.edges.num_rows, 0)

     ts = msprime.load_tables(nodes=node_table, edges=edge_table)
     for tree in ts.trees():
         seen = set()
         for root in tree.roots:
             below_root = set(tree.samples(root))
             # A sample reachable from two different roots would be a bug.
             self.assertEqual(len(below_root & seen), 0)
             seen.update(below_root)
         self.assertEqual(seen, set(ts.samples()))

示例#2

0

显示文件

 def test_with_recurrent_mutations(self):
     """Simplify tables that carry mutations at a single site.

     Mutations are added with ``tsutil.jukes_cantor``; the test checks that
     exactly one site exists before and after simplification and that every
     haplotype has one character per site.
     """
     # Despite the test name there is only ONE site, at 0.0.
     N = 10
     ngens = 100
     tables = wf_sim(
         N=N, ngens=ngens, deep_history=False, seed=self.random_seed)
     msprime.sort_tables(**tables.asdict())
     unsimplified = msprime.load_tables(**tables.asdict())
     mutated = tsutil.jukes_cantor(
         unsimplified, 1, 10, seed=self.random_seed)
     tables = mutated.tables
     self.assertEqual(tables.sites.num_rows, 1)
     self.assertGreater(tables.mutations.num_rows, 0)
     is_sample = tables.nodes.flags == msprime.NODE_IS_SAMPLE
     samples = np.where(is_sample)[0].astype(np.int32)

     # Before simplification: one site, so haplotypes have length one.
     for haplotype in mutated.haplotypes():
         self.assertEqual(len(haplotype), 1)

     # After simplification the single site and its mutations must survive.
     msprime.simplify_tables(
         samples=samples,
         nodes=tables.nodes,
         edges=tables.edges,
         sites=tables.sites,
         mutations=tables.mutations)
     self.assertGreater(tables.nodes.num_rows, 0)
     self.assertGreater(tables.edges.num_rows, 0)
     self.assertEqual(tables.sites.num_rows, 1)
     self.assertGreater(tables.mutations.num_rows, 0)

     simplified = msprime.load_tables(**tables.asdict())
     self.assertEqual(simplified.sample_size, N)
     for haplotype in simplified.haplotypes():
         self.assertEqual(len(haplotype), simplified.num_sites)

示例#3

0

显示文件

 def test_overlapping_generations(self):
     """Simplify a simulation run with ``survival=0.85``.

     After sorting and simplifying with respect to the flagged samples,
     every tree of the resulting tree sequence must have a single root.
     """
     tables = wf_sim(N=30, ngens=10, survival=0.85, seed=self.random_seed)
     for populated in (tables.nodes, tables.edges):
         self.assertGreater(populated.num_rows, 0)
     for unused_table in (tables.sites, tables.mutations, tables.migrations):
         self.assertEqual(unused_table.num_rows, 0)

     node_table, edge_table = tables.nodes, tables.edges
     msprime.sort_tables(nodes=node_table, edges=edge_table)
     is_sample = node_table.flags == msprime.NODE_IS_SAMPLE
     sample_ids = np.where(is_sample)[0].astype(np.int32)
     msprime.simplify_tables(
         samples=sample_ids, nodes=node_table, edges=edge_table)

     ts = msprime.load_tables(nodes=node_table, edges=edge_table)
     for tree in ts.trees():
         self.assertEqual(tree.num_roots, 1)

示例#4

0

显示文件

文件： argsimplifier.py 项目： jeromekelleher/fwdpy11_arg_example

    def simplify(self, generation, ancestry):
        """Append the ancestry recorded since the last call, then simplify.

        Each phase (prepping, appending, sorting, simplifying) adds its
        wall-clock duration to the matching ``self.__time_*`` accumulator.

        :param generation: current generation; the stored node times are
            shifted by ``float(generation) - self.last_gc_time``.
        :param ancestry: object providing ``prep_for_gc()`` and the
            ``nodes``, ``edges`` and ``samples`` buffers.
        :return: ``(True, number of rows in the node table)``.
        """
        # Shift the times of the nodes that are already stored.
        if self.__nodes.num_rows > 0:
            node_times = self.__nodes.time
            node_times += float(generation) - self.last_gc_time
            self.last_gc_time = generation
            self.__nodes.set_columns(
                flags=np.ones(self.__nodes.num_rows, dtype=np.uint32),
                population=self.__nodes.population,
                time=node_times)

        # Phase 1: view the recorded ancestry as numpy arrays.
        began = time.time()
        ancestry.prep_for_gc()
        new_nodes = np.array(ancestry.nodes, copy=False)
        new_edges = np.array(ancestry.edges, copy=False)
        samples = np.array(ancestry.samples, copy=False)
        # Every new node gets flag value 1.
        new_flags = np.ones(len(new_nodes), dtype=np.uint32)
        self.__time_prepping += (time.time() - began)

        # Phase 2: append the new rows to the stored tables.
        began = time.time()
        self.__nodes.append_columns(
            flags=new_flags,
            population=new_nodes['population'],
            time=new_nodes['generation'])
        self.__edges.append_columns(
            left=new_edges['left'],
            right=new_edges['right'],
            parent=new_edges['parent'],
            children=new_edges['child'],
            children_length=[1] * len(new_edges))
        self.__time_appending += (time.time() - began)

        # Phase 3: sort.
        began = time.time()
        msprime.sort_tables(nodes=self.__nodes, edgesets=self.__edges)
        self.__time_sorting += (time.time() - began)

        # Phase 4: simplify with respect to the current samples.
        began = time.time()
        msprime.simplify_tables(
            samples=samples.tolist(),
            nodes=self.__nodes,
            edgesets=self.__edges)
        self.__time_simplifying += (time.time() - began)
        return (True, self.__nodes.num_rows)

示例#5

0

显示文件

    def simplify(self, generation, tracker):
        """
        Take the newly tracked data, append it to the stored tables,
        then sort and simplify.

        :param generation: current generation, used to age stored nodes.
        :param tracker: provides ``nodes``, ``edges``, ``samples`` and
            ``convert_time()``.
        :return: length of the simplified node table, which is the next_id
            to use
        """
        # Age the stored nodes by the time elapsed since last_gc_time.
        # NOTE(review): last_gc_time is not updated in this method;
        # presumably the caller maintains it -- confirm.
        elapsed = generation - self.last_gc_time
        self.nodes.set_columns(
            flags=self.nodes.flags,
            population=self.nodes.population,
            time=self.nodes.time + elapsed)

        # Flag value 1 for every new node, built as one array rather than
        # as a Python list.
        new_flags = np.ones(len(tracker.nodes), dtype=np.uint32)

        # Convert time from forwards to backwards.
        tracker.convert_time()

        # Append the tracked rows to the internal tables.
        new_nodes = tracker.nodes
        new_edges = tracker.edges
        self.nodes.append_columns(
            flags=new_flags,
            population=new_nodes['population'],
            time=new_nodes['generation'])
        self.edges.append_columns(
            left=new_edges['left'],
            right=new_edges['right'],
            parent=new_edges['parent'],
            children=new_edges['child'],
            children_length=[1] * len(tracker.edges))

        # Sort and simplify.
        msprime.sort_tables(nodes=self.nodes, edgesets=self.edges)
        msprime.simplify_tables(
            samples=tracker.samples.tolist(),
            nodes=self.nodes,
            edgesets=self.edges)

        # The node table length doubles as the next offspring ID.
        return self.nodes.num_rows

示例#6

0

显示文件

 def test_non_overlapping_generations(self):
     """Simplify a simulation run with ``survival=0.0``.

     Every tree must have exactly one root, its leaves must be exactly the
     samples, and every internal node must have more than one child.
     """
     tables = wf_sim(N=10, ngens=10, survival=0.0, seed=self.random_seed)
     for populated in (tables.nodes, tables.edges):
         self.assertGreater(populated.num_rows, 0)
     for unused_table in (tables.sites, tables.mutations, tables.migrations):
         self.assertEqual(unused_table.num_rows, 0)

     node_table, edge_table = tables.nodes, tables.edges
     msprime.sort_tables(nodes=node_table, edges=edge_table)
     is_sample = node_table.flags == msprime.NODE_IS_SAMPLE
     sample_ids = np.where(is_sample)[0].astype(np.int32)
     msprime.simplify_tables(
         samples=sample_ids, nodes=node_table, edges=edge_table)

     ts = msprime.load_tables(nodes=node_table, edges=edge_table)
     for tree in ts.trees():
         self.assertEqual(tree.num_roots, 1)
         self.assertEqual(set(tree.leaves(tree.root)), set(ts.samples()))
         internal_nodes = (u for u in tree.nodes() if tree.is_internal(u))
         for u in internal_nodes:
             # Arity > 1 for every internal node.
             self.assertGreater(len(tree.children(u)), 1)

示例#7

0

显示文件

def wright_fisher(N, T, simplify_interval=1):
    """
    An implementation of algorithm W that simplifies whenever the current
    time ``t`` is a multiple of ``simplify_interval`` (every generation by
    default). The goal is to measure the number of edges in the tree
    sequence representing the history as a function of time.

    For simplicity we assume that the genome length L = 1 and the
    probability of death delta = 1.

    Returns a tuple ``(ts, S)``: the tree sequence loaded from the final
    tables, and an array whose k-th entry is the edge table length recorded
    after the k-th generation.
    """
    L = 1
    edges = msprime.EdgeTable()
    nodes = msprime.NodeTable()
    # Founders: N nodes at time T, all flagged 1.
    for _ in range(N):
        nodes.add_row(time=T, flags=1)
    P = list(range(N))
    S = np.zeros(T, dtype=int)
    # Time runs backwards from T - 1 down to 0.
    for t in range(T - 1, -1, -1):
        offspring = []
        for _ in range(N):
            child = len(nodes)
            nodes.add_row(time=t, flags=1)
            offspring.append(child)
            # Two parents drawn uniformly, with one crossover point x:
            # [0, x) comes from the first parent, [x, L) from the second.
            first_parent = P[random.randint(0, N - 1)]
            second_parent = P[random.randint(0, N - 1)]
            x = random.uniform(0, L)
            edges.add_row(0, x, first_parent, child)
            edges.add_row(x, L, second_parent, child)
        P = offspring
        if t % simplify_interval == 0:
            msprime.sort_tables(nodes=nodes, edges=edges)
            msprime.simplify_tables(offspring, nodes, edges)
            # The population is tracked as ids 0..N-1 after simplifying.
            P = list(range(N))
        S[T - t - 1] = len(edges)
    # t = 0 always triggers a simplify, so no special case is needed here.
    return msprime.load_tables(nodes=nodes, edges=edges), S

示例#8

0

显示文件

 def test_many_generations_no_deep_history(self):
     N = 10
     ngens = 100
     tables = wf_sim(N=N,
                     ngens=ngens,
                     deep_history=False,
                     seed=self.random_seed)
     self.assertEqual(tables.nodes.num_rows, N * (ngens + 1))
     self.assertGreater(tables.edges.num_rows, 0)
     self.assertEqual(tables.sites.num_rows, 0)
     self.assertEqual(tables.mutations.num_rows, 0)
     self.assertEqual(tables.migrations.num_rows, 0)
     nodes = tables.nodes
     edges = tables.edges
     samples = np.where(nodes.flags == msprime.NODE_IS_SAMPLE)[0].astype(
         np.int32)
     msprime.sort_tables(nodes=nodes, edges=edges)
     msprime.simplify_tables(samples=samples, nodes=nodes, edges=edges)
     self.assertGreater(tables.nodes.num_rows, 0)
     self.assertGreater(tables.edges.num_rows, 0)
     # We are assuming that everything has coalesced and we have single-root trees
     ts = msprime.load_tables(nodes=nodes, edges=edges)
     for tree in ts.trees():
         self.assertEqual(tree.num_roots, 1)

示例#9

0

显示文件

文件： msprime_examples.py 项目： molpopgen/fwdpp_experimental

    def test4(self):
        """Simplify a four-node example with one site at position 0.4.

        The single mutation sits on node 3; after simplifying with respect
        to samples 1 and 2 the genotype matrix must sum to zero.
        """
        self.n.set_columns(time=[1, 0, 0, 2],
                           flags=[msprime.NODE_IS_SAMPLE] * 4)

        # (parent, child, left, right)
        edge_rows = (
            (0, 1, 0, 0.4),
            (0, 1, 0.6, 1.0),
            (0, 2, 0, 1),
            (3, 0, 0, 0.4),
        )
        for parent, child, left, right in edge_rows:
            self.e.add_row(parent=parent, child=child, left=left, right=right)

        self.s.add_row(position=0.4, ancestral_state='0')
        self.m.add_row(site=0, node=3, derived_state='1')

        all_tables = dict(nodes=self.n, edges=self.e,
                          sites=self.s, mutations=self.m)
        msprime.sort_tables(**all_tables)
        msprime.simplify_tables(samples=[1, 2], **all_tables)
        ts = msprime.load_tables(**all_tables)
        genotypes = ts.genotype_matrix()
        self.assertEqual(genotypes[0:].sum(), 0)

示例#10

0

显示文件

 def test_simplify_tables(self):
     """Check ``simplify_tables`` on random sample subsets of each simulation.

     For every tree sequence from ``get_wf_sims`` and each target subset
     size, the tables are copied, simplified down to a random subset of the
     samples and handed to ``verify_simplify`` together with the node map.
     """
     seed = 71
     for ts in self.get_wf_sims(seed=seed):
         tables = ts.dump_tables()
         for nsamples in [2, 5, 10]:
             # Work on copies so each subset size starts from the same data.
             copies = {
                 "nodes": tables.nodes.copy(),
                 "edges": tables.edges.copy(),
                 "sites": tables.sites.copy(),
                 "mutations": tables.mutations.copy(),
             }
             subset_size = min(nsamples, ts.num_samples)
             sub_samples = random.sample(list(ts.samples()), subset_size)
             node_map = msprime.simplify_tables(
                 samples=sub_samples, **copies)
             small_ts = msprime.load_tables(**copies)
             self.verify_simplify(ts, small_ts, sub_samples, node_map)

示例#11

0

显示文件

文件： prototype_with_prior_history.py 项目： petrelharp/ftprime_ms

    # Append the newly simulated nodes to the node table.
    # NOTE(review): node_offset is added to the population column here as
    # well as to the edge parent/child ids below -- confirm that shifting
    # the population values is intended.
    nt.append_columns(flags=flags,
                      population=nodes['population'] + node_offset,
                      time=nodes['generation'])

    # Append the new edges, shifting parent and child ids by node_offset.
    es.append_columns(left=edges['left'],
                      right=edges['right'],
                      parent=edges['parent'] + node_offset,
                      child=edges['child'] + node_offset)

    # Sort
    msprime.sort_tables(nodes=nt, edges=es)

    # Simplify: this is where the magic happens
    # PLR: since these tables aren't valid yet, simplify_tables has to be
    # used rather than loading them into a tree sequence first.
    msprime.simplify_tables(samples=samples.tolist(), nodes=nt, edges=es)

    # Create a tree sequence
    x = msprime.load_tables(nodes=nt, edges=es)

    # Let's look at the MRCAs (the time of each tree's root).
    # This is where things go badly:
    MRCAS = [t.get_time(t.get_root()) for t in x.trees()]
    print(MRCAS)

    # Throw down some mutations
    # onto a sample of size nsam.
    # We'll copy tables here,
    # just to see what happens.
    # PLR: these .copy()s aren't doing anything: they are just overwritten
    # before being used.
    nt_s = nt.copy()

示例#12

0

显示文件

文件： test_threads.py 项目： swamidass/msprime

 def writer():
     """Call ``simplify_tables`` for samples ``[0, 1]`` on the ``tables``
     object taken from the enclosing scope."""
     shared = tables
     msprime.simplify_tables(
         [0, 1],
         nodes=shared.nodes,
         edges=shared.edges,
         sites=shared.sites,
         mutations=shared.mutations)

示例#13

0

显示文件

文件： test_threads.py 项目： swamidass/msprime

 def writer(thread_index, results):
     """Call ``simplify_tables`` for samples ``[0, 1]`` on the ``tables``
     object taken from the enclosing scope.

     ``thread_index`` and ``results`` are accepted but not used here.
     """
     shared = tables
     msprime.simplify_tables(
         [0, 1],
         nodes=shared.nodes,
         edges=shared.edges,
         sites=shared.sites,
         mutations=shared.mutations)

示例#14

0

显示文件

文件： prototype_with_mutations.py 项目： petrelharp/ftprime_ms

                   derived_state_length=np.ones(len(mutas['node_id']),
                                                np.uint32))

    # Sort
    msprime.sort_tables(nodes=nt, edges=es, sites=st, mutations=mt)
    # NOTE(review): the label says "mutations" but the value printed is the
    # number of rows in the site table -- confirm which one is wanted.
    print("num total mutations: ", st.num_rows)

    # Simplify: this is where the magic happens
    ## PLR: since these tables aren't valid yet, simplify_tables has to be
    ## used rather than loading them into a tree sequence first.
    # Simplify copies so the unsimplified tables stay available.
    nt_c = nt.copy()
    es_c = es.copy()
    st_c = st.copy()
    mt_c = mt.copy()
    msprime.simplify_tables(samples=samples.tolist(),
                            nodes=nt_c,
                            edges=es_c,
                            sites=st_c,
                            mutations=mt_c)
    print("num simplified mutations: ", st_c.num_rows)
    # Create a tree sequence
    x = msprime.load_tables(nodes=nt_c, edges=es_c, sites=st_c, mutations=mt_c)

    # Print the largest node id referenced by a mutation next to the node
    # count, so the two can be compared by eye.
    print(max(mt_c.node))
    print(nt_c.num_rows)

    # Second set of copies, taken from the simplified tables.
    nt_s = nt_c.copy()
    es_s = es_c.copy()
    st_s = st_c.copy()
    mt_s = mt_c.copy()

    # Draw nsam distinct values from range(2 * popsize), without replacement.
    nsam_samples = np.random.choice(2 * popsize, nsam, replace=False)

示例#15

0

显示文件

文件： check_fwd_sim_data_with_msprime.py 项目： molpopgen/fwdpp_experimental

        c.append(ci[0])
        l.append(li[0])
        r.append(ri[0])

# Load the collected parent/child/left/right columns into the edge table.
edges.set_columns(parent=p, child=c, left=l, right=r)


N = int(sys.argv[3])
# Alternative sampling scheme: the last 2*N node ids, i.e.
# range(len(times) - 2 * N, len(times)).
# Current scheme: every 132nd node id.
samples = list(range(0, len(times), 132))
ts = None

# Time the sort and the simplify steps separately.
A = time.time()
msprime.sort_tables(nodes=nodes, edges=edges)
B = time.time()
# NOTE(review): other callers of simplify_tables in this codebase treat the
# return value as a node map, not a tree sequence -- the name `ts` may be
# misleading; confirm.
ts = msprime.simplify_tables(nodes=nodes, edges=edges, samples=samples)
C = time.time()

print("Sorting: ", B - A, "seconds")
print("Simplifying: ", C - B, "seconds")


# Dump the simplified edges, one per line.
with open(sys.argv[4], 'w') as f:
    for edge in edges:
        # NOTE(review): five arguments are passed but the format string has
        # only four fields, so the parent's time is looked up and then
        # ignored -- confirm whether a fifth field was intended.
        f.write("{} {} {:.6f} {:.6f}\n".format(
            edge.parent, edge.child, edge.left, edge.right,
            nodes[edge.parent].time))
# Dump the node times, one per line.
with open(sys.argv[5], 'w') as f:
    for node in nodes:
        f.write("{}\n".format(node.time))

示例#16

0

显示文件

    tracker = MockAncestryTracker()
    # Recombination rate passed to wf(): rho / (4 * popsize).
    recrate = args.rho / float(4 * args.popsize)
    samples = wf(args.popsize, simplifier, tracker, recrate,
                 SIMLEN * args.popsize)

    if len(tracker.nodes) > 0:  # Then there's stuff that didn't get GC'd
        simplifier.simplify(SIMLEN * args.popsize, tracker)

    # Local names for convenience.
    # I copy the tables here, too,
    # because I think that will be
    # done in practice: you will
    # often want to simplify an
    # ARG down to a smaller sample
    # but still have the complete
    # history of the pop'n.
    nodes = simplifier.nodes.copy()
    edges = simplifier.edges.copy()

    # Draw args.nsam distinct values from range(2 * popsize) and simplify
    # the copies down to them.
    nsam_samples = np.random.choice(2 * args.popsize, args.nsam, replace=False)
    msprime.simplify_tables(samples=nsam_samples.tolist(),
                            nodes=nodes,
                            edges=edges)
    # Generate mutations on the simplified copies at rate theta / (4 * popsize).
    msp_rng = msprime.RandomGenerator(args.seed)
    mutations = msprime.MutationTable()
    sites = msprime.SiteTable()
    mutgen = msprime.MutationGenerator(msp_rng,
                                       args.theta / float(4 * args.popsize))
    mutgen.generate(nodes, edges, sites, mutations)
    print(sites.num_rows)

示例#17

0

显示文件

文件： benchmarking.py 项目： petrelharp/ftprime_ms

        # Use fwdpy11
        wf.evolve(rng, pop, params)
        # Get a sample
        s = fwdpy11.sampling.sample_separate(rng, pop, args.nsam)
    else:
        # Use this module
        # `async` is a reserved keyword from Python 3.7 on, so writing
        # `args.async` is a SyntaxError there; getattr reads the same
        # attribute on every Python version.
        use_async = getattr(args, "async")
        simplifier, atracker, tsim = evolve_track(
            rng, pop, params, args.gc, True, args.seed, use_async,
            args.queue, args.qsize, args.wthreads)
        # Take times from simplifier before they change.
        times = simplifier.times
        times['fwd_sim_runtime'] = [tsim]
        times['N'] = [args.popsize]
        times['theta'] = [args.theta]
        times['rho'] = [args.rho]
        times['simplify_interval'] = [args.gc]
        # Write the timings as a gzip-compressed, tab-separated table.
        d = pd.DataFrame(times)
        d.to_csv(args.outfile1, sep='\t', index=False, compression='gzip')
        # Simplify the genealogy down to a sample,
        # and throw mutations onto that sample
        msprime.simplify_tables(np.random.choice(2 * args.popsize, args.nsam,
                                                 replace=False).tolist(),
                                nodes=simplifier.nodes,
                                edges=simplifier.edges)
        msp_rng = msprime.RandomGenerator(args.seed)
        sites = msprime.SiteTable()
        mutations = msprime.MutationTable()
        mutgen = msprime.MutationGenerator(
            msp_rng, args.theta / float(4 * args.popsize))
        mutgen.generate(simplifier.nodes,
                        simplifier.edges, sites, mutations)

示例#18

0

显示文件

文件： argsimplifier.py 项目： petrelharp/ftprime_ms

    def simplify(self, generation, ancestry):
        """Merge newly recorded ancestry into the stored tables and simplify.

        The new edges are reversed, appended and sorted one parent-time
        slice at a time, the previously stored edges are appended after
        them, and the combined tables are simplified. CPU time spent in
        each phase is added to the matching ``self.__time_*`` accumulator.

        :param generation: current generation; the stored node times are
            shifted by ``float(generation) - self.last_gc_time``.
        :param ancestry: object providing ``nodes``, ``edges``, ``samples``
            and an ``acquire()`` / ``release()`` pair that is held while
            its buffers are read.
        :return: ``(True, number of rows in the node table)``.
        """
        # Shift the times of the nodes that are already stored.
        if self.__nodes.num_rows > 0:
            tc = self.__nodes.time
            dt = float(generation) - self.last_gc_time
            tc += dt
            self.last_gc_time = generation
            flags = np.ones(self.__nodes.num_rows, dtype=np.uint32)
            self.__nodes.set_columns(flags=flags,
                                     population=self.__nodes.population,
                                     time=tc)

        before = time.process_time()
        # Acquire mutex
        # NOTE(review): release() further down is not in a finally block,
        # so an exception in between leaves the ancestry object locked.
        ancestry.acquire()
        self.reverse_time(ancestry.nodes)
        na = np.array(ancestry.nodes, copy=False)
        ea = np.array(ancestry.edges, copy=False)
        new_min_id = na['id'][0]
        new_max_id = na['id'][-1]
        # A non-zero delta means the new ids do not start where the stored
        # node table ends, so update_indexes is asked to adjust them.
        delta = new_min_id - len(self.__nodes)
        if delta != 0:
            self.update_indexes(ancestry.edges, ancestry.samples, delta,
                                new_min_id, new_max_id)
        samples = np.array(ancestry.samples, copy=False)
        flags = np.ones(len(na), dtype=np.uint32)
        self.__time_prepping += time.process_time() - before

        before = time.process_time()
        self.__nodes.append_columns(flags=flags,
                                    population=na['population'],
                                    time=na['generation'])
        # Copy the already sorted edges to local arrays
        left = self.__edges.left[:]
        right = self.__edges.right[:]
        parent = self.__edges.parent[:]
        child = self.__edges.child[:]
        # Get the new edges and reverse them. After this, we know that all edges
        # are correctly sorted with respect to time. We then sort each time slice
        # individually, reducing the overall cost of the sort.
        new_left = ea['left'][::-1]
        new_right = ea['right'][::-1]
        new_parent = ea['parent'][::-1]
        new_child = ea['child'][::-1]

        parent_time = self.__nodes.time[new_parent]
        # Indexes at which the parent time changes, i.e. slice boundaries.
        breakpoints = np.where(parent_time[1:] != parent_time[:-1])[0] + 1
        self.__edges.reset()
        self.__time_appending += time.process_time() - before

        before = time.process_time()
        start = 0
        # The last slice must end at len(new_parent). A slice end of -1
        # would stop one short and silently drop the final new edge.
        for end in itertools.chain(breakpoints, [len(new_parent)]):
            if end == start:
                # Only possible when there are no new edges at all.
                continue
            assert np.all(parent_time[start:end] == parent_time[start])
            self.__edges.append_columns(left=new_left[start:end],
                                        right=new_right[start:end],
                                        parent=new_parent[start:end],
                                        child=new_child[start:end])
            msprime.sort_tables(nodes=self.__nodes,
                                edges=self.__edges,
                                edge_start=start)
            start = end
        self.__time_sorting += time.process_time() - before

        # Append the old sorted edges to the table.
        self.__edges.append_columns(left=left,
                                    right=right,
                                    parent=parent,
                                    child=child)
        before = time.process_time()
        msprime.simplify_tables(samples=samples.tolist(),
                                nodes=self.__nodes,
                                edges=self.__edges)

        # Release any locks on the ancestry object
        ancestry.release()
        self.__last_edge_start = len(self.__edges)
        self.__time_simplifying += time.process_time() - before
        self.__process = True
        return (True, self.__nodes.num_rows)

示例#19

0

显示文件

import fwdpy11_arg_example.evolve_arg as ea
import msprime
import numpy as np
import sys

# Command line: N rho theta gc_interval seed
N = int(sys.argv[1])
rho = float(sys.argv[2])
theta = float(sys.argv[3])
gc_interval = int(sys.argv[4])
seed = int(sys.argv[5])

# Run the forward simulation with ancestry tracking and no mutation.
simplifier, atracker, tsim = ea.evolve_track_wrapper(
    popsize=N, rho=rho, seed=seed, gc_interval=gc_interval, mu=0.0)

print(tsim, simplifier.times)
np.random.seed(seed)

# Get a sample of size n = 10: ten distinct values from range(2 * N).
sample_ids = np.random.choice(2 * N, 10, replace=False).tolist()
msprime.simplify_tables(sample_ids,
                        nodes=simplifier.nodes,
                        edgesets=simplifier.edgesets)

# Put mutations on the simplified tables and report the number of sites.
msp_rng = msprime.RandomGenerator(seed)
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
mutation_rate = theta / float(4 * N)  # rho = theta
mutgen = msprime.MutationGenerator(msp_rng, mutation_rate)
mutgen.generate(simplifier.nodes, simplifier.edgesets, sites, mutations)
print(sites.num_rows)

示例#20

0

显示文件

文件： msprime-examples.py 项目： petrelharp/ftprime_ms

def run_simplify_num_edges_benchmark(args):
    """Time ``simplify_tables`` on growing suffixes of an edge table.

    Loads the tree sequence in ``args.file`` and, for each sample size,
    simplifies the edges from ``start`` to the end of the table for a
    series of decreasing ``start`` values. The measurements are written to
    ``data/simplify_num_edges.dat`` and plotted to
    ``simplify_num_edges.png``.

    Results are collected in lists rather than pre-sized arrays. The number
    of slices actually visited is not always ``num_slices``, so fixed-size
    arrays either kept all-zero rows that ended up in the CSV or overflowed
    for small inputs.

    :param args: object with a ``file`` attribute naming the tree sequence
        file to load.
    """
    ts = msprime.load(args.file)
    np.random.seed(1)
    print("num_nodes = ", ts.num_nodes)
    print("num_edges = ", ts.num_edges)
    num_slices = 10

    tables = ts.dump_tables()
    nodes = tables.nodes
    edges = tables.edges

    node_time = nodes.time
    left = edges.left
    right = edges.right
    parent = edges.parent
    child = edges.child

    size = left.nbytes + right.nbytes + parent.nbytes + child.nbytes
    print("Total edge size = ", size / 1024**3, "GiB")
    sample_sizes = [10, 100, 1000]

    # At least 1, so the range() step below is never zero for tiny inputs.
    slice_size = max(1, ts.num_edges // num_slices)

    num_edges = []
    simplify_time = []
    sample_size = []
    for N in sample_sizes:
        for start in range(ts.num_edges - slice_size, 0, -slice_size):
            max_node = np.max(child[start:])
            # The N node ids just below the largest child id in the slice.
            samples = np.arange(max_node - N, max_node, dtype=np.int32)
            subset_nodes = msprime.NodeTable()
            subset_nodes.set_columns(time=node_time[:max_node + 1],
                                     flags=np.ones(max_node + 1,
                                                   dtype=np.uint32))
            subset_edges = msprime.EdgeTable()
            subset_edges.set_columns(left=left[start:],
                                     right=right[start:],
                                     parent=parent[start:],
                                     child=child[start:])
            before = time.process_time()
            msprime.simplify_tables(samples=samples,
                                    nodes=subset_nodes,
                                    edges=subset_edges)
            duration = time.process_time() - before
            # np.float64 keeps the printed format unchanged and makes a
            # zero duration yield inf instead of raising.
            edges_used = np.float64(ts.num_edges - start)
            num_edges.append(edges_used)
            simplify_time.append(duration)
            sample_size.append(N)
            print(N, edges_used, duration, edges_used / duration,
                  "per second")

    num_edges = np.array(num_edges, dtype=float)
    simplify_time = np.array(simplify_time, dtype=float)
    sample_size = np.array(sample_size, dtype=float)

    df = pd.DataFrame({
        "sample_size": sample_size,
        "num_edges": num_edges,
        "time": simplify_time
    })
    df.to_csv("data/simplify_num_edges.dat")

    # One line per sample size.
    for N in sample_sizes:
        index = sample_size == N
        plt.plot(num_edges[index], simplify_time[index], marker="o")
    # Label and save once, after all lines are drawn.
    plt.xlabel("num edges")
    plt.ylabel("Time to simplify (s)")
    plt.savefig("simplify_num_edges.png")