Python TableCollection示例，tskit.TableCollection Python示例

示例#1

0

显示文件

 def verify_simple_model(
     self, n, seed=1, recombination_rate=None, length=None, recombination_map=None
 ):
     """
     Check that simulating directly and simulating via ``from_ts`` agree.

     One simulation is run for ``n`` samples. A second is started at time 0
     from a tree sequence holding only ``n`` sample nodes in a single
     population, using the same seed, recombination settings and model.
     After populations, the population metadata schema and provenances are
     blanked out, the two table collections must compare equal.
     """
     common = dict(
         random_seed=seed,
         recombination_rate=recombination_rate,
         recombination_map=recombination_map,
         model=self.model,
     )
     direct_ts = msprime.simulate(n, length=length, **common)

     # Starting point for the second run: samples only, no edges.
     initial = tskit.TableCollection(direct_ts.sequence_length)
     initial.populations.add_row()
     for _ in range(n):
         initial.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
     resumed_ts = msprime.simulate(
         from_ts=initial.tree_sequence(), start_time=0, **common
     )

     dumped = [direct_ts.dump_tables(), resumed_ts.dump_tables()]
     # Both outputs must contain at least one population before we wipe them.
     for collection in dumped:
         assert len(collection.populations)
     # TODO use updated tskit APIs for comparisons.
     for collection in dumped:
         collection.populations.clear()
         collection.populations.metadata_schema = ""
         collection.provenances.clear()
     assert dumped[0] == dumped[1]

示例#2

0

显示文件

文件： clean_implementation.py 项目： ForwardSimulation/edge_buffering_experiments

def brute_force_merge_and_simplify(pstate):
    """
    Copy the state's tables, append all buffered edges, sort and simplify.

    Nodes referenced by ``pstate.parents`` (their ``n0`` and ``n1``) get
    flag value 1; all other nodes get 0. Node times are flipped relative to
    the largest time in the source node table. The result is returned as a
    tree sequence.
    """
    source = pstate.tables
    merged = tskit.TableCollection(source.sequence_length)

    node_flags = np.zeros(len(source.nodes), dtype=np.uint32)
    for parent in pstate.parents:
        node_flags[parent.n0] = 1
        node_flags[parent.n1] = 1
    source_time = source.nodes.time
    merged.nodes.set_columns(
        flags=node_flags,
        time=-1.0 * (source_time - source_time.max()),
    )

    merged.edges.set_columns(
        left=source.edges.left,
        right=source.edges.right,
        parent=source.edges.parent,
        child=source.edges.child,
    )
    # Each buffer entry holds two sequences of edge rows; append both.
    for buffered in pstate.buffered_edges:
        for edge_row in buffered[0] + buffered[1]:
            merged.edges.add_row(*edge_row)

    merged.sort()
    merged.simplify()
    return merged.tree_sequence()

示例#3

0

显示文件

文件： continuous_simplify.py 项目： molpopgen/dynamic-ts

    def export(self):
        """
        Build a tskit tree sequence from the individuals and their segments.

        Individuals are visited in reverse of ``self.individuals``; the
        position in that reversed order becomes the node id. No full table
        sort is performed. Instead, each individual's segments are sorted
        locally by (descending child time, child node id, left coordinate)
        before being written as edges.

        NOTE (from the original author): the individuals are sorted by birth
        order and the segments within an individual are expected to be close
        to sorted already, which is why a per-individual sort is preferred
        over a full table sort.
        """
        tables = tskit.TableCollection(self.sequence_length)
        ordered = list(reversed(self.individuals))
        # Individual index -> node id in the exported tables.
        node_of = {ind.index: node for node, ind in enumerate(ordered)}

        for ind in ordered:
            is_sample = ind.is_alive is True
            tables.nodes.add_row(
                flags=tskit.NODE_IS_SAMPLE if is_sample else 0,
                time=self.time - ind.time,
            )

        def edge_order(seg):
            return (-seg.child.time, node_of[seg.child.index], seg.left)

        for ind in ordered:
            parent_node = node_of[ind.index]
            for seg in sorted(ind.segments, key=edge_order):
                tables.edges.add_row(
                    left=seg.left,
                    right=seg.right,
                    parent=parent_node,
                    child=node_of[seg.child.index],
                )
        return tables.tree_sequence()

示例#4

0

显示文件

def decompress_zarr(root):
    """
    Rebuild a tree sequence from the arrays stored under ``root``.

    ``root.attrs["sequence_length"]`` gives the sequence length. Every table
    column is read from ``root["<table>/<column>"]``. Edge and migration
    ``left``/``right`` and site ``position`` are stored as indexes into
    ``root["coordinates"]`` and are mapped back through that array.
    """
    tables = tskit.TableCollection(root.attrs["sequence_length"])
    coordinates = root["coordinates"][:]

    def stored(table, *names):
        # Columns passed through exactly as stored.
        return {name: root[table + "/" + name] for name in names}

    def decoded(table, *names):
        # Columns stored as indexes into ``coordinates``.
        return {name: coordinates[root[table + "/" + name]] for name in names}

    tables.individuals.set_columns(
        **stored(
            "individuals",
            "flags",
            "location",
            "location_offset",
            "metadata",
            "metadata_offset",
        )
    )
    tables.nodes.set_columns(
        **stored(
            "nodes",
            "flags",
            "time",
            "population",
            "individual",
            "metadata",
            "metadata_offset",
        )
    )
    tables.edges.set_columns(
        **decoded("edges", "left", "right"),
        **stored("edges", "parent", "child"),
    )
    tables.migrations.set_columns(
        **decoded("migrations", "left", "right"),
        **stored("migrations", "node", "source", "dest", "time"),
    )
    tables.sites.set_columns(
        **decoded("sites", "position"),
        **stored(
            "sites",
            "ancestral_state",
            "ancestral_state_offset",
            "metadata",
            "metadata_offset",
        ),
    )
    tables.mutations.set_columns(
        **stored(
            "mutations",
            "site",
            "node",
            "parent",
            "derived_state",
            "derived_state_offset",
            "metadata",
            "metadata_offset",
        )
    )
    tables.populations.set_columns(
        **stored("populations", "metadata", "metadata_offset")
    )
    tables.provenances.set_columns(
        **stored(
            "provenances",
            "timestamp",
            "timestamp_offset",
            "record",
            "record_offset",
        )
    )

    return tables.tree_sequence()

示例#5

0

显示文件

    def test_missing_data_samples(self):
        """
        Variants restricted to different sample lists on an edgeless tree.

        Two sample nodes, one site with ancestral state "A" and one mutation
        to "T" on node 0. Node 1 carries no mutation and has no edges.
        """
        tables = tskit.TableCollection(1.0)
        for _ in range(2):
            tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
        tables.sites.add_row(0.5, "A")
        tables.mutations.add_row(0, 0, "T")
        ts = tables.tree_sequence()

        def first_variant(samples):
            return list(ts.variants(samples=samples))[0]

        # An empty sample list still yields a variant per site.
        var = first_variant([])
        assert len(var.genotypes) == 0
        assert not var.has_missing_data
        assert var.alleles == ("A", "T")

        # Only the sample carrying the mutation: nothing is missing.
        var = first_variant([0])
        assert len(var.genotypes) == 1
        assert var.genotypes[0] == 1
        assert not var.has_missing_data
        assert var.alleles == ("A", "T")

        # Only the other sample: its genotype is reported as missing.
        var = first_variant([1])
        assert len(var.genotypes) == 1
        assert var.genotypes[0] == -1
        assert var.has_missing_data
        assert var.alleles == ("A", "T", None)

示例#6

0

显示文件

 def test_no_edges_mutations(self):
     """Run ``self.verify`` on two samples, one site, one mutation, no edges."""
     tables = tskit.TableCollection(1)
     tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)
     tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)
     tables.sites.add_row(position=0, ancestral_state="A")
     tables.mutations.add_row(site=0, node=0, derived_state="T")
     ts = tables.tree_sequence()
     self.verify(ts)

示例#7

0

显示文件

文件： test_one_to_one.py 项目： jradrion/tsencode

    def DecodeTree(self, A):
        """
        Invert 'EncodeTreeSequence()': turn the encoded array back into a
        tree sequence so the round trip can be tested.

        ``A`` is indexed as ``A[row, column, channel]``. Channel 0 of column
        0 holds the node time for ``row``; channels 1 and 2 hold the two
        parts of the parent id, combined with ``GlueInt8``. A negative value
        in either part marks padding, and no edge is written for it. Each
        column becomes the unit interval ``[column, column + 1)``.
        """
        num_rows, num_columns = A.shape[0], A.shape[1]
        tables = tskit.TableCollection(sequence_length=num_columns)
        nodes, edges = tables.nodes, tables.edges
        tables.populations.add_row()

        for child in range(num_rows):
            node_time = A[child, 0, 0]
            # Nodes at time zero get flag 1, all others flag 0.
            node_flag = 1 if node_time == 0.0 else 0
            nodes.add_row(flags=node_flag, time=float(node_time), population=0)
            for pos in range(num_columns):
                high = A[child, pos, 1]
                low = A[child, pos, 2]
                if high < 0 or low < 0:
                    # Padding entry: no edge here.
                    continue
                edges.add_row(
                    left=pos,
                    right=pos + 1,
                    parent=GlueInt8(high, low),
                    child=child,
                )

        tables.sort()
        tables.simplify()
        return tables.tree_sequence()

示例#8

0

显示文件

    def export(self):
        """
        Exports the edges to a tskit tree sequence.

        Node ids are forced to equal individual indexes: reachable
        individuals are visited in index order and any gap in the index
        sequence is filled with a padding node (flags 0, time 0). Edges are
        written from each individual's ``children`` mapping and the tables
        are sorted before conversion.
        """
        tables = tskit.TableCollection(self.sequence_length)
        # Keeping node id == individual index makes debugging easier; the
        # original author flags this as a terrible idea.
        by_index = sorted(self.all_reachable(), key=lambda x: x.index)

        expected_id = 0
        for ind in by_index:
            # Fill the gap up to this individual's index with padding nodes.
            while ind.index != expected_id:
                tables.nodes.add_row(flags=0, time=0)
                expected_id += 1
            if ind.is_alive is True:
                node_flags = tskit.NODE_IS_SAMPLE
            else:
                node_flags = 0
            new_id = tables.nodes.add_row(
                flags=node_flags, time=self.time - ind.time
            )
            assert new_id == ind.index
            expected_id += 1

        for ind in by_index:
            for child, segments in ind.children.items():
                for seg in segments:
                    tables.edges.add_row(
                        left=seg.left,
                        right=seg.right,
                        parent=ind.index,
                        child=child.index,
                    )
        # The edges could be emitted in sorted order above, which would make
        # this sort unnecessary.
        tables.sort()
        return tables.tree_sequence()

示例#9

0

显示文件

    def test_equal_internal_node_time(self):
        """
        ``from_ms`` must reject ms output where two internal nodes share a time.

        Tree used (nodes 4 and 5 are both at time 1):

            6
          ┏━┻━┓
          4   5
         ┏┻┓ ┏┻┓
         0 1 2 3
        """
        tables = tskit.TableCollection(1)
        for _ in range(4):
            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
        for internal_time in (1, 1, 2):
            tables.nodes.add_row(0, time=internal_time)

        parent_child = [(4, 0), (4, 1), (5, 2), (5, 3), (6, 4), (6, 5)]
        for parent, child in parent_child:
            tables.edges.add_row(0, 1, parent, child)
        tables.sort()

        ms_text = tsconvert.to_ms(tables.tree_sequence())
        # The current algorithm assumes node times are unique
        with pytest.raises(ValueError):
            tsconvert.from_ms(ms_text)

示例#10

0

显示文件

文件： tsutil.py 项目： daniel-goldstein/tskit

def caterpillar_tree(n, num_sites=0, num_mutations=1):
    """
    Returns a caterpillar tree with n samples. For each of the sites a
    path of at most n - 2 mutations is put down along the internal
    nodes. Each site gets exactly the same set of mutations.

    Samples are nodes 0..n-1 at time 0. Internal node k (k = 1..n-1) sits at
    time k and joins the previous internal node (or sample 0 for the first)
    with sample k. Site j is placed at position (j + 1) / n with ancestral
    state "0". Its mutations start on node 2n - 3 and walk down one node id
    at a time, alternating derived state "1", "0", "1", ...

    Raises ValueError if sites are requested and num_mutations > n - 2.
    """
    if num_sites > 0 and num_mutations > n - 2:
        raise ValueError("At most n - 2 mutations allowed")

    tables = tskit.TableCollection(1)
    for _ in range(n):
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)

    # Build the spine: each new internal node adopts the previous spine
    # node and the next sample.
    spine = 0
    for leaf in range(1, n):
        parent = tables.nodes.add_row(time=leaf)
        tables.edges.add_row(0, tables.sequence_length, parent, spine)
        tables.edges.add_row(0, tables.sequence_length, parent, leaf)
        spine = parent

    top_mutation_node = 2 * n - 3
    for site in range(num_sites):
        tables.sites.add_row(position=(site + 1) / n, ancestral_state="0")
        for step in range(num_mutations):
            tables.mutations.add_row(
                site=site,
                derived_state=str((step + 1) % 2),
                node=top_mutation_node - step,
            )

    tables.sort()
    tables.build_index()
    tables.compute_mutation_parents()
    return tables.tree_sequence()

示例#11

0

显示文件

    def test_multiple_mrcas(self):
        """
        Compare msprime's ARG and mutation log-likelihoods with hand-written
        expected values on a small, manually built ARG.

        The ARG has two samples (nodes 0 and 1), four nodes flagged
        NODE_IS_RE_EVENT (2-5), and two unflagged ancestors (6 at time 0.5
        covering [0, 0.5) and 7 at time 1 covering [0.5, 1)). Three sites
        each carry one mutation.
        """
        tables = tskit.TableCollection(sequence_length=1)
        # Nodes 0 and 1: the two samples.
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, population=0,
                             individual=-1, time=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, population=0,
                             individual=-1, time=0)

        # Sample 1 is split at 0.5 between nodes 2 and 3 (time 0.1).
        tables.edges.add_row(left=0, right=0.5, parent=2, child=1)
        tables.edges.add_row(left=0.5, right=1, parent=3, child=1)
        tables.nodes.add_row(flags=msprime.NODE_IS_RE_EVENT, population=0,
                             individual=-1, time=0.1)
        tables.nodes.add_row(flags=msprime.NODE_IS_RE_EVENT, population=0,
                             individual=-1, time=0.1)

        # Sample 0 is split at 0.5 between nodes 4 and 5 (time 0.15).
        tables.edges.add_row(left=0, right=0.5, parent=4, child=0)
        tables.edges.add_row(left=0.5, right=1, parent=5, child=0)
        tables.nodes.add_row(flags=msprime.NODE_IS_RE_EVENT, population=0,
                             individual=-1, time=0.15)
        tables.nodes.add_row(flags=msprime.NODE_IS_RE_EVENT, population=0,
                             individual=-1, time=0.15)

        # Node 6 (time 0.5) is the parent of nodes 2 and 4 on [0, 0.5).
        tables.edges.add_row(left=0, right=0.5, parent=6, child=2)
        tables.edges.add_row(left=0, right=0.5, parent=6, child=4)
        tables.nodes.add_row(flags=0, population=0, individual=-1, time=0.5)

        # Node 7 (time 1) is the parent of nodes 3 and 5 on [0.5, 1).
        tables.edges.add_row(left=0.5, right=1, parent=7, child=3)
        tables.edges.add_row(left=0.5, right=1, parent=7, child=5)
        tables.nodes.add_row(flags=0, population=0, individual=-1, time=1)

        # One mutation per site; the site rows themselves are added just below.
        tables.mutations.add_row(site=0, node=1, derived_state="1")
        tables.mutations.add_row(site=1, node=4, derived_state="1")
        tables.mutations.add_row(site=2, node=3, derived_state="1")

        tables.sites.add_row(0.1, "0")
        tables.sites.add_row(0.2, "0")
        tables.sites.add_row(0.7, "0")

        tables.populations.add_row()

        arg = tables.tree_sequence()

        # Expected ARG log-likelihood, accumulated term by term, for a range
        # of recombination rates.
        rho = np.arange(0.1, 10, 0.1)
        for r in rho:
            log_arg_likelihood_exact = math.log(r) - (1 + 2 * r) * 0.1
            log_arg_likelihood_exact += math.log(r) - (3 + 2 * r) * 0.05
            log_arg_likelihood_exact -= (6 + 2 * r) * 0.35
            log_arg_likelihood_exact -= (1 + r) * 0.5
            self.assertTrue(math.isclose(log_arg_likelihood_exact,
                                         msprime.log_arg_likelihood(arg, r)))

        # Expected unnormalised mutation log-likelihood for a range of
        # mutation rates, using a hard-coded tree length of 1.5.
        theta = np.arange(0.1, 10, 0.1)
        tree_length = 1.5
        for t in theta:
            unnormalised_mutation_ll_exact = (3 * math.log(tree_length * t) -
                                              tree_length * t)
            unnormalised_mutation_ll_exact -= math.log(tree_length)
            unnormalised_mutation_ll_exact -= 2 * math.log(2 * tree_length)
            self.assertTrue(math.isclose(
                            unnormalised_mutation_ll_exact,
                            msprime.unnormalised_log_mutation_likelihood(arg, t)))

示例#12

0

显示文件

 def test_zero_has_parent(self):
     """``check_ancestors_ts`` must raise ValueError when node 0 has a parent."""
     tables = tskit.TableCollection(1)
     for node_time in (1, 2):
         tables.nodes.add_row(time=node_time, flags=0)
     # Single edge: node 1 is the parent of node 0 over [0, 1).
     tables.edges.add_row(left=0, right=1, parent=1, child=0)
     with self.assertRaises(ValueError):
         tsinfer.check_ancestors_ts(tables.tree_sequence())

示例#13

0

显示文件

文件： combinatorics.py 项目： Chris1221/tskit

    def to_tsk_tree(self):
        """
        Convert this tree into a ``tskit`` tree on a unit-length sequence.

        Leaves become sample nodes at time 0 and their labels are used
        directly as node ids. Each internal node is placed one time unit
        above its oldest child. Returns the first (only) tree of the
        resulting tree sequence.
        """
        seq_length = 1
        tables = tskit.TableCollection(seq_length)
        for _ in range(self.num_leaves):
            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)

        def insert_subtree(node):
            # Returns the node id that represents ``node`` in the tables.
            if node.is_leaf():
                assert node.label is not None
                return node.label

            children = [insert_subtree(child) for child in node.children]
            # Arbitrarily put the parent one unit above its oldest child.
            oldest = max(tables.nodes.time[c] for c in children)
            parent = tables.nodes.add_row(time=oldest + 1)
            for c in children:
                tables.edges.add_row(0, seq_length, parent, c)
            return parent

        insert_subtree(self)

        # Post-order insertion does not guarantee the edge ordering that
        # tskit requires, so sort before building the tree sequence.
        tables.sort()
        ts = tables.tree_sequence()
        return ts.first()

示例#14

0

显示文件

 def test_fromdict_all_values_empty(self):
     """A reference_sequence entry with only empty values counts as absent."""
     encoded = tskit.TableCollection(1).asdict()
     encoded["reference_sequence"] = {
         "data": "",
         "url": "",
         "metadata_schema": "",
         "metadata": b"",
     }
     rebuilt = tskit.TableCollection.fromdict(encoded)
     assert not rebuilt.has_reference_sequence()

示例#15

0

显示文件

 def test_missing_data(self):
     """
     Haplotypes for two isolated samples at a single site with state "A".

     Covers the ``missing_data_character`` option, the
     ``isolated_as_missing`` flag, the deprecated ``impute_missing_data``
     flag, and every combination of the two flags.
     """
     tables = tskit.TableCollection(1.0)
     for _ in range(2):
         tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
     tables.sites.add_row(0.5, "A")
     ts = tables.tree_sequence()

     # Using an existing allele as the missing-data character is an error,
     # raised once the haplotypes are actually generated.
     clashing = ts.haplotypes(missing_data_character="A")
     self.assertRaises(ValueError, list, clashing)
     for c in ("-", ".", "a"):
         self.assertEqual(list(ts.haplotypes(missing_data_character=c)), [c, c])

     as_missing = ["-", "-"]
     as_ancestral = ["A", "A"]
     cases = [
         ({"isolated_as_missing": True}, as_missing),
         ({"isolated_as_missing": False}, as_ancestral),
         ({}, as_missing),
         # Test deprecated method
         ({"impute_missing_data": True}, as_ancestral),
         ({"impute_missing_data": False}, as_missing),
         # When both flags are given, the outcome follows isolated_as_missing.
         ({"isolated_as_missing": True, "impute_missing_data": True}, as_missing),
         ({"isolated_as_missing": True, "impute_missing_data": False}, as_missing),
         ({"isolated_as_missing": False, "impute_missing_data": True}, as_ancestral),
         ({"isolated_as_missing": False, "impute_missing_data": False}, as_ancestral),
     ]
     for options, expected in cases:
         h = list(ts.haplotypes(**options))
         self.assertEqual(h, expected)

示例#16

0

显示文件

    def test_missing_data_samples(self):
        """
        Variants restricted to different sample lists on an edgeless tree.

        Two sample nodes, one site with ancestral state "A" and one mutation
        to "T" on node 0. Node 1 carries no mutation and has no edges.
        """
        tables = tskit.TableCollection(1.0)
        for _ in range(2):
            tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
        tables.sites.add_row(0.5, "A")
        tables.mutations.add_row(0, 0, "T")
        ts = tables.tree_sequence()

        def first_variant(samples):
            return list(ts.variants(samples=samples))[0]

        # An empty sample list still yields a variant per site.
        var = first_variant([])
        self.assertEqual(len(var.genotypes), 0)
        self.assertFalse(var.has_missing_data)
        self.assertEqual(var.alleles, ("A", "T"))

        # Only the sample carrying the mutation: nothing is missing.
        var = first_variant([0])
        self.assertEqual(len(var.genotypes), 1)
        self.assertEqual(var.genotypes[0], 1)
        self.assertFalse(var.has_missing_data)
        self.assertEqual(var.alleles, ("A", "T"))

        # Only the other sample: its genotype is reported as missing.
        var = first_variant([1])
        self.assertEqual(len(var.genotypes), 1)
        self.assertEqual(var.genotypes[0], -1)
        self.assertTrue(var.has_missing_data)
        self.assertEqual(var.alleles, ("A", "T", None))

示例#17

0

显示文件

 def test_two_populations_migration(self):
     """
     Direct and ``from_ts`` simulations agree for two populations with
     symmetric migration.

     All ten samples start in population 0. After provenances are cleared,
     both runs (same seed) must produce equal tables.
     """
     n = 10
     seed = 1234
     migration = [[0, 1], [1, 0]]

     direct_ts = msprime.simulate(
         population_configurations=[
             msprime.PopulationConfiguration(n),
             msprime.PopulationConfiguration(0),
         ],
         migration_matrix=migration,
         random_seed=seed,
     )

     # Starting state for the second run: two populations, samples in pop 0.
     initial = tskit.TableCollection(1)
     for _ in range(2):
         initial.populations.add_row()
     for _ in range(n):
         initial.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
     resumed_ts = msprime.simulate(
         from_ts=initial.tree_sequence(),
         start_time=0,
         population_configurations=[
             msprime.PopulationConfiguration(),
             msprime.PopulationConfiguration(),
         ],
         migration_matrix=migration,
         random_seed=seed,
     )

     direct_tables = direct_ts.dump_tables()
     resumed_tables = resumed_ts.dump_tables()
     for collection in (direct_tables, resumed_tables):
         collection.provenances.clear()
     self.assertEqual(direct_tables, resumed_tables)

示例#18

0

显示文件

 def verify_simple_model(
     self, n, seed=1, recombination_rate=None, length=None, recombination_map=None
 ):
     """
     Check that simulating directly and simulating via ``from_ts`` agree.

     One simulation is run for ``n`` samples. A second is started at time 0
     from a tree sequence holding only ``n`` sample nodes in a single
     population, using the same seed, recombination settings and model.
     With provenances cleared, the two table collections must be equal.
     """
     common = dict(
         random_seed=seed,
         recombination_rate=recombination_rate,
         recombination_map=recombination_map,
         model=self.model,
     )
     direct_ts = msprime.simulate(n, length=length, **common)

     # Starting point for the second run: samples only, no edges.
     initial = tskit.TableCollection(direct_ts.sequence_length)
     initial.populations.add_row()
     for _ in range(n):
         initial.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
     resumed_ts = msprime.simulate(
         from_ts=initial.tree_sequence(), start_time=0, **common
     )

     direct_tables = direct_ts.dump_tables()
     resumed_tables = resumed_ts.dump_tables()
     for collection in (direct_tables, resumed_tables):
         collection.provenances.clear()
     self.assertEqual(direct_tables, resumed_tables)

示例#19

0

显示文件

文件： test_parsimony.py 项目： mmosmond/tskit

def felsenstein_tables():
    """
    Return sorted tables for the example tree below.

    Samples are nodes 0-4 at time 0; internal nodes 5-8 sit at times 1-4.

        8
      ┏━┻━━┓
      ┃    7
      ┃   ┏┻┓
      6   ┃ ┃
    ┏━┻┓  ┃ ┃
    ┃  5  ┃ ┃
    ┃ ┏┻┓ ┃ ┃
    2 3 4 0 1
    """
    tables = tskit.TableCollection(1)
    for _ in range(5):
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    for internal_time in range(1, 5):
        tables.nodes.add_row(flags=0, time=internal_time)

    # (parent, child) pairs, all spanning the whole sequence [0, 1).
    parent_child = [
        (7, 0),
        (7, 1),
        (6, 2),
        (5, 3),
        (5, 4),
        (6, 5),
        (8, 6),
        (8, 7),
    ]
    for parent, child in parent_child:
        tables.edges.add_row(0, 1, parent, child)
    tables.sort()
    return tables

示例#20

0

显示文件

文件： hudson1990.py 项目： sam217pa/simbook

def simulate(nsam: int):
    """
    Simulate a coalescent genealogy with the linear-time algorithm of
    Hudson (1990), recording the result as a tree sequence.

    Reference:
    Hudson, Richard R. 1990.
    “Gene Genealogies and the Coalescent Process.”
    Oxford Surveys in Evolutionary Biology 7 (1): 44.

    Time is scaled in units of 2N generations. Random draws come from the
    global ``np.random`` state.

    :param nsam: The sample size
    :type nsam: int
    :return: A tree sequence on the unit interval with ``nsam`` samples.
    """
    tc = tskit.TableCollection(1)

    # Slots [0, n) of ``lineages`` always hold the node ids still waiting
    # to coalesce; slots from nsam upwards hold the future ancestor ids.
    lineages = np.arange(2 * nsam - 1, dtype=np.int32)
    for _ in range(nsam):
        tc.nodes.add_row(time=0.0, flags=tskit.NODE_IS_SAMPLE)

    now = 0.0
    for n in range(nsam, 1, -1):
        # Waiting time to the next coalescence among n lineages,
        # in units of 2N generations.
        rate = (n * (n - 1)) / 2.
        now += np.random.exponential(1. / rate)

        # The new ancestor is not a sample, so its flags are zero.
        tc.nodes.add_row(time=now, flags=0)
        ancestor = 2 * nsam - n

        # Swap step: draw the first child and back-fill its slot with the
        # last active lineage, then draw the second child and put the
        # ancestor in its slot.
        slot = np.random.choice(n, 1)[0]
        first = lineages[slot]
        lineages[slot] = lineages[n - 1]
        slot = np.random.choice(n - 1, 1)[0]
        second = lineages[slot]
        lineages[slot] = lineages[ancestor]

        # Edges that share a parent must be listed in increasing child
        # order, so write the smaller child id first.
        low, high = sorted((first, second))
        tc.edges.add_row(parent=ancestor, child=low, left=0.0, right=1.0)
        tc.edges.add_row(parent=ancestor, child=high, left=0.0, right=1.0)

    return tc.tree_sequence()

示例#21

0

显示文件

文件： formats.py 项目： saunack/tskit

def _load_legacy_hdf5_v2(root, remove_duplicate_positions):
    """
    Convert a version-2 legacy HDF5 file into a tree sequence.

    Each coalescence record in ``root["trees"]`` has one parent node and two
    children, so it becomes two edges with the same parent and interval.
    Node flags, populations and times are rebuilt from the records and, when
    present, from ``root["samples"]``. Mutations are converted when
    ``root["mutations"]`` exists.

    Provenance records for tree generation, mutation generation (if any)
    and the upgrade itself are written, in that order, to the returned
    tree sequence.

    :param root: The opened HDF5 root group.
    :param remove_duplicate_positions: Passed through to
        ``_convert_hdf5_mutations``.
    :return: The converted tree sequence.
    """
    # Get the coalescence records
    trees_group = root["trees"]
    old_timestamp = datetime.datetime.min.isoformat()

    # Two edges per record: slots 2i and 2i + 1 share record i's values.
    num_rows = trees_group["node"].shape[0]
    index = np.arange(num_rows, dtype=int)
    parent = np.zeros(2 * num_rows, dtype=np.int32)
    parent[2 * index] = trees_group["node"]
    parent[2 * index + 1] = trees_group["node"]
    left = np.zeros(2 * num_rows, dtype=np.float64)
    left[2 * index] = trees_group["left"]
    left[2 * index + 1] = trees_group["left"]
    right = np.zeros(2 * num_rows, dtype=np.float64)
    right[2 * index] = trees_group["right"]
    right[2 * index + 1] = trees_group["right"]
    child = np.array(trees_group["children"], dtype=np.int32).flatten()

    tables = tskit.TableCollection(np.max(right))
    tables.edges.set_columns(left=left,
                             right=right,
                             parent=parent,
                             child=child)

    # Record legacy provenance directly on the output tables. Previously
    # these rows went into a detached ProvenanceTable that was never
    # attached to ``tables``, so they were dropped.
    tables.provenances.add_row(
        timestamp=old_timestamp,
        record=_get_v2_provenance("generate_trees", trees_group.attrs),
    )

    cr_node = np.array(trees_group["node"], dtype=np.int32)
    num_nodes = max(np.max(child), np.max(cr_node)) + 1
    # Every node id below the smallest record (parent) node is a sample.
    sample_size = np.min(cr_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    population = np.zeros(num_nodes, dtype=np.int32)
    time = np.zeros(num_nodes, dtype=np.float64)
    flags[:sample_size] = tskit.NODE_IS_SAMPLE
    cr_population = np.array(trees_group["population"], dtype=np.int32)
    cr_time = np.array(trees_group["time"])
    time[cr_node] = cr_time
    population[cr_node] = cr_population
    if "samples" in root:
        samples_group = root["samples"]
        population[:sample_size] = samples_group["population"]
        if "time" in samples_group:
            time[:sample_size] = samples_group["time"]
    tables.nodes.set_columns(flags=flags, population=population, time=time)
    _set_populations(tables)

    if "mutations" in root:
        mutations_group = root["mutations"]
        _convert_hdf5_mutations(mutations_group, tables.sites,
                                tables.mutations, remove_duplicate_positions)
        tables.provenances.add_row(
            timestamp=old_timestamp,
            record=_get_v2_provenance("generate_mutations",
                                      mutations_group.attrs),
        )
    tables.provenances.add_row(_get_upgrade_provenance(root))
    tables.sort()
    return tables.tree_sequence()

示例#22

0

显示文件

 def tree(self):
     """
     Return a single-sample chain tree: node 0 is the only sample and each
     node k (k = 1..3, at time k) is the parent of node k - 1.
     """
     tables = tskit.TableCollection(1.0)
     tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
     for parent in range(1, 4):
         tables.nodes.add_row(flags=0, time=parent)
         tables.edges.add_row(left=0, right=1, parent=parent, child=parent - 1)
     tables.sort()
     ts = tables.tree_sequence()
     return ts.first()

示例#23

0

显示文件

 def test_asdict_reference_no_metadata(self):
     """``asdict`` omits the metadata keys when only ``data`` has been set."""
     tables = tskit.TableCollection(1)
     tables.reference_sequence.data = "ABCDEF"
     refseq_dict = tables.asdict()["reference_sequence"]
     assert refseq_dict["data"] == "ABCDEF"
     assert refseq_dict["url"] == ""
     for absent_key in ("metadata", "metadata_schema"):
         assert absent_key not in refseq_dict

示例#24

0

显示文件

 def test_same_object(self):
     """
     A previously fetched reference_sequence handle sees later writes, even
     though each attribute access returns a distinct Python object.
     """
     tables = tskit.TableCollection(1)
     earlier_handle = tables.reference_sequence
     tables.reference_sequence.data = "asdf"
     assert earlier_handle.data == "asdf"
     # Not clear we want to do this, but keeping the same pattern as the
     # tables for now.
     later_handle = tables.reference_sequence
     assert later_handle is not earlier_handle

示例#25

0

显示文件

 def test_write_metadata_schema_fails(self):
     """Assigning a metadata schema on a tree sequence's reference sequence
     must fail with a "read-only" AttributeError."""
     tables = tskit.TableCollection(1)
     tables.reference_sequence.data = "abc"
     ts = tables.tree_sequence()
     with pytest.raises(AttributeError, match="read-only"):
         new_schema = tskit.MetadataSchema.permissive_json()
         ts.reference_sequence.metadata_schema = new_schema

示例#26

0

显示文件

 def test_write_metadata_fails(self):
     """Assigning metadata on a tree sequence's reference sequence must fail
     with a "read-only" AttributeError."""
     tables = tskit.TableCollection(1)
     tables.reference_sequence.data = "abc"
     ts = tables.tree_sequence()
     new_metadata = b"xyz"
     with pytest.raises(AttributeError, match="read-only"):
         # NOTE: encoding is attempted before the write, so not every input
         # ends in an AttributeError; this value does.
         ts.reference_sequence.metadata = new_metadata

示例#27

0

显示文件

 def test_zero_has_no_children(self):
     """``check_ancestors_ts`` must raise ValueError when node 0 appears in
     no edge at all."""
     tables = tskit.TableCollection(1)
     for node_time in (1, 2, 3):
         tables.nodes.add_row(time=node_time, flags=0)
     # The only edge joins nodes 2 and 1; node 0 is left unconnected.
     tables.edges.add_row(left=0, right=1, parent=2, child=1)
     with pytest.raises(ValueError):
         tsinfer.check_ancestors_ts(tables.tree_sequence())

示例#28

0

显示文件

文件： test_compression.py 项目： brianzhang01/tszip

 def test_mutation_parent_example(self):
     """Run ``self.verify`` on a tree sequence whose second mutation names
     the first one as its parent."""
     tables = tskit.TableCollection(1)
     for _ in range(2):
         tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
     tables.sites.add_row(position=0, ancestral_state="A")
     # A -> T on node 0, then T -> A on the same node with parent mutation 0.
     tables.mutations.add_row(site=0, node=0, derived_state="T")
     tables.mutations.add_row(site=0, node=0, parent=0, derived_state="A")
     ts = tables.tree_sequence()
     self.verify(ts)

示例#29

0

显示文件

 def test_fromdict_reference_data(self):
     """``fromdict`` with only ``data`` set leaves the other reference
     sequence fields at their empty defaults."""
     encoded = tskit.TableCollection(1).asdict()
     encoded["reference_sequence"] = {"data": "XYZ"}
     rebuilt = tskit.TableCollection.fromdict(encoded)
     assert rebuilt.has_reference_sequence()
     refseq = rebuilt.reference_sequence
     assert refseq.data == "XYZ"
     refseq = rebuilt.reference_sequence
     assert refseq.url == ""
     refseq = rebuilt.reference_sequence
     assert repr(refseq.metadata_schema) == ""
     refseq = rebuilt.reference_sequence
     assert refseq.metadata == b""

示例#30

0

显示文件

 def test_fromdict_reference_url(self):
     """``fromdict`` with only ``url`` set leaves the other reference
     sequence fields at their empty defaults."""
     encoded = tskit.TableCollection(1).asdict()
     encoded["reference_sequence"] = {"url": "file://file.fasta"}
     rebuilt = tskit.TableCollection.fromdict(encoded)
     assert rebuilt.has_reference_sequence()
     refseq = rebuilt.reference_sequence
     assert refseq.data == ""
     refseq = rebuilt.reference_sequence
     assert refseq.url == "file://file.fasta"
     refseq = rebuilt.reference_sequence
     assert repr(refseq.metadata_schema) == ""
     refseq = rebuilt.reference_sequence
     assert refseq.metadata == b""