示例#1
0
 def verify_simple_model(
     self, n, seed=1, recombination_rate=None, length=None, recombination_map=None
 ):
     """
     Simulate ``n`` samples directly, then again starting from a tree
     sequence that holds only ``n`` sample nodes, and assert that both
     runs produce equal tables.

     Population rows, the population metadata schema and provenance rows
     are cleared from both table collections before they are compared.
     """
     # Arguments common to both simulate() calls.
     shared = dict(
         random_seed=seed,
         recombination_rate=recombination_rate,
         recombination_map=recombination_map,
         model=self.model,
     )
     direct = msprime.simulate(n, length=length, **shared)

     # Starting point for the second run: one population and n sample
     # nodes at time 0, with no edges.
     start = tskit.TableCollection(direct.sequence_length)
     start.populations.add_row()
     for _ in range(n):
         start.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
     resumed = msprime.simulate(
         from_ts=start.tree_sequence(), start_time=0, **shared
     )

     both = (direct.dump_tables(), resumed.dump_tables())
     for dumped in both:
         assert len(dumped.populations)
     # TODO use updated tskit APIs for comparisons.
     for dumped in both:
         dumped.populations.clear()
         dumped.populations.metadata_schema = ""
         dumped.provenances.clear()
     assert both[0] == both[1]
def brute_force_merge_and_simplify(pstate):
    """
    Build a simplified tree sequence from ``pstate`` the slow way.

    All nodes and edges in ``pstate.tables`` are copied into a new table
    collection, every buffered edge is appended, and the result is fully
    sorted and simplified. The nodes referenced by ``pstate.parents``
    (``n0`` and ``n1`` of each entry) get flag value 1; node times are
    written as ``max_time - time``.

    :param pstate: Object providing ``tables``, ``parents`` and
        ``buffered_edges``.
    :return: The simplified tree sequence.
    """
    source = pstate.tables
    merged = tskit.TableCollection(source.sequence_length)

    # Flag the two nodes of every current parent.
    node_flags = np.zeros(len(source.nodes), dtype=np.uint32)
    for parent in pstate.parents:
        node_flags[parent.n0] = 1
        node_flags[parent.n1] = 1

    # Flip the time axis so that the largest stored time becomes 0.
    stored_times = source.nodes.time
    merged.nodes.set_columns(
        flags=node_flags,
        time=-1.0 * (stored_times - stored_times.max()),
    )

    existing = source.edges
    merged.edges.set_columns(
        left=existing.left,
        right=existing.right,
        parent=existing.parent,
        child=existing.child,
    )
    # Each buffer entry carries two collections of edge rows.
    for buffered in pstate.buffered_edges:
        for edge_row in buffered[0] + buffered[1]:
            merged.edges.add_row(*edge_row)

    merged.sort()
    merged.simplify()
    return merged.tree_sequence()
    def export(self):
        """
        Export the individuals and their segments as a tskit tree sequence.

        Nodes are added by walking ``self.individuals`` in reverse, and an
        individual's node ID is its position in that reversed order.

        NOTE (from the original author): the individuals themselves are
        sorted by birth order, and the segments within an individual
        should already be close to sorted. A full table sort is therefore
        probably wasteful, so segments are sorted within each individual
        instead, which could be trivially parallelized across individuals.
        """
        tables = tskit.TableCollection(self.sequence_length)
        ordered = list(reversed(self.individuals))
        # Map each individual's index to its node ID to make debugging easier.
        node_id = {ind.index: j for j, ind in enumerate(ordered)}

        for ind in ordered:
            if ind.is_alive is True:
                flags = tskit.NODE_IS_SAMPLE
            else:
                flags = 0
            tables.nodes.add_row(flags=flags, time=self.time - ind.time)

        def edge_order(seg):
            # Sort key: negated child time, then child node ID, then left.
            child = seg.child
            return (-child.time, node_id[child.index], seg.left)

        for ind in ordered:
            for seg in sorted(ind.segments, key=edge_order):
                tables.edges.add_row(
                    left=seg.left,
                    right=seg.right,
                    parent=node_id[ind.index],
                    child=node_id[seg.child.index],
                )
        return tables.tree_sequence()
示例#4
0
def decompress_zarr(root):
    """
    Rebuild a tree sequence from the arrays stored under ``root``.

    Columns are read from ``root["<table>/<column>"]``. The left/right
    columns of edges and migrations, and the site positions, are used as
    indexes into ``root["coordinates"]`` to recover the actual values.

    :param root: Group-like object with an ``attrs["sequence_length"]``
        entry and the arrays described above.
    :return: The tree sequence built from the populated tables.
    """
    tables = tskit.TableCollection(root.attrs["sequence_length"])
    coordinates = root["coordinates"][:]

    def stored(table, *columns):
        # Columns handed to tskit exactly as they are stored.
        return {name: root[table + "/" + name] for name in columns}

    def decoded(table, *columns):
        # Columns holding indexes into the coordinates array.
        return {name: coordinates[root[table + "/" + name]] for name in columns}

    tables.individuals.set_columns(
        **stored(
            "individuals",
            "flags",
            "location",
            "location_offset",
            "metadata",
            "metadata_offset",
        )
    )

    tables.nodes.set_columns(
        **stored(
            "nodes",
            "flags",
            "time",
            "population",
            "individual",
            "metadata",
            "metadata_offset",
        )
    )

    tables.edges.set_columns(
        **decoded("edges", "left", "right"),
        **stored("edges", "parent", "child")
    )

    tables.migrations.set_columns(
        **decoded("migrations", "left", "right"),
        **stored("migrations", "node", "source", "dest", "time")
    )

    tables.sites.set_columns(
        **decoded("sites", "position"),
        **stored(
            "sites",
            "ancestral_state",
            "ancestral_state_offset",
            "metadata",
            "metadata_offset",
        )
    )

    tables.mutations.set_columns(
        **stored(
            "mutations",
            "site",
            "node",
            "parent",
            "derived_state",
            "derived_state_offset",
            "metadata",
            "metadata_offset",
        )
    )

    tables.populations.set_columns(
        **stored("populations", "metadata", "metadata_offset")
    )

    tables.provenances.set_columns(
        **stored(
            "provenances",
            "timestamp",
            "timestamp_offset",
            "record",
            "record_offset",
        )
    )

    return tables.tree_sequence()
示例#5
0
    def test_missing_data_samples(self):
        """
        Check variants for different ``samples`` lists on a table with two
        sample nodes, no edges, one site and a mutation over node 0 only.
        """
        tables = tskit.TableCollection(1.0)
        for _ in range(2):
            tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
        tables.sites.add_row(0.5, "A")
        tables.mutations.add_row(0, 0, "T")
        ts = tables.tree_sequence()

        # (samples, expected genotypes, expect missing data, expected alleles)
        cases = [
            # If we have no samples we still get a list of variants.
            ([], [], False, ("A", "T")),
            # If we have a single sample that's not missing, there's no
            # missing data.
            ([0], [1], False, ("A", "T")),
            # If we have a single sample that is missing, there is
            # missing data.
            ([1], [-1], True, ("A", "T", None)),
        ]
        for samples, genotypes, missing, alleles in cases:
            variant = list(ts.variants(samples=samples))[0]
            assert list(variant.genotypes) == genotypes
            assert bool(variant.has_missing_data) is missing
            assert variant.alleles == alleles
示例#6
0
 def test_no_edges_mutations(self):
     """
     Run ``self.verify`` on a tree sequence with two sample nodes, no
     edges, and one site carrying a single mutation over node 0.
     """
     tables = tskit.TableCollection(1)
     tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)
     tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE)
     tables.sites.add_row(position=0, ancestral_state="A")
     tables.mutations.add_row(site=0, node=0, derived_state="T")
     ts = tables.tree_sequence()
     self.verify(ts)
示例#7
0
    def DecodeTree(self, A):
        """
        Take in the array produced by 'EncodeTreeSequence()' and apply the
        inverse operation to produce a TreeSequence() for testing.

        Row ``r`` of ``A`` describes node ``r``: ``A[r, 0, 0]`` is its time,
        and for each column ``c`` the pair ``A[r, c, 1]``, ``A[r, c, 2]`` is
        combined by ``GlueInt8`` into the parent of node ``r`` over
        ``[c, c + 1)``. Negative entries are padding and produce no edge.
        """
        num_rows, num_columns = A.shape[0], A.shape[1]
        tables = tskit.TableCollection(sequence_length=num_columns)
        nodes = tables.nodes
        edges = tables.edges
        tables.populations.add_row()

        for child in range(num_rows):
            time = A[child, 0, 0]
            # Nodes at time zero get flag value 1.
            flag = 1 if time == 0.0 else 0
            nodes.add_row(flags=flag, time=float(time), population=0)
            for left in range(num_columns):
                top = A[child, left, 1]
                bot = A[child, left, 2]
                # for padding, we don't add edges
                if top < 0 or bot < 0:
                    continue
                edges.add_row(
                    left=left,
                    right=left + 1,
                    parent=GlueInt8(top, bot),
                    child=child,
                )

        tables.sort()
        tables.simplify()
        return tables.tree_sequence()
示例#8
0
    def export(self):
        """
        Exports the reachable individuals and their edges to a tskit tree
        sequence.

        Node IDs are made equal to individual indexes; gaps between the
        indexes of reachable individuals are filled with padding nodes
        (flags 0, time 0).
        """
        tables = tskit.TableCollection(self.sequence_length)
        # Keeping node ID == individual index makes debugging easier.
        # THIS IS A TERRIBLE IDEA!!! (original author's note)
        reachable = sorted(self.all_reachable(), key=lambda x: x.index)

        expected_id = 0
        for ind in reachable:
            # Insert padding nodes until the next ID matches this index.
            while expected_id != ind.index:
                tables.nodes.add_row(flags=0, time=0)
                expected_id += 1
            if ind.is_alive is True:
                flags = tskit.NODE_IS_SAMPLE
            else:
                flags = 0
            ret = tables.nodes.add_row(flags=flags, time=self.time - ind.time)
            assert ret == ind.index
            expected_id += 1

        for parent in reachable:
            for child, segments in parent.children.items():
                for seg in segments:
                    tables.edges.add_row(
                        left=seg.left,
                        right=seg.right,
                        parent=parent.index,
                        child=child.index,
                    )
        # Edges are not inserted in sorted order above, so sort here;
        # the original author notes that doing it above would be trivial.
        tables.sort()
        return tables.tree_sequence()
示例#9
0
    def test_equal_internal_node_time(self):
        """
        Converting to ms format and back is expected to raise ValueError
        when two internal nodes (4 and 5) share the same time.
        """
        #     6
        #   ┏━┻━┓
        #   4   5
        #  ┏┻┓ ┏┻┓
        #  0 1 2 3
        tables = tskit.TableCollection(1)
        for _ in range(4):
            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
        # Nodes 4 and 5 share time 1; node 6 is the root at time 2.
        for internal_time in (1, 1, 2):
            tables.nodes.add_row(0, time=internal_time)

        parent_child_pairs = [(4, 0), (4, 1), (5, 2), (5, 3), (6, 4), (6, 5)]
        for parent, child in parent_child_pairs:
            tables.edges.add_row(0, 1, parent, child)
        tables.sort()

        msout = tsconvert.to_ms(tables.tree_sequence())
        # The current algorithm assumes node times are unique
        with pytest.raises(ValueError):
            tsconvert.from_ms(msout)
示例#10
0
def caterpillar_tree(n, num_sites=0, num_mutations=1):
    """
    Returns a caterpillar tree with n samples. For each of the sites, a
    path of at most n - 2 mutations is put down along the internal
    nodes. Each site gets exactly the same set of mutations.

    :param n: Number of sample nodes.
    :param num_sites: Number of sites; site ``j`` is at ``(j + 1) / n``.
    :param num_mutations: Number of mutations per site, alternating
        between derived states "1" and "0".
    :raises ValueError: If ``num_sites > 0`` and ``num_mutations > n - 2``.
    """
    if num_sites > 0 and num_mutations > n - 2:
        raise ValueError("At most n - 2 mutations allowed")
    tables = tskit.TableCollection(1)
    for _ in range(n):
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)

    # Add the internal nodes: each one joins the previous subtree root
    # with the next sample.
    span = tables.sequence_length
    subtree_root = 0
    for leaf in range(1, n):
        parent = tables.nodes.add_row(time=leaf)
        tables.edges.add_row(0, span, parent, subtree_root)
        tables.edges.add_row(0, span, parent, leaf)
        subtree_root = parent

    # Mutations start over node 2n - 3 and move down one node ID each time.
    first_mutation_node = 2 * n - 3
    for site in range(num_sites):
        tables.sites.add_row(position=(site + 1) / n, ancestral_state="0")
        for k in range(num_mutations):
            tables.mutations.add_row(
                site=site,
                derived_state=str((k + 1) % 2),
                node=first_mutation_node - k,
            )

    tables.sort()
    tables.build_index()
    tables.compute_mutation_parents()
    return tables.tree_sequence()
示例#11
0
    def test_multiple_mrcas(self):
        """
        Compare msprime's ARG and mutation log-likelihoods with hand-derived
        expressions on a hand-built ARG with two samples, over a range of
        recombination and mutation rates.
        """
        tables = tskit.TableCollection(sequence_length=1)

        # (flags, time) for nodes 0..7, in ID order.
        re_event = msprime.NODE_IS_RE_EVENT
        node_rows = [
            (tskit.NODE_IS_SAMPLE, 0),
            (tskit.NODE_IS_SAMPLE, 0),
            (re_event, 0.1),
            (re_event, 0.1),
            (re_event, 0.15),
            (re_event, 0.15),
            (0, 0.5),
            (0, 1),
        ]
        for flags, time in node_rows:
            tables.nodes.add_row(flags=flags, population=0, individual=-1,
                                 time=time)

        # (left, right, parent, child)
        edge_rows = [
            (0, 0.5, 2, 1),
            (0.5, 1, 3, 1),
            (0, 0.5, 4, 0),
            (0.5, 1, 5, 0),
            (0, 0.5, 6, 2),
            (0, 0.5, 6, 4),
            (0.5, 1, 7, 3),
            (0.5, 1, 7, 5),
        ]
        for left, right, parent, child in edge_rows:
            tables.edges.add_row(left=left, right=right, parent=parent,
                                 child=child)

        for position in (0.1, 0.2, 0.7):
            tables.sites.add_row(position, "0")
        # One mutation per site, over nodes 1, 4 and 3 respectively.
        for site, node in enumerate((1, 4, 3)):
            tables.mutations.add_row(site=site, node=node, derived_state="1")

        tables.populations.add_row()

        arg = tables.tree_sequence()

        for r in np.arange(0.1, 10, 0.1):
            expected = math.log(r) - (1 + 2 * r) * 0.1
            expected += math.log(r) - (3 + 2 * r) * 0.05
            expected -= (6 + 2 * r) * 0.35
            expected -= (1 + r) * 0.5
            observed = msprime.log_arg_likelihood(arg, r)
            self.assertTrue(math.isclose(expected, observed))

        tree_length = 1.5
        for t in np.arange(0.1, 10, 0.1):
            expected = (3 * math.log(tree_length * t) -
                        tree_length * t)
            expected -= math.log(tree_length)
            expected -= 2 * math.log(2 * tree_length)
            observed = msprime.unnormalised_log_mutation_likelihood(arg, t)
            self.assertTrue(math.isclose(expected, observed))
示例#12
0
 def test_zero_has_parent(self):
     """
     ``check_ancestors_ts`` is expected to raise ValueError when node 0
     is the child of another node.
     """
     tables = tskit.TableCollection(1)
     for node_time in (1, 2):
         tables.nodes.add_row(time=node_time, flags=0)
     # Node 1 is the parent of node 0 over the whole sequence.
     tables.edges.add_row(left=0, right=1, parent=1, child=0)
     with self.assertRaises(ValueError):
         tsinfer.check_ancestors_ts(tables.tree_sequence())
示例#13
0
    def to_tsk_tree(self):
        """
        Convert this tree into a tskit tree over a sequence of length 1.

        ``self.num_leaves`` sample nodes are created at time 0 and each
        leaf's ``label`` is used as its node ID. Every internal node is
        placed one time unit above its oldest child.

        :return: The first tree of the resulting tree sequence.
        """
        seq_length = 1
        tables = tskit.TableCollection(seq_length)

        # Sample nodes are created up front, before any internal node.
        for _ in range(self.num_leaves):
            tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)

        def insert(subtree):
            # Return the node ID for ``subtree``, adding internal nodes and
            # their edges as needed.
            if subtree.is_leaf():
                assert subtree.label is not None
                return subtree.label

            children = [insert(child) for child in subtree.children]
            # Arbitrarily set parent time +1 from their oldest child
            times = tables.nodes.time
            oldest = max(times[c] for c in children)
            new_id = tables.nodes.add_row(time=oldest + 1)
            for c in children:
                tables.edges.add_row(0, seq_length, new_id, c)
            return new_id

        insert(self)

        # The way in which we're inserting nodes doesn't necessarily
        # adhere to the ordering constraint on edges, so we have
        # to sort.
        tables.sort()
        ts = tables.tree_sequence()
        return ts.first()
示例#14
0
 def test_fromdict_all_values_empty(self):
     """
     A ``reference_sequence`` entry whose fields are all empty should
     yield tables that report no reference sequence.
     """
     encoded = tskit.TableCollection(1).asdict()
     encoded["reference_sequence"] = {
         "data": "",
         "url": "",
         "metadata_schema": "",
         "metadata": b"",
     }
     decoded = tskit.TableCollection.fromdict(encoded)
     assert not decoded.has_reference_sequence()
示例#15
0
 def test_missing_data(self):
     """
     Check ``haplotypes()`` output on two isolated samples and one site
     (ancestral state "A") for each missing-data related option.
     """
     tables = tskit.TableCollection(1.0)
     for _ in range(2):
         tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
     tables.sites.add_row(0.5, "A")
     ts = tables.tree_sequence()

     # "A" is also the site's ancestral state; consuming the haplotypes
     # with it as the missing data character is expected to fail.
     clashing = ts.haplotypes(missing_data_character="A")
     with self.assertRaises(ValueError):
         list(clashing)
     for c in ("-", ".", "a"):
         self.assertEqual(
             list(ts.haplotypes(missing_data_character=c)), [c, c])

     missing = ["-", "-"]
     ancestral = ["A", "A"]
     expectations = [
         (dict(isolated_as_missing=True), missing),
         (dict(isolated_as_missing=False), ancestral),
         (dict(), missing),
         # Test deprecated method
         (dict(impute_missing_data=True), ancestral),
         (dict(impute_missing_data=False), missing),
         (dict(isolated_as_missing=True, impute_missing_data=True), missing),
         (dict(isolated_as_missing=True, impute_missing_data=False), missing),
         (dict(isolated_as_missing=False, impute_missing_data=True),
          ancestral),
         (dict(isolated_as_missing=False, impute_missing_data=False),
          ancestral),
     ]
     for options, expected in expectations:
         self.assertEqual(list(ts.haplotypes(**options)), expected)
示例#16
0
    def test_missing_data_samples(self):
        """
        Check variants for different ``samples`` lists on a table with two
        sample nodes, no edges, one site and a mutation over node 0 only.
        """
        tables = tskit.TableCollection(1.0)
        for _ in range(2):
            tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
        tables.sites.add_row(0.5, "A")
        tables.mutations.add_row(0, 0, "T")
        ts = tables.tree_sequence()

        def first_variant(samples):
            return list(ts.variants(samples=samples))[0]

        # If we have no samples we still get a list of variants.
        variant = first_variant([])
        self.assertEqual(list(variant.genotypes), [])
        self.assertFalse(variant.has_missing_data)
        self.assertEqual(variant.alleles, ("A", "T"))

        # If we have a single sample that's not missing, there's no
        # missing data.
        variant = first_variant([0])
        self.assertEqual(list(variant.genotypes), [1])
        self.assertFalse(variant.has_missing_data)
        self.assertEqual(variant.alleles, ("A", "T"))

        # If we have a single sample that is missing, there is
        # missing data.
        variant = first_variant([1])
        self.assertEqual(list(variant.genotypes), [-1])
        self.assertTrue(variant.has_missing_data)
        self.assertEqual(variant.alleles, ("A", "T", None))
示例#17
0
 def test_two_populations_migration(self):
     """
     A two-population simulation with symmetric migration should give the
     same tables whether run directly or started from a tree sequence
     holding only the sample nodes (provenance excluded).
     """
     n = 10
     seed = 1234
     direct = msprime.simulate(
         population_configurations=[
             msprime.PopulationConfiguration(size) for size in (n, 0)
         ],
         migration_matrix=[[0, 1], [1, 0]],
         random_seed=seed,
     )

     # Two populations, with all n samples placed in population 0.
     start = tskit.TableCollection(1)
     for _ in range(2):
         start.populations.add_row()
     for _ in range(n):
         start.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
     resumed = msprime.simulate(
         from_ts=start.tree_sequence(),
         start_time=0,
         population_configurations=[
             msprime.PopulationConfiguration() for _ in range(2)
         ],
         migration_matrix=[[0, 1], [1, 0]],
         random_seed=seed,
     )

     expected = direct.dump_tables()
     actual = resumed.dump_tables()
     for dumped in (expected, actual):
         dumped.provenances.clear()
     self.assertEqual(expected, actual)
示例#18
0
 def verify_simple_model(
     self, n, seed=1, recombination_rate=None, length=None, recombination_map=None
 ):
     """
     Simulate ``n`` samples directly, then again starting from a tree
     sequence that holds only ``n`` sample nodes, and check that both
     runs produce equal tables once provenance rows are cleared.
     """
     # Arguments common to both simulate() calls.
     shared = dict(
         random_seed=seed,
         recombination_rate=recombination_rate,
         recombination_map=recombination_map,
         model=self.model,
     )
     direct = msprime.simulate(n, length=length, **shared)

     # Starting point for the second run: one population and n sample
     # nodes at time 0, with no edges.
     start = tskit.TableCollection(direct.sequence_length)
     start.populations.add_row()
     for _ in range(n):
         start.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, population=0)
     resumed = msprime.simulate(
         from_ts=start.tree_sequence(), start_time=0, **shared
     )

     expected = direct.dump_tables()
     actual = resumed.dump_tables()
     for dumped in (expected, actual):
         dumped.provenances.clear()
     self.assertEqual(expected, actual)
示例#19
0
def felsenstein_tables():
    """
    Return sorted tables for the example tree drawn below: five samples
    (nodes 0-4) at time 0 and internal nodes 5-8 at times 1-4.
    """
    #
    #     8
    #   ┏━┻━━┓
    #   ┃    7
    #   ┃   ┏┻┓
    #   6   ┃ ┃
    # ┏━┻┓  ┃ ┃
    # ┃  5  ┃ ┃
    # ┃ ┏┻┓ ┃ ┃
    # 2 3 4 0 1
    #
    tables = tskit.TableCollection(1)
    for _ in range(5):
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
    for internal_time in range(1, 5):
        tables.nodes.add_row(flags=0, time=internal_time)

    # (parent, child) pairs; every edge spans the whole sequence [0, 1).
    parent_child_pairs = [
        (7, 0),
        (7, 1),
        (6, 2),
        (5, 3),
        (5, 4),
        (6, 5),
        (8, 6),
        (8, 7),
    ]
    for parent, child in parent_child_pairs:
        tables.edges.add_row(0, 1, parent, child)
    tables.sort()
    return tables
示例#20
0
def simulate(nsam: int):
    """
    The linear-time algorithm of Hudson, 1990,
    adapted to use tree sequences

    The citation for this algorithm is
    Hudson, Richard R. 1990.
    “Gene Genealogies and the Coalescent Process.”
    Oxford Surveys in Evolutionary Biology 7 (1): 44.

    Time is scaled in units of 2N generations.

    Random numbers are drawn from numpy's global random state.

    :param nsam: The sample size
    :type nsam: int
    :return: A tree sequence of length 1 holding the simulated genealogy.
    """
    tc = tskit.TableCollection(1)

    # nodes[0:n] holds the IDs of the lineages that have not yet coalesced.
    nodes = np.arange(2 * nsam - 1, dtype=np.int32)
    for _ in range(nsam):
        tc.nodes.add_row(time=0.0, flags=tskit.NODE_IS_SAMPLE)

    time = 0.0
    for n in range(nsam, 1, -1):
        # Waiting time to the next coalescent event, in units of
        # 2N generations, with n lineages remaining.
        pair_rate = (n * (n - 1)) / 2.
        time += np.random.exponential(1. / pair_rate)

        # Register the new ancestor; it is not a sample, so its flag
        # is zero. Its node ID is 2 * nsam - n.
        tc.nodes.add_row(time=time, flags=0)
        ancestor = 2 * nsam - n

        # Swap steps: draw the first child and fill its slot with the
        # last active lineage, then draw the second child from the
        # remaining n - 1 and put the ancestor in its slot.
        slot = np.random.choice(n, 1)[0]
        first = nodes[slot]
        nodes[slot] = nodes[n - 1]
        slot = np.random.choice(n - 1, 1)[0]
        second = nodes[slot]
        nodes[slot] = nodes[ancestor]

        # Both children share the same parent. An edge table requires
        # child nodes to be in increasing order per parent, so the
        # smaller ID is recorded first.
        lower, upper = min(first, second), max(first, second)
        tc.edges.add_row(parent=ancestor, child=lower, left=0.0, right=1.0)
        tc.edges.add_row(parent=ancestor, child=upper, left=0.0, right=1.0)

    return tc.tree_sequence()
示例#21
0
文件: formats.py 项目: saunack/tskit
def _load_legacy_hdf5_v2(root, remove_duplicate_positions):
    """
    Convert a legacy version-2 HDF5 file into a tree sequence.

    Each coalescence record in ``root["trees"]`` is expanded into two
    edges (one per entry of its ``children`` row), and its node, time and
    population values are written to the node table. Nodes with an ID
    below the smallest record node are flagged as samples. Mutations are
    converted when a ``mutations`` group is present.

    Provenance rows are written to the returned tables in this order:
    "generate_trees", "generate_mutations" (only if mutations exist), and
    finally the upgrade record.

    :param root: The opened legacy file, indexed by group name.
    :param remove_duplicate_positions: Passed through to
        ``_convert_hdf5_mutations``.
    :return: The tree sequence built from the sorted tables.
    """
    # Get the coalescence records
    trees_group = root["trees"]
    old_timestamp = datetime.datetime.min.isoformat()

    # Every record contributes two consecutive edge rows that share the
    # same parent, left and right values.
    num_rows = trees_group["node"].shape[0]
    index = np.arange(num_rows, dtype=int)
    parent = np.zeros(2 * num_rows, dtype=np.int32)
    parent[2 * index] = trees_group["node"]
    parent[2 * index + 1] = trees_group["node"]
    left = np.zeros(2 * num_rows, dtype=np.float64)
    left[2 * index] = trees_group["left"]
    left[2 * index + 1] = trees_group["left"]
    right = np.zeros(2 * num_rows, dtype=np.float64)
    right[2 * index] = trees_group["right"]
    right[2 * index + 1] = trees_group["right"]
    child = np.array(trees_group["children"], dtype=np.int32).flatten()

    # The sequence length is taken to be the largest right coordinate.
    tables = tskit.TableCollection(np.max(right))
    tables.edges.set_columns(left=left,
                             right=right,
                             parent=parent,
                             child=child)

    # Record the legacy provenance on the output collection itself. Rows
    # added to a free-standing ProvenanceTable would never reach the
    # returned tree sequence.
    tables.provenances.add_row(
        timestamp=old_timestamp,
        record=_get_v2_provenance("generate_trees", trees_group.attrs),
    )

    cr_node = np.array(trees_group["node"], dtype=np.int32)
    num_nodes = max(np.max(child), np.max(cr_node)) + 1
    # Node IDs below the smallest record node are treated as the samples.
    sample_size = np.min(cr_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    population = np.zeros(num_nodes, dtype=np.int32)
    time = np.zeros(num_nodes, dtype=np.float64)
    flags[:sample_size] = tskit.NODE_IS_SAMPLE
    cr_population = np.array(trees_group["population"], dtype=np.int32)
    cr_time = np.array(trees_group["time"])
    time[cr_node] = cr_time
    population[cr_node] = cr_population
    if "samples" in root:
        samples_group = root["samples"]
        population[:sample_size] = samples_group["population"]
        if "time" in samples_group:
            time[:sample_size] = samples_group["time"]
    tables.nodes.set_columns(flags=flags, population=population, time=time)
    _set_populations(tables)

    if "mutations" in root:
        mutations_group = root["mutations"]
        _convert_hdf5_mutations(mutations_group, tables.sites,
                                tables.mutations, remove_duplicate_positions)
        tables.provenances.add_row(
            timestamp=old_timestamp,
            record=_get_v2_provenance("generate_mutations",
                                      mutations_group.attrs),
        )
    tables.provenances.add_row(_get_upgrade_provenance(root))
    tables.sort()
    return tables.tree_sequence()
示例#22
0
 def tree(self):
     """
     Return the first tree of a tree sequence consisting of a single
     chain: sample node 0 at time 0, with node ``k`` (time ``k``) the
     parent of node ``k - 1`` for k = 1, 2, 3.
     """
     tables = tskit.TableCollection(1.0)
     tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
     for parent in range(1, 4):
         tables.nodes.add_row(flags=0, time=parent)
         tables.edges.add_row(left=0, right=1, parent=parent, child=parent - 1)
     tables.sort()
     ts = tables.tree_sequence()
     return ts.first()
示例#23
0
 def test_asdict_reference_no_metadata(self):
     """
     With only reference data set, the dict form should carry ``data``
     and an empty ``url`` but no metadata or metadata schema keys.
     """
     collection = tskit.TableCollection(1)
     collection.reference_sequence.data = "ABCDEF"
     encoded = collection.asdict()["reference_sequence"]
     assert encoded["data"] == "ABCDEF"
     assert encoded["url"] == ""
     for absent_key in ("metadata", "metadata_schema"):
         assert absent_key not in encoded
示例#24
0
 def test_same_object(self):
     """
     A previously fetched ``reference_sequence`` should see later writes,
     even though each access returns a distinct Python object.
     """
     collection = tskit.TableCollection(1)
     earlier_handle = collection.reference_sequence
     collection.reference_sequence.data = "asdf"
     assert earlier_handle.data == "asdf"
     # Not clear we want to do this, but keeping the same pattern as the
     # tables for now.
     assert collection.reference_sequence is not earlier_handle
示例#25
0
 def test_write_metadata_schema_fails(self):
     """
     Assigning a metadata schema to a tree sequence's reference sequence
     should raise an AttributeError mentioning "read-only".
     """
     collection = tskit.TableCollection(1)
     collection.reference_sequence.data = "abc"
     immutable_ts = collection.tree_sequence()
     with pytest.raises(AttributeError, match="read-only"):
         immutable_ts.reference_sequence.metadata_schema = (
             tskit.MetadataSchema.permissive_json()
         )
示例#26
0
 def test_write_metadata_fails(self):
     """
     Assigning metadata to a tree sequence's reference sequence should
     raise an AttributeError mentioning "read-only".
     """
     collection = tskit.TableCollection(1)
     collection.reference_sequence.data = "abc"
     immutable_ts = collection.tree_sequence()
     with pytest.raises(AttributeError, match="read-only"):
         # NOTE: it can be slightly confusing here because we try to encode
         # first, and so we don't get an AttributeError for all inputs.
         immutable_ts.reference_sequence.metadata = b"xyz"
示例#27
0
 def test_zero_has_no_children(self):
     """
     ``check_ancestors_ts`` is expected to raise ValueError when the only
     edge joins nodes 2 and 1, leaving node 0 without children.
     """
     tables = tskit.TableCollection(1)
     for node_time in (1, 2, 3):
         tables.nodes.add_row(time=node_time, flags=0)
     # Node 2 is the parent of node 1; node 0 is untouched.
     tables.edges.add_row(left=0, right=1, parent=2, child=1)
     with pytest.raises(ValueError):
         tsinfer.check_ancestors_ts(tables.tree_sequence())
示例#28
0
 def test_mutation_parent_example(self):
     """
     Run ``self.verify`` on two isolated samples with one site whose two
     mutations sit over node 0, the second having the first as parent.
     """
     tables = tskit.TableCollection(1)
     for _ in range(2):
         tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0)
     tables.sites.add_row(position=0, ancestral_state="A")
     # A -> T, then back to A with mutation 0 as the parent.
     tables.mutations.add_row(site=0, node=0, derived_state="T")
     tables.mutations.add_row(site=0, node=0, parent=0, derived_state="A")
     ts = tables.tree_sequence()
     self.verify(ts)
示例#29
0
 def test_fromdict_reference_data(self):
     """
     A ``reference_sequence`` dict holding only ``data`` should load with
     that data and empty url, metadata schema and metadata.
     """
     encoded = tskit.TableCollection(1).asdict()
     encoded["reference_sequence"] = {"data": "XYZ"}
     decoded = tskit.TableCollection.fromdict(encoded)
     assert decoded.has_reference_sequence()
     refseq = decoded.reference_sequence
     assert refseq.data == "XYZ"
     assert refseq.url == ""
     assert repr(refseq.metadata_schema) == ""
     assert refseq.metadata == b""
示例#30
0
 def test_fromdict_reference_url(self):
     """
     A ``reference_sequence`` dict holding only ``url`` should load with
     that url and empty data, metadata schema and metadata.
     """
     encoded = tskit.TableCollection(1).asdict()
     encoded["reference_sequence"] = {"url": "file://file.fasta"}
     decoded = tskit.TableCollection.fromdict(encoded)
     assert decoded.has_reference_sequence()
     refseq = decoded.reference_sequence
     assert refseq.data == ""
     assert refseq.url == "file://file.fasta"
     assert repr(refseq.metadata_schema) == ""
     assert refseq.metadata == b""