Exemplo n.º 1
0
 def record_edge(self, left, right, parent, child):
     """
     Buffer the edge ``(left, right, parent, child)`` under its child.

     ``self.edge_buffer`` maps each child to a list of edges. The first
     edge recorded for a child starts a new list. A later edge whose
     ``left`` equals the ``right`` of the child's most recently buffered
     edge is merged by extending that buffered edge in place; any other
     edge is appended as a new entry.

     Note that the merge test compares coordinates only; the parent of the
     buffered edge is not examined.
     """
     buffered = self.edge_buffer
     if child in buffered:
         tail = buffered[child][-1]
         if tail.right != left:
             # Not contiguous with the previous edge: keep it separate.
             buffered[child].append(
                 msprime.Edge(left, right, parent, child))
         else:
             # Contiguous: stretch the previous edge instead of adding one.
             tail.right = right
         return
     buffered[child] = [msprime.Edge(left, right, parent, child)]
Exemplo n.º 2
0
    def test_partial_overlap(self):
        """
        Build three samples over seven sites plus five ancestors, two of
        which (IDs 2 and 3) cover only part of the sequence, and hand the
        expected edges to ``self.verify_edges``.
        """
        num_sites = 7
        sample_data = tsinfer.SampleData.initialise(3, num_sites)
        for site in range(num_sites):
            sample_data.add_variant(site, ["0", "1"], [0, 1, 1])
        sample_data.finalise()
        ancestor_data = tsinfer.AncestorData.initialise(sample_data)

        # One row per ancestor, listed in ID order:
        # (start, end, focal_sites, time, haplotype)
        ancestor_rows = [
            (0, 7, [], 5, [0, 0, 0, 0, 0, 0, 0]),  # ID 0
            (0, 7, [], 4, [0, 0, 0, 0, 0, 0, 0]),  # ID 1
            (0, 3, [2], 3, [0, 0, 1, 0, 0, 0, 0]),  # ID 2
            (3, 7, [4, 6], 2, [-1, -1, -1, 0, 1, 0, 1]),  # ID 3
            (0, 7, [0, 1, 3, 5], 1, [1, 1, 1, 1, 1, 1, 1]),  # ID 4
        ]
        for start, end, focal_sites, ancestor_time, haplotype in ancestor_rows:
            ancestor_data.add_ancestor(
                start=start,
                end=end,
                focal_sites=focal_sites,
                time=ancestor_time,
                haplotype=haplotype)
        ancestor_data.finalise()

        # (left, right, parent, child) for each expected edge.
        edge_rows = [
            (0, 7, 0, 1),
            (0, 3, 2, 4),
            (3, 7, 3, 4),
            (3, 7, 1, 3),
            (0, 3, 1, 2),
        ]
        expected_edges = [msprime.Edge(*row) for row in edge_rows]
        self.verify_edges(sample_data, ancestor_data, expected_edges)
Exemplo n.º 3
0
 def edge_diffs(self):
     """
     Generate the edge differences between adjacent intervals.

     Yields ``((left, right), edges_out, edges_in)`` tuples, where
     ``edges_out`` lists the edges whose ``right`` equals ``left`` and
     ``edges_in`` lists the edges whose ``left`` equals ``left``.
     Intervals start at 0 and each one ends at the next edge start, the
     next edge end, or the sequence length, whichever is smallest.
     """
     ts = self._tree_sequence
     num_edges = ts.get_num_edges()
     sequence_length = ts.get_sequence_length()
     edges = [msprime.Edge(*ts.get_edge(index)) for index in range(num_edges)]
     # Element 1 of the node record for each edge's parent (its time).
     parent_time = [ts.get_node(edge.parent)[1] for edge in edges]

     def insertion_key(index):
         edge = edges[index]
         return edge.left, parent_time[index], edge.parent, edge.child

     def removal_key(index):
         edge = edges[index]
         return edge.right, -parent_time[index], -edge.parent, -edge.child

     insertion_order = sorted(range(num_edges), key=insertion_key)
     removal_order = sorted(range(num_edges), key=removal_key)

     next_in = 0
     next_out = 0
     left = 0
     while next_in < num_edges or left < sequence_length:
         # Collect every edge that ends exactly at this breakpoint.
         edges_out = []
         while next_out < num_edges:
             candidate = edges[removal_order[next_out]]
             if candidate.right != left:
                 break
             edges_out.append(candidate)
             next_out += 1

         # Collect every edge that starts exactly at this breakpoint.
         edges_in = []
         while next_in < num_edges:
             candidate = edges[insertion_order[next_in]]
             if candidate.left != left:
                 break
             edges_in.append(candidate)
             next_in += 1

         # The interval runs up to the nearest upcoming breakpoint.
         right = sequence_length
         if next_in < num_edges:
             right = min(right, edges[insertion_order[next_in]].left)
         if next_out < num_edges:
             right = min(right, edges[removal_order[next_out]].right)
         yield (left, right), edges_out, edges_in
         left = right
Exemplo n.º 4
0
    def test_edge_overlap_bug(self):
        """
        Build three samples over twelve sites plus eight ancestors, several
        of which cover only a third or two thirds of the sequence, and hand
        the expected edges to ``self.verify_edges``.
        """
        num_sites = 12
        sample_data = tsinfer.SampleData.initialise(3, num_sites)
        for site in range(num_sites):
            sample_data.add_variant(site, ["0", "1"], [0, 1, 1])
        sample_data.finalise()
        ancestor_data = tsinfer.AncestorData.initialise(sample_data)

        # One row per ancestor, listed in ID order:
        # (start, end, focal_sites, time, haplotype)
        ancestor_rows = [
            (0, 12, [], 8,
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),  # ID 0
            (0, 12, [], 7,
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),  # ID 1
            (0, 4, [], 6,
             [0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1]),  # ID 2
            (4, 12, [], 5,
             [-1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0]),  # ID 3
            (8, 12, [9, 11], 4,
             [-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 0, 1]),  # ID 4
            (4, 8, [5, 7], 3,
             [-1, -1, -1, -1, 0, 1, 0, 1, -1, -1, -1, -1]),  # ID 5
            (0, 4, [1, 3], 2,
             [0, 1, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1]),  # ID 6
            (0, 12, [0, 2, 4, 6, 8, 10], 1,
             [1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0]),  # ID 7
        ]
        for start, end, focal_sites, ancestor_time, haplotype in ancestor_rows:
            ancestor_data.add_ancestor(
                start=start,
                end=end,
                focal_sites=focal_sites,
                time=ancestor_time,
                haplotype=haplotype)
        ancestor_data.finalise()

        # (left, right, parent, child) for each expected edge.
        edge_rows = [
            (0, 12, 0, 1),
            (0, 4, 1, 2),
            (4, 12, 1, 3),
            (8, 12, 1, 4),
            (4, 8, 1, 5),
            (0, 4, 1, 6),
            (0, 4, 1, 7),
            (4, 8, 5, 7),
            (8, 12, 1, 7),
        ]
        expected_edges = [msprime.Edge(*row) for row in edge_rows]
        self.verify_edges(sample_data, ancestor_data, expected_edges)
Exemplo n.º 5
0
 def record_edge(self, left, right, parent, child):
     """
     Append a new edge built from the given coordinates, parent and child
     to ``self.edge_buffer``.
     """
     new_edge = msprime.Edge(
         left=left, right=right, parent=parent, child=child)
     self.edge_buffer.append(new_edge)
Exemplo n.º 6
0
    def trees(self):
        """
        Generator over the trees along the sequence.

        A single ``PythonSparseTree`` is created and then updated in place:
        for each interval, edges ending at the interval's left coordinate are
        removed, edges starting there are inserted, and the same tree object
        is yielded with ``left``, ``right``, ``index`` and ``site_list``
        set for that interval. Because the same object is yielded every time,
        callers that need to keep a tree must copy it before advancing.

        Alongside the parent/child/sibling links, the method maintains the
        ``above_sample`` flags and a linked list of roots (threaded through
        ``left_sib``/``right_sib`` and entered via ``left_root``).
        """
        M = self._tree_sequence.get_num_edges()
        sequence_length = self._tree_sequence.get_sequence_length()
        edges = [
            msprime.Edge(*self._tree_sequence.get_edge(j)) for j in range(M)
        ]
        # Element 1 of each node record, indexed by node ID
        # (presumably the node's time -- TODO confirm against get_node).
        t = [
            self._tree_sequence.get_node(j)[1]
            for j in range(self._tree_sequence.get_num_nodes())
        ]
        # Edge indexes in the order edges are inserted: by left coordinate,
        # then parent's t value, parent ID and child ID.
        in_order = sorted(range(M),
                          key=lambda j: (edges[j].left, t[edges[j].parent],
                                         edges[j].parent, edges[j].child))
        # Edge indexes in the order edges are removed: by right coordinate,
        # then the remaining keys negated (i.e. descending).
        out_order = sorted(range(M),
                           key=lambda j: (edges[j].right, -t[edges[j].parent],
                                          -edges[j].parent, -edges[j].child))
        # j indexes in_order (next edge to insert); k indexes out_order
        # (next edge to remove).
        j = 0
        k = 0
        N = self._tree_sequence.get_num_nodes()
        st = PythonSparseTree(N)

        # Initially every sample is its own root: chain the samples together
        # through the sibling pointers and flag them as samples.
        samples = list(self._tree_sequence.get_samples())
        for l in range(len(samples)):
            if l < len(samples) - 1:
                st.right_sib[samples[l]] = samples[l + 1]
            if l > 0:
                st.left_sib[samples[l]] = samples[l - 1]
            st.above_sample[samples[l]] = True
            st.is_sample[samples[l]] = True

        st.left_root = msprime.NULL_NODE
        if len(samples) > 0:
            st.left_root = samples[0]

        # Walk the initial root list and check that the sibling links are
        # consistent in both directions.
        # NOTE(review): `roots` is collected here but not read again in this
        # method; the literal -1 is presumably msprime.NULL_NODE -- confirm.
        u = st.left_root
        roots = []
        while u != -1:
            roots.append(u)
            v = st.right_sib[u]
            if v != -1:
                assert st.left_sib[v] == u
            u = v

        st.left = 0
        while j < M or st.left < sequence_length:
            # Remove every edge that ends at the current left coordinate.
            while k < M and edges[out_order[k]].right == st.left:
                p = edges[out_order[k]].parent
                c = edges[out_order[k]].child
                k += 1

                # Unlink c from p's child list by joining c's left and right
                # siblings (updating p's leftmost/rightmost child pointers
                # when c was at either end), then clear c's own links.
                lsib = st.left_sib[c]
                rsib = st.right_sib[c]
                if lsib == msprime.NULL_NODE:
                    st.left_child[p] = rsib
                else:
                    st.right_sib[lsib] = rsib
                if rsib == msprime.NULL_NODE:
                    st.right_child[p] = lsib
                else:
                    st.left_sib[rsib] = lsib
                st.parent[c] = msprime.NULL_NODE
                st.left_sib[c] = msprime.NULL_NODE
                st.right_sib[c] = msprime.NULL_NODE

                # If c is not above a sample then we have nothing to do as we
                # cannot affect the status of any roots.
                if st.above_sample[c]:
                    # Compute the new above sample status for the nodes from
                    # p up to root.
                    # The walk stops early at the first node that is still
                    # above a sample; `root` is the last node visited.
                    v = p
                    above_sample = False
                    while v != msprime.NULL_NODE and not above_sample:
                        above_sample = st.is_sample[v]
                        u = st.left_child[v]
                        while u != msprime.NULL_NODE:
                            above_sample = above_sample or st.above_sample[u]
                            u = st.right_sib[u]
                        st.above_sample[v] = above_sample
                        root = v
                        v = st.parent[v]

                    if not above_sample:
                        # root is no longer above samples. Remove it from the root list.
                        # left_root is repointed at a neighbour of the removed
                        # root (the right one if it exists, else the left one,
                        # else NULL_NODE).
                        lroot = st.left_sib[root]
                        rroot = st.right_sib[root]
                        st.left_root = msprime.NULL_NODE
                        if lroot != msprime.NULL_NODE:
                            st.right_sib[lroot] = rroot
                            st.left_root = lroot
                        if rroot != msprime.NULL_NODE:
                            st.left_sib[rroot] = lroot
                            st.left_root = rroot
                        st.left_sib[root] = msprime.NULL_NODE
                        st.right_sib[root] = msprime.NULL_NODE

                    # Add c to the root list.
                    # c is spliced in immediately to the left of the current
                    # left_root and then becomes left_root itself.
                    # print("Insert ", c, "into root list")
                    if st.left_root != msprime.NULL_NODE:
                        lroot = st.left_sib[st.left_root]
                        if lroot != msprime.NULL_NODE:
                            st.right_sib[lroot] = c
                        st.left_sib[c] = lroot
                        st.left_sib[st.left_root] = c
                    st.right_sib[c] = st.left_root
                    st.left_root = c

            # Insert every edge that starts at the current left coordinate.
            while j < M and edges[in_order[j]].left == st.left:
                p = edges[in_order[j]].parent
                c = edges[in_order[j]].child
                j += 1

                # print("insert ", c, "->", p)
                st.parent[c] = p
                u = st.right_child[p]
                # Remember c's sibling links before they are overwritten;
                # they are needed below to patch the root list.
                lsib = st.left_sib[c]
                rsib = st.right_sib[c]
                # Append c as the rightmost child of p.
                if u == msprime.NULL_NODE:
                    st.left_child[p] = c
                    st.left_sib[c] = msprime.NULL_NODE
                    st.right_sib[c] = msprime.NULL_NODE
                else:
                    st.right_sib[u] = c
                    st.left_sib[c] = u
                    st.right_sib[c] = msprime.NULL_NODE
                st.right_child[p] = c

                if st.above_sample[c]:
                    # Walk up from p marking each node as above a sample,
                    # stopping after the first node that was already marked.
                    # `root` is the last node visited.
                    v = p
                    above_sample = False
                    while v != msprime.NULL_NODE and not above_sample:
                        above_sample = st.above_sample[v]
                        st.above_sample[
                            v] = st.above_sample[v] or st.above_sample[c]
                        root = v
                        v = st.parent[v]
                    # print("root = ", root, st.above_sample[root])

                    if not above_sample:
                        # Replace c with root in root list.
                        # No node on the path was previously above a sample,
                        # so `root` takes over c's old position (lsib/rsib).
                        # print("replacing", root, "with ", c ," in root list")
                        if lsib != msprime.NULL_NODE:
                            st.right_sib[lsib] = root
                        if rsib != msprime.NULL_NODE:
                            st.left_sib[rsib] = root
                        st.left_sib[root] = lsib
                        st.right_sib[root] = rsib
                        st.left_root = root
                    else:
                        # Remove c from root list.
                        # c's old neighbours are joined together and left_root
                        # is repointed at one of them (or NULL_NODE if none).
                        # print("remove ", c ," from root list")
                        st.left_root = msprime.NULL_NODE
                        if lsib != msprime.NULL_NODE:
                            st.right_sib[lsib] = rsib
                            st.left_root = lsib
                        if rsib != msprime.NULL_NODE:
                            st.left_sib[rsib] = lsib
                            st.left_root = rsib

            # The tree is valid up to the nearest upcoming edge start or
            # edge end, or the end of the sequence.
            st.right = sequence_length
            if j < M:
                st.right = min(st.right, edges[in_order[j]].left)
            if k < M:
                st.right = min(st.right, edges[out_order[k]].right)
            assert st.left_root != msprime.NULL_NODE
            # The updates above may leave left_root pointing anywhere in the
            # root list; rewind it to the leftmost root.
            while st.left_sib[st.left_root] != msprime.NULL_NODE:
                st.left_root = st.left_sib[st.left_root]
            st.index += 1
            # Add in all the sites
            # (those whose position lies in the half-open interval
            # [st.left, st.right)).
            st.site_list = [
                site for site in self._sites
                if st.left <= site.position < st.right
            ]
            yield st
            st.left = st.right
Exemplo n.º 7
0
 def store_edge(self, left, right, parent, child):
     """
     Build an edge from the given coordinates, parent and child and append
     it to ``self.edge_buffer``.
     """
     edge = msprime.Edge(left=left, right=right, parent=parent, child=child)
     self.edge_buffer.append(edge)