Пример #1
0
 def record_edge(self, left, right, parent, child):
     """
     Buffers the edge (left, right, parent, child) for output.

     When ``self.reduce_to_site_topology`` is set, both endpoints are first
     snapped onto entries of ``self.position_lookup``; edges that collapse
     to nothing under that mapping are dropped. An edge that starts exactly
     where the last buffered edge for ``child`` ends extends that edge
     instead of being stored separately.
     """
     if self.reduce_to_site_topology:
         positions = self.position_lookup
         # Slide each endpoint rightwards to the index of the first lookup
         # position that is greater than or equal to it.
         lo = np.searchsorted(positions, left)
         hi = np.searchsorted(positions, right)
         if lo == hi:
             # Both endpoints land on the same position: nothing remains.
             return
         if lo == 0 and hi == 1:
             # The edge starts at 0 and stops before the first site.
             return
         # A left end that maps to the first site is moved back to zero.
         if lo == 1:
             lo = 0
         left, right = positions[lo], positions[hi]
     if child in self.edge_buffer:
         chain = self.edge_buffer[child]
         tail = chain[-1]
         if tail.right != left:
             chain.append(tskit.Edge(left, right, parent, child))
         else:
             # Contiguous with the previous edge: just stretch it.
             tail.right = right
     else:
         self.edge_buffer[child] = [tskit.Edge(left, right, parent, child)]
Пример #2
0
 def record_edge(self, left, right, parent, child):
     """
     Buffers the edge (left, right, parent, child) for output.

     Edges are kept per child in ``self.edge_buffer``. An edge that starts
     exactly where the last buffered edge for ``child`` ends extends that
     edge in place; otherwise a new edge is appended.
     """
     if child in self.edge_buffer:
         chain = self.edge_buffer[child]
         tail = chain[-1]
         if tail.right != left:
             chain.append(tskit.Edge(left, right, parent, child))
         else:
             # Contiguous with the previous edge: just stretch it.
             tail.right = right
     else:
         self.edge_buffer[child] = [tskit.Edge(left, right, parent, child)]
Пример #3
0
 def edge_diffs(self):
     """
     Generates the edge differences between adjacent intervals.

     Each item is a tuple ``((left, right), edges_out, edges_in)`` where
     ``edges_out`` lists the edges whose right coordinate equals ``left``
     and ``edges_in`` lists the edges whose left coordinate equals ``left``.
     Intervals start at 0.0 and continue until every edge has been inserted
     and the sequence length has been reached.
     """
     ts = self._tree_sequence
     num_edges = ts.get_num_edges()
     seq_len = ts.get_sequence_length()
     edges = [tskit.Edge(*ts.get_edge(j), j) for j in range(num_edges)]
     # Element 1 of the parent's node record is the secondary sort key
     # (presumably the node time).
     parent_time = [ts.get_node(e.parent)[1] for e in edges]

     def insertion_key(j):
         e = edges[j]
         return (e.left, parent_time[j], e.parent, e.child)

     def removal_key(j):
         e = edges[j]
         return (e.right, -parent_time[j], -e.parent, -e.child)

     insertion_order = sorted(range(num_edges), key=insertion_key)
     removal_order = sorted(range(num_edges), key=removal_key)

     next_in = 0
     next_out = 0
     left = 0.0
     while next_in < num_edges or left < seq_len:
         # Edges that finish at the current left coordinate.
         edges_out = []
         while (next_out < num_edges
                and edges[removal_order[next_out]].right == left):
             edges_out.append(edges[removal_order[next_out]])
             next_out += 1
         # Edges that begin at the current left coordinate.
         edges_in = []
         while (next_in < num_edges
                and edges[insertion_order[next_in]].left == left):
             edges_in.append(edges[insertion_order[next_in]])
             next_in += 1
         # The interval stops at the nearest pending breakpoint.
         boundaries = [seq_len]
         if next_in < num_edges:
             boundaries.append(edges[insertion_order[next_in]].left)
         if next_out < num_edges:
             boundaries.append(edges[removal_order[next_out]].right)
         right = min(boundaries)
         yield (left, right), edges_out, edges_in
         left = right
Пример #4
0
    def trees(self):
        """
        Generator that yields the tree state for each successive interval
        along the sequence.

        A single PythonTree is created up front and updated in place: for
        every interval the edges ending at the interval's left coordinate are
        removed, the edges starting there are inserted, and the root list
        (threaded through the left_sib/right_sib arrays, entered via
        left_root) is kept up to date. The same object is yielded on every
        iteration and is mutated again when the generator is advanced.
        """
        M = self._tree_sequence.get_num_edges()
        sequence_length = self._tree_sequence.get_sequence_length()
        edges = [
            tskit.Edge(*self._tree_sequence.get_edge(j), j) for j in range(M)]
        # Element 1 of each node record is used as a sort key below;
        # presumably this is the node time -- confirm against get_node().
        t = [
            self._tree_sequence.get_node(j)[1]
            for j in range(self._tree_sequence.get_num_nodes())]
        # Edge indexes in the order edges are inserted: by left coordinate,
        # then by t[parent], parent and child, all ascending.
        in_order = sorted(
            range(M), key=lambda j: (
                edges[j].left, t[edges[j].parent], edges[j].parent, edges[j].child))
        # Edge indexes in the order edges are removed: by right coordinate,
        # then by t[parent], parent and child, all descending.
        out_order = sorted(
            range(M), key=lambda j: (
                edges[j].right, -t[edges[j].parent], -edges[j].parent, -edges[j].child))
        # j indexes in_order (next edge to insert), k indexes out_order
        # (next edge to remove).
        j = 0
        k = 0
        N = self._tree_sequence.get_num_nodes()
        st = PythonTree(N)

        # Initially every sample is its own root: chain the samples together
        # through the sibling arrays and flag each one as a sample that is
        # (trivially) above a sample.
        samples = list(self._tree_sequence.get_samples())
        for l in range(len(samples)):
            if l < len(samples) - 1:
                st.right_sib[samples[l]] = samples[l + 1]
            if l > 0:
                st.left_sib[samples[l]] = samples[l - 1]
            st.above_sample[samples[l]] = True
            st.is_sample[samples[l]] = True

        st.left_root = tskit.NULL
        if len(samples) > 0:
            st.left_root = samples[0]

        # Walk the initial root list left to right, asserting that the
        # left_sib and right_sib links agree. The collected roots list is not
        # read again in this method.
        # NOTE(review): this walk compares against -1 while the rest of the
        # method uses tskit.NULL; presumably the two are equal.
        u = st.left_root
        roots = []
        while u != -1:
            roots.append(u)
            v = st.right_sib[u]
            if v != -1:
                assert st.left_sib[v] == u
            u = v

        st.left = 0
        while j < M or st.left < sequence_length:
            # Remove every edge that ends at the current left coordinate.
            while k < M and edges[out_order[k]].right == st.left:
                p = edges[out_order[k]].parent
                c = edges[out_order[k]].child
                k += 1

                # Unlink c from p's child list and clear c's own links.
                lsib = st.left_sib[c]
                rsib = st.right_sib[c]
                if lsib == tskit.NULL:
                    st.left_child[p] = rsib
                else:
                    st.right_sib[lsib] = rsib
                if rsib == tskit.NULL:
                    st.right_child[p] = lsib
                else:
                    st.left_sib[rsib] = lsib
                st.parent[c] = tskit.NULL
                st.left_sib[c] = tskit.NULL
                st.right_sib[c] = tskit.NULL

                # If c is not above a sample then we have nothing to do as we
                # cannot affect the status of any roots.
                if st.above_sample[c]:
                    # Compute the new above sample status for the nodes from
                    # p up to root.
                    # The walk stops at the first ancestor that is still above
                    # a sample; root is the last node visited.
                    v = p
                    above_sample = False
                    while v != tskit.NULL and not above_sample:
                        above_sample = st.is_sample[v]
                        u = st.left_child[v]
                        while u != tskit.NULL:
                            above_sample = above_sample or st.above_sample[u]
                            u = st.right_sib[u]
                        st.above_sample[v] = above_sample
                        root = v
                        v = st.parent[v]

                    if not above_sample:
                        # root is no longer above samples. Remove it from the root list.
                        # left_root is re-pointed at a surviving neighbour,
                        # preferring the right one when both exist.
                        lroot = st.left_sib[root]
                        rroot = st.right_sib[root]
                        st.left_root = tskit.NULL
                        if lroot != tskit.NULL:
                            st.right_sib[lroot] = rroot
                            st.left_root = lroot
                        if rroot != tskit.NULL:
                            st.left_sib[rroot] = lroot
                            st.left_root = rroot
                        st.left_sib[root] = tskit.NULL
                        st.right_sib[root] = tskit.NULL

                    # Add c to the root list.
                    # c is spliced in immediately to the left of the node
                    # left_root currently points at, and left_root then
                    # points at c.
                    # print("Insert ", c, "into root list")
                    if st.left_root != tskit.NULL:
                        lroot = st.left_sib[st.left_root]
                        if lroot != tskit.NULL:
                            st.right_sib[lroot] = c
                        st.left_sib[c] = lroot
                        st.left_sib[st.left_root] = c
                    st.right_sib[c] = st.left_root
                    st.left_root = c

            # Insert every edge that starts at the current left coordinate.
            while j < M and edges[in_order[j]].left == st.left:
                p = edges[in_order[j]].parent
                c = edges[in_order[j]].child
                j += 1

                # print("insert ", c, "->", p)
                st.parent[c] = p
                u = st.right_child[p]
                # Save c's sibling links before they are overwritten; they
                # are used below as c's neighbours in the root list.
                lsib = st.left_sib[c]
                rsib = st.right_sib[c]
                # Append c as the rightmost child of p.
                if u == tskit.NULL:
                    st.left_child[p] = c
                    st.left_sib[c] = tskit.NULL
                    st.right_sib[c] = tskit.NULL
                else:
                    st.right_sib[u] = c
                    st.left_sib[c] = u
                    st.right_sib[c] = tskit.NULL
                st.right_child[p] = c

                if st.above_sample[c]:
                    # Mark ancestors from p upwards as above a sample,
                    # stopping after the first one that already was. root is
                    # the last node visited.
                    v = p
                    above_sample = False
                    while v != tskit.NULL and not above_sample:
                        above_sample = st.above_sample[v]
                        st.above_sample[v] = st.above_sample[v] or st.above_sample[c]
                        root = v
                        v = st.parent[v]
                    # print("root = ", root, st.above_sample[root])

                    if not above_sample:
                        # Replace c with root in root list.
                        # No visited ancestor was above a sample before, so
                        # root takes over c's former place in the root list.
                        # print("replacing", root, "with ", c ," in root list")
                        if lsib != tskit.NULL:
                            st.right_sib[lsib] = root
                        if rsib != tskit.NULL:
                            st.left_sib[rsib] = root
                        st.left_sib[root] = lsib
                        st.right_sib[root] = rsib
                        st.left_root = root
                    else:
                        # Remove c from root list.
                        # left_root is re-pointed at a former neighbour of c,
                        # preferring the right one when both exist.
                        # print("remove ", c ," from root list")
                        st.left_root = tskit.NULL
                        if lsib != tskit.NULL:
                            st.right_sib[lsib] = rsib
                            st.left_root = lsib
                        if rsib != tskit.NULL:
                            st.left_sib[rsib] = lsib
                            st.left_root = rsib

            # The interval ends at the nearest pending edge breakpoint, or at
            # the end of the sequence.
            st.right = sequence_length
            if j < M:
                st.right = min(st.right, edges[in_order[j]].left)
            if k < M:
                st.right = min(st.right, edges[out_order[k]].right)
            assert st.left_root != tskit.NULL
            # The updates above may leave left_root in the middle of the root
            # list; rewind it to the leftmost root.
            while st.left_sib[st.left_root] != tskit.NULL:
                st.left_root = st.left_sib[st.left_root]
            st.index += 1
            # Add in all the sites
            st.site_list = [
                site for site in self._sites if st.left <= site.position < st.right]
            yield st
            st.left = st.right
Пример #5
0
    def compress_path(self, head):
        """
        Tries to compress the path for the edge chain starting at ``head``
        and returns the result of squashing that chain.

        Edges in the chain are looked up in ``self.path_index`` by their
        (left, right, parent) values. Runs of consecutive matches that are
        contiguous and share the same matched child are either re-parented
        onto that child (when it is already flagged NODE_IS_PC_ANCESTOR) or
        handed to ``create_pc_node``.
        """
        index_map = self.path_index
        # (edge, match) pairs in chain order, plus the offsets in that list
        # at which each contiguous run begins.
        matches = []
        contig_offsets = []
        previous = tskit.Edge(-1, -1, -1, -1)
        edge = head
        while edge is not None:
            signature = (edge.left, edge.right, edge.parent)
            # A child of -1 is presumably smaller than any stored child, so
            # the bisection lands on the first entry with this signature.
            pos = index_map.bisect(signature + (-1,))
            if pos < len(index_map) and index_map.iloc[pos][:3] == signature:
                match = index_map.peekitem(pos)[1]
                if not (edge.left == previous.right
                        and match.child == previous.child):
                    # This match starts a new run.
                    contig_offsets.append(len(matches))
                matches.append((edge, match))
                previous = match
            edge = edge.next
        contig_offsets.append(len(matches))
        runs = [
            matches[start:stop]
            for start, stop in zip(contig_offsets, contig_offsets[1:])
        ]

        # FIXME This is just to check the contig finding code above. Remove.
        expected_runs = [[(None, tskit.Edge(-1, -1, -1, -1))]]  # Sentinel
        for edge, match in matches:
            tail = expected_runs[-1][-1][1]
            if edge.left == tail.right and match.child == tail.child:
                expected_runs[-1].append((edge, match))
            else:
                expected_runs.append([(edge, match)])
        # The sentinel run accounts for the one extra entry.
        assert len(runs) + 1 == len(expected_runs)
        for expected, actual in zip(expected_runs[1:], runs):
            assert expected == actual

        for run in runs:
            if len(run) <= 1:
                # A single match gives nothing to compress.
                continue
            child_id = run[0][1].child
            if self.flags[child_id] == constants.NODE_IS_PC_ANCESTOR:
                # Reuse the existing node as the parent of each edge.
                for edge, _ in run:
                    edge.parent = child_id
            else:
                self.create_pc_node(run)
        return self.squash_edges(head)