def test_common_intervals_len_5_c(alg):
    """Check ``alg`` on the identity of size 5 versus ``(0, 2, 4, 1, 3)``.

    The expected intervals, the algorithm under test and both permutations
    are handed to ``test_results`` for the actual comparison.
    """
    identity = (0, 1, 2, 3, 4)
    shuffled = (0, 2, 4, 1, 3)

    expected = [
        CommonInterval((1, 4), (1, 4)),  # length 4: elements 1..4
        CommonInterval((0, 4), (0, 4)),  # length 5: the whole permutation
    ]

    test_results(expected, alg, identity, shuffled)
def test_common_intervals_3_strings_len_5(alg):
    """Check ``alg`` on three permutations of size 5.

    Each expected ``CommonInterval`` carries one index pair per permutation,
    in the same order the permutations are passed to ``test_results``.
    """
    first = [0, 1, 2, 3, 4]
    second = [1, 0, 3, 4, 2]
    third = [1, 4, 3, 0, 2]

    expected = [
        CommonInterval((3, 4), (2, 3), (1, 2)),  # length 2: elements {3, 4}
        CommonInterval((0, 4), (0, 4), (0, 4)),  # length 5: everything
    ]

    test_results(expected, alg, first, second, third)
def test_common_intervals_len_4(alg):
    """Check ``alg`` on the identity of size 4 versus ``(3, 0, 2, 1)``."""
    identity, shuffled = (0, 1, 2, 3), (3, 0, 2, 1)

    # One expected interval per length, shortest first.
    expected = [
        CommonInterval((1, 2), (2, 3)),  # length 2
        CommonInterval((0, 2), (1, 3)),  # length 3
        CommonInterval((0, 3), (0, 3)),  # length 4
    ]

    test_results(expected, alg, identity, shuffled)
def common_k(perm_id: Sequence, perms: Sequence[Sequence], sub_len: int):
    """Lazily yield the common intervals whose length is ``sub_len``.

    Every window of ``sub_len`` consecutive items of ``perm_id`` is turned
    into a frozenset and looked up in all permutations except the first one
    (``perms[1:]``) through ``find_in_others``.  Whenever that lookup returns
    something truthy, a ``CommonInterval`` is yielded whose first pair is
    made of the window's first and last items, followed by the lookup result
    unpacked.

    :param perm_id: the reference permutation that is scanned window by window
    :param perms: all permutations; only ``perms[1:]`` is searched
    :param sub_len: the window size
    """
    # Shared by all windows and handed to find_in_others on every call;
    # presumably filled lazily there - verify against find_in_others.
    lookup_index = {}

    for win in window(perm_id, sub_len):
        matches = find_in_others(lookup_index, frozenset(win), perms[1:])
        if not matches:
            continue
        yield CommonInterval((win[0], win[-1]), *matches)
def trivial_common(perm_a: Sequence, perm_b: Sequence) -> List[CommonInterval]:
    """Collect the common intervals of two permutations by brute force.

    For every window length from 2 up to ``len(perm_a)`` and every window of
    that length in ``perm_a``, the window's items are looked up as a set in
    ``perm_b`` via ``find_in_other``.  Each truthy lookup result produces one
    ``CommonInterval`` built from the window's first and last items and that
    result.

    :param perm_a: the permutation that is scanned window by window
    :param perm_b: the permutation that is searched for each window's item set
    :return: the intervals found, ordered by window length and then by the
        order in which ``window`` produces them; an empty list when
        ``perm_a`` has fewer than two items
    """
    commons = []
    for length in range(2, len(perm_a) + 1):
        for w1 in window(perm_a, length):
            chars1 = set(w1)
            if in_other := find_in_other(chars1, perm_b):
                commons.append(CommonInterval((w1[0], w1[-1]), in_other))
    # The collected list has to be handed back explicitly; without this the
    # function would return None despite its declared return type.
    return commons
def _intersects_with(cls, intervals: CommonIntervals, ci: CommonInterval) -> IntervalList:
    """Return the intervals that intersect ``ci`` and lie 'after' it.

    An interval counts as 'after' ``ci`` when its ``first_end`` is not
    smaller than ``ci.first_end``.  The order of ``intervals`` is preserved.
    """
    later_overlaps = []
    for candidate in intervals:
        if not CommonInterval.intersect(ci, candidate):
            continue
        if ci.first_end <= candidate.first_end:
            later_overlaps.append(candidate)
    return later_overlaps
def test_common_intervals_len_5_b(alg):
    """Check ``alg`` on the identity of size 5 versus ``(3, 1, 4, 0, 2)``.

    The only interval expected here is the one covering both permutations
    entirely.
    """
    identity = (0, 1, 2, 3, 4)
    shuffled = (3, 1, 4, 0, 2)

    whole = CommonInterval((0, 4), (0, 4))  # length 5

    test_results([whole], alg, identity, shuffled)
def test_common_intervals_len_5(alg):
    """Check ``alg`` on the identity of size 5 versus ``(4, 3, 0, 2, 1)``."""
    identity = (0, 1, 2, 3, 4)
    shuffled = (4, 3, 0, 2, 1)

    pairs = [
        CommonInterval((1, 2), (3, 4)),
        CommonInterval((3, 4), (0, 1)),
    ]
    longer = [
        CommonInterval((0, 2), (2, 4)),  # length 3
        CommonInterval((0, 3), (1, 4)),  # length 4
        CommonInterval((0, 4), (0, 4)),  # length 5
    ]

    test_results(pairs + longer, alg, identity, shuffled)
def chain_starting_with(self, ci: CommonInterval) -> List[CommonInterval]:
    """Return the chain of successively intersecting intervals that begins at ``ci``.

    The nesting level of ``ci`` is found through ``self.reverse_index``.
    The items that follow ``ci`` on that level are walked in order, and each
    one is added as long as it intersects the previously added link.  The
    walk stops at the first item that does not.

    :return: ``[ci, ...]`` when at least one follower was chained,
        otherwise an empty list (``ci`` alone is not a chain)
    """
    level_items = self.nesting_levels[self.reverse_index[ci]]
    position = level_items.index(ci)

    links = [ci]
    for candidate in level_items[position + 1:]:
        if not CommonInterval.intersect(links[-1], candidate):
            break
        links.append(candidate)

    if len(links) == 1:
        return []
    return links
def process_merged_chars(
        cls,
        raw_tree: PQTree,
        translation: Tuple[Mapping[Tuple[ContextChar, ContextChar], MergedChar]],
        perms):
    """Return a copy of ``raw_tree`` with every merged-char leaf expanded.

    ``raw_tree`` itself is left untouched; all work happens on a deep copy.
    A leaf is expanded when its ``ci.alt_sign`` is a ``MergedChar``.  The
    pair of context chars behind the merged char is taken from the first
    entry of the inverted ``translation[0]`` mapping, and one new leaf is
    built per char at positions ``first_start`` and ``first_start + 1``.

    How the two leaves are attached:

    * parent is a ``PNode``: the merged leaf is replaced by a ``QNode``
      holding both leaves;
    * otherwise, if ``cls.possible_raise_merged_char`` agrees, the two
      leaves replace the merged leaf directly in the parent;
    * otherwise the ``QNode`` is used, same as in the ``PNode`` case.

    :param raw_tree: the tree whose merged chars should be expanded
    :param translation: a one-element tuple holding the mapping from
        context-char pairs to merged chars
    :param perms: forwarded unchanged to ``possible_raise_merged_char``
    :return: the processed deep copy
    """
    assert len(translation) == 1

    result_tree = deepcopy(raw_tree)
    pairs_by_merged = invert_dict_multi_val(translation[0])

    # NOTE(review): children are replaced while result_tree is being
    # iterated - confirm the tree iterator tolerates that.
    for node, parent in result_tree:
        if not isinstance(node, LeafNode):
            continue
        merged = node.ci.alt_sign
        if not isinstance(merged, MergedChar):
            continue

        left_char, right_char = pairs_by_merged[merged][0]
        start = node.ci.first_start

        left_ci = CommonInterval((start, start))
        left_ci.sign = left_char.char
        right_ci = CommonInterval((start + 1, start + 1))
        right_ci.sign = right_char.char

        left_leaf = LeafNode(left_ci)
        right_leaf = LeafNode(right_ci)
        wrapper = QNode((start, start + 1)).with_children((left_leaf, right_leaf))

        # The lift check is only consulted for parents that are not P nodes.
        lift_leaves = (
            not isinstance(parent, PNode)
            and cls.possible_raise_merged_char(perms, node, left_leaf, right_leaf, parent)
        )
        if lift_leaves:
            parent.replace_child(node, left_leaf, right_leaf)
        else:
            parent.replace_child(node, wrapper)

    return result_tree
def known_example():
    """Rebuild the PQ tree of a published example and check its shape.

    The intervals are the ones from the paper
    "Gene Proximity Analysis across Whole Genomes via PQ Trees".
    They are reduced with ``ReduceIntervals.reduce``, arranged by
    ``IntervalHierarchy.from_irreducible_intervals`` and turned into a tree
    by ``PQTreeBuilder._from_s``, whose parenthesised form is asserted.
    """
    singletons = [CommonInterval((pos, pos)) for pos in range(9)]  # length 1
    longer = [
        # length 2
        CommonInterval((0, 1), (6, 7), (0, 1)),
        CommonInterval((1, 2), (7, 8), (1, 2)),
        CommonInterval((3, 4), (2, 3), (5, 6)),
        CommonInterval((4, 5), (3, 4), (6, 7)),
        # length 3
        CommonInterval((0, 2), (6, 8), (0, 2)),
        CommonInterval((3, 5), (2, 4), (5, 7)),
        # length 4
        CommonInterval((3, 6), (2, 5), (4, 7)),
        # length 5
        CommonInterval((3, 7), (1, 5), (3, 7)),
        # length 6
        CommonInterval((3, 8), (0, 5), (3, 8)),
        # length 8
        CommonInterval((0, 7), (1, 8), (0, 7)),
        # length 9
        CommonInterval((0, 8), (0, 8), (0, 8)),
    ]
    commons1 = singletons + longer

    irreducible = ReduceIntervals.reduce(commons1)
    hierarchy = IntervalHierarchy.from_irreducible_intervals(irreducible)
    tree = PQTreeBuilder._from_s(hierarchy, None)

    assert tree.to_parens() == "[[0 1 2] [[[3 4 5] 6] 7] 8]"
def test_common_intervals_3_strings_len_9(alg):
    """Check ``alg`` on three permutations of size 9.

    Each expected ``CommonInterval`` carries one index pair per permutation,
    in the same order the permutations are passed to ``test_results``.
    """
    first = [0, 1, 2, 3, 4, 5, 6, 7, 8]
    second = [8, 7, 3, 4, 5, 6, 0, 1, 2]
    third = [0, 1, 2, 7, 6, 3, 4, 5, 8]

    expected = [
        # length 2
        CommonInterval((0, 1), (6, 7), (0, 1)),
        CommonInterval((1, 2), (7, 8), (1, 2)),
        CommonInterval((3, 4), (2, 3), (5, 6)),
        CommonInterval((4, 5), (3, 4), (6, 7)),
        # length 3
        CommonInterval((0, 2), (6, 8), (0, 2)),
        CommonInterval((3, 5), (2, 4), (5, 7)),
        # length 4
        CommonInterval((3, 6), (2, 5), (4, 7)),
        # length 5
        CommonInterval((3, 7), (1, 5), (3, 7)),
        # length 6
        CommonInterval((3, 8), (0, 5), (3, 8)),
        # length 8
        CommonInterval((0, 7), (1, 8), (0, 7)),
        # length 9
        CommonInterval((0, 8), (0, 8), (0, 8)),
    ]

    test_results(expected, alg, first, second, third)
def test_common_intervals_len_9(alg):
    """Check ``alg`` on the identity of size 9 versus ``(8 7 3 4 5 6 0 1 2)``."""
    identity = (0, 1, 2, 3, 4, 5, 6, 7, 8)
    shuffled = (8, 7, 3, 4, 5, 6, 0, 1, 2)

    expected = [
        # length 2
        CommonInterval((0, 1), (6, 7)),
        CommonInterval((1, 2), (7, 8)),
        CommonInterval((3, 4), (2, 3)),
        CommonInterval((4, 5), (3, 4)),
        CommonInterval((5, 6), (4, 5)),
        CommonInterval((7, 8), (0, 1)),
        # length 3
        CommonInterval((0, 2), (6, 8)),
        CommonInterval((3, 5), (2, 4)),
        CommonInterval((4, 6), (3, 5)),
        # length 4
        CommonInterval((3, 6), (2, 5)),
        # length 5
        CommonInterval((3, 7), (1, 5)),
        # length 6
        CommonInterval((3, 8), (0, 5)),
        # length 7
        CommonInterval((0, 6), (2, 8)),
        # length 8
        CommonInterval((0, 7), (1, 8)),
        # length 9
        CommonInterval((0, 8), (0, 8)),
    ]

    test_results(expected, alg, identity, shuffled)
def test_given_examples():
    """``ReduceIntervals.reduce`` must return the expected irreducible sets.

    Two inputs are checked.  The first one is the 0-based form of the
    1-based example
    C = {[1, 2], [2, 3], [1, 3], [4, 5], [5, 6], [4, 6], [4, 7], [4, 8],
    [4, 9], [1, 8], [1, 9]} with irreducible intervals
    I = {[1, 2], [2, 3], [4, 5], [5, 6], [4, 7], [4, 8], [4, 9], [1, 8]}.
    The second one uses intervals that carry a single index pair.
    """
    # --- first example: intervals over three permutations -----------------
    commons1 = [
        # length 2
        CommonInterval((0, 1), (6, 7), (0, 1)),
        CommonInterval((1, 2), (7, 8), (1, 2)),
        CommonInterval((3, 4), (2, 3), (5, 6)),
        CommonInterval((4, 5), (3, 4), (6, 7)),
        # length 3
        CommonInterval((0, 2), (6, 8), (0, 2)),
        CommonInterval((3, 5), (2, 4), (5, 7)),
        # length 4
        CommonInterval((3, 6), (2, 5), (4, 7)),
        # length 5
        CommonInterval((3, 7), (1, 5), (3, 7)),
        # length 6
        CommonInterval((3, 8), (0, 5), (3, 8)),
        # length 8
        CommonInterval((0, 7), (1, 8), (0, 7)),
        # length 9
        CommonInterval((0, 8), (0, 8), (0, 8)),
    ]
    irreducible1 = {
        # length 2
        CommonInterval((0, 1), (6, 7), (0, 1)),
        CommonInterval((1, 2), (7, 8), (1, 2)),
        CommonInterval((3, 4), (2, 3), (5, 6)),
        CommonInterval((4, 5), (3, 4), (6, 7)),
        # length 4
        CommonInterval((3, 6), (2, 5), (4, 7)),
        # length 5
        CommonInterval((3, 7), (1, 5), (3, 7)),
        # length 6
        CommonInterval((3, 8), (0, 5), (3, 8)),
        # length 8
        CommonInterval((0, 7), (1, 8), (0, 7)),
    }

    # --- second example: intervals with a single index pair ---------------
    commons2 = [
        # length 2
        CommonInterval((0, 1)),
        CommonInterval((1, 2)),
        # length 3
        CommonInterval((0, 2)),
        # length 6
        CommonInterval((2, 7)),
        CommonInterval((3, 8)),
        CommonInterval((4, 9)),
        # length 8
        CommonInterval((0, 7)),
        # length 9
        CommonInterval((0, 8)),
        # length 10
        CommonInterval((0, 9)),
    ]
    irreducible2 = {
        # length 2
        CommonInterval((0, 1)),
        CommonInterval((1, 2)),
        # length 6
        CommonInterval((2, 7)),
        CommonInterval((3, 8)),
        CommonInterval((4, 9)),
    }

    reduced1 = ReduceIntervals.reduce(commons1)
    assert reduced1 == irreducible1
    reduced2 = ReduceIntervals.reduce(commons2)
    assert reduced2 == irreducible2
def produce_s_for_example():
    """Pretty-print the nesting levels built for a fixed 9-element example.

    The example intervals are reduced with ``ReduceIntervals.reduce`` and
    arranged by ``IntervalHierarchy.from_irreducible_intervals``; the
    resulting ``nesting_levels`` are printed with ``pprint``.
    """
    singletons = [CommonInterval((pos, pos)) for pos in range(9)]  # length 1
    longer = [
        # length 2
        CommonInterval((0, 1), (6, 7), (0, 1)),
        CommonInterval((1, 2), (7, 8), (1, 2)),
        CommonInterval((3, 4), (2, 3), (5, 6)),
        CommonInterval((4, 5), (3, 4), (6, 7)),
        # length 3
        CommonInterval((0, 2), (6, 8), (0, 2)),
        CommonInterval((3, 5), (2, 4), (5, 7)),
        # length 4
        CommonInterval((3, 6), (2, 5), (4, 7)),
        # length 5
        CommonInterval((3, 7), (1, 5), (3, 7)),
        # length 6
        CommonInterval((3, 8), (0, 5), (3, 8)),
        # length 8
        CommonInterval((0, 7), (1, 8), (0, 7)),
        # length 9
        CommonInterval((0, 8), (0, 8), (0, 8)),
    ]
    commons1 = singletons + longer

    hierarchy = IntervalHierarchy.from_irreducible_intervals(
        ReduceIntervals.reduce(commons1)
    )
    pprint(hierarchy.nesting_levels)
def common_k(perm_id: Sequence, perms: Sequence[Sequence], k: int):
    """Lazily yield the common intervals whose length is ``k``.

    Every window of ``k`` consecutive items of ``perm_id`` is turned into a
    set and looked up in all permutations except the first one
    (``perms[1:]``) through ``find_in_others``.  Whenever that lookup returns
    something truthy, a ``CommonInterval`` is yielded whose first pair is
    made of the window's first and last items, followed by the lookup result
    unpacked.

    :param perm_id: the reference permutation that is scanned window by window
    :param perms: all permutations; only ``perms[1:]`` is searched
    :param k: the window size
    """
    for win in window(perm_id, k):
        hits = find_in_others(set(win), perms[1:])
        if not hits:
            continue
        yield CommonInterval((win[0], win[-1]), *hits)