def test_sequences_without_commonality_have_no_alignment(s1, s2):
    """Aligning two inputs that share no element must yield no alignments.

    `s1` is split into the part absent from `s2` and the part shared with
    `s2`; by construction those two sets have no element in common.
    """
    only_in_s1 = s1.difference(s2)
    shared = s1.intersection(s2)

    # Sanity check on the construction itself: the two halves are disjoint.
    assert only_in_s1.isdisjoint(shared)

    _, alignments = align(
        list(only_in_s1), list(shared), simple_score, simple_gap)

    assert not list(alignments)
def test_imperfect_alignment_is_found(chunk):
    """A doubled chunk aligns once against the chunk split by a separator.

    The separator is one greater than the largest element of `chunk`, so it
    cannot match anything in the doubled sequence.
    """
    items = list(chunk)
    separator = max(chunk) + 1

    doubled = items + items
    split = [*items, separator, *items]

    _, alignments = align(doubled, split, simple_score, simple_gap)
    found = list(alignments)

    # Exactly one alignment is expected.
    assert len(found) == 1

    # Its first pair should point at index 0 of both inputs.
    first_pair = found[0][0]
    assert first_pair == (0, 0)
def update(anchor, handle=None):
    """Update an anchor based on the current contents of its source file.

    The anchor's stored context text is aligned against the current source
    text. The source indices that the first alignment maps the anchor's
    topic onto determine the offset and width of the new anchor.

    Args:
        anchor: The `Anchor` to be updated.
        handle: File-like object containing contents of the anchor's file. If
            `None`, then this function will open the file and read it.

    Returns:
        A new `Anchor`, possibly identical to the input.

    Raises:
        AlignmentError: If no alignment could be found between the old
            anchor context and the new text, or if the alignment does not
            map any part of the topic onto the updated source. The message
            of the exception will say what the problem is.
    """
    if handle is None:
        with anchor.file_path.open(mode='rt') as fp:
            source_text = fp.read()
    else:
        source_text = handle.read()
        # Rewind: the same handle is passed on to `make_anchor` below,
        # which presumably needs to read it from the start again.
        handle.seek(0)

    ctxt = anchor.context

    # Only the alignments are used; the overall score is discarded.
    _, alignments = align(ctxt.full_text, source_text, score, gap_penalty)

    try:
        alignment = next(alignments)
    except StopIteration:
        # The exhausted-iterator detail is noise for callers; suppress it.
        raise AlignmentError(
            'No alignments for anchor: {}'.format(anchor)) from None

    # Added to an index into the context text before the topic check;
    # presumably this converts it to an offset in the original file.
    anchor_offset = ctxt.offset - len(ctxt.before)

    # Keep the source side of every fully matched pair (no gap on either
    # side) whose anchor-side index falls inside the topic.
    source_indices = tuple(
        s_idx
        for (a_idx, s_idx) in alignment
        if a_idx is not None
        and s_idx is not None
        and _index_in_topic(a_idx + anchor_offset, anchor))

    if not source_indices:
        raise AlignmentError(
            "Best alignment does not map topic to updated source.")

    return make_anchor(
        file_path=anchor.file_path,
        offset=source_indices[0],
        width=len(source_indices),
        context_width=anchor.context.width,
        metadata=anchor.metadata,
        handle=handle)
def test_alignment_finds_perfect_subset(prefix, match, suffix):
    """`match` embedded between `prefix` and `suffix` is aligned in place.

    Exactly one alignment should be as long as `match` and start right
    after `prefix`, pairing each index of `match` with its position in the
    larger sequence.
    """
    start = len(prefix)
    width = len(match)
    haystack = prefix + match + suffix

    _, alignments = align(match, haystack, simple_score, simple_gap)

    # Keep only perfect alignments (as long as `match`) that begin
    # immediately after `prefix`.
    candidates = []
    for al in alignments:
        if len(al) == width and al[0][1] == start:
            candidates.append(al)

    assert len(candidates) == 1

    expected = tuple((i, start + i) for i in range(width))
    assert candidates[0] == expected
def test_multiple_alignments(match, size):
    """Aligning `match` against `size` copies of itself gives `size` results."""
    needle = list(match)
    repeated = needle * size

    _, alignments = align(needle, repeated, simple_score, simple_gap)

    found = sum(1 for _ in alignments)
    assert found == size
def test_alignments_are_no_longer_than_longest_input(s1, s2):
    """No alignment may contain more pairs than the longer input has items."""
    longest = max(len(s1), len(s2))

    _, alignments = align(s1, s2, simple_score, simple_gap)

    assert all(len(al) <= longest for al in alignments)
def test_sequences_with_commonality_have_at_least_one_alignment(seqs):
    """The first two sequences in `seqs` must produce at least one alignment."""
    first = seqs[0]
    second = seqs[1]

    _, alignments = align(first, second, simple_score, simple_gap)

    found = list(alignments)
    assert found
def test_empty_sequences_have_no_alignment(seq):
    """Aligning anything with an empty sequence, on either side, finds nothing."""
    # Same check with the empty sequence as the second, then first, argument.
    for left, right in ((seq, []), ([], seq)):
        _, alignments = align(left, right, simple_score, simple_gap)
        assert not list(alignments)