def test_forward_words():
    """Check that words are matched against a sequence in the same orientation.

    Three words are searched in one query sequence: two of them ('CACA' and
    'TATA') are expected to produce a match each, while 'KK' is expected to
    produce none.
    """
    query = SeqWithQuality(seq=Seq('gCACAggTGTGggTATAgg'))
    searched_words = ['CACA', 'TATA', 'KK']

    # Only the first alignment result returned by match_words is inspected.
    result = match_words(query, searched_words)[0]
    assert result['query'] == query

    matches = result['matches']

    # The match for CACA
    caca_match = matches[0]
    assert caca_match['subject'] == 'CACA'
    assert caca_match['start'] == 1
    assert caca_match['end'] == 10
    assert len(caca_match['match_parts']) == 2

    # The second match part is the one reported with subject_strand == -1
    # (the reverse match part).
    expected_reverse_part = {
        'query_start': 7,
        'query_end': 10,
        'query_strand': 1,
        'subject_start': 0,
        'subject_end': 3,
        'subject_strand': -1,
    }
    assert caca_match['match_parts'][1] == expected_reverse_part

    # The match for TATA
    tata_match = matches[1]
    assert tata_match['subject'] == 'TATA'
    assert tata_match['start'] == 13
    assert tata_match['end'] == 16
    assert len(tata_match['match_parts']) == 2

    # No matches for KK: only the CACA and TATA matches are present.
    assert len(matches) == 2
def strip_words_by_matching(sequence):
    """Strip the configured words from a sequence.

    The words to look for are not a parameter: ``words`` is resolved from the
    surrounding scope (NOTE(review): presumably a closure variable of an
    enclosing factory function -- confirm against the rest of the file).

    Returns:
        * ``None`` when ``sequence`` is ``None``.
        * ``sequence`` untouched when there are no words or ``match_words``
          reports no alignments.
        * ``None`` when no longest non-matched region can be determined
          (the helper returned ``None``).
        * Otherwise the same ``sequence`` object, after ``_add_trim_segments``
          has been called on it with the segments lying outside the longest
          word-free region (presumably marking them for trimming -- verify
          against the helper).
    """
    # A missing sequence is passed through as None; without words there is
    # nothing to strip, so the sequence is returned as given.
    if sequence is None or not words:
        return sequence

    word_alignments = match_words(sequence, words)
    if not word_alignments:
        return sequence

    free_locations = _get_non_matched_locations(word_alignments)
    longest_free_region = _get_longest_non_matched_seq_region_limits(
        sequence, free_locations
    )
    if longest_free_region is None:
        return None

    # Turn the region to keep into its complement over the whole sequence.
    trim_segments = _get_non_matched_from_matched_locations(
        [longest_free_region], len(sequence)
    )
    _add_trim_segments(trim_segments, sequence)
    return sequence