def test_longest_overlap():
    """Check that two reads are merged on their longest possible overlap.

    Using the longest overlap is what yields the shortest combined
    sequence, which is the value each assertion pins.
    """
    # The suffix "CCCC" of the first read matches the prefix of the second.
    suffix_to_prefix = {"one": "ACCCC", "two": "CCCCG"}
    merged = find_overlaps(suffix_to_prefix)
    assert merged == "ACCCCG"

    # Same shape of overlap, but the reads are supplied in the opposite order.
    prefix_to_suffix = {"three": "GGGGT", "four": "AGGGG"}
    merged = find_overlaps(prefix_to_suffix)
    assert merged == "AGGGGT"
def test_rosalind_exercise():
    """Compare the result for the Rosalind dataset with the stored answer.

    The expected value is the first line of the results file, stripped of
    surrounding whitespace; per the original note it is the answer that
    Rosalind judged to be correct.
    """
    dataset_path = "data/rosalind_long5.txt"
    expected_path = "results/long5.txt"

    # Only the first line of the results file is used as the expected value.
    with open(expected_path, "r") as handle:
        expected = handle.readline().strip()

    reads = read_sequences(dataset_path)
    merged = find_overlaps(reads)
    assert merged == expected
def test_simple_sequences_with_duplicates():
    """Check that duplicated reads still give a single merged sequence.

    Every read appears twice under a different key; the expected result
    is one instance of the longest supersequence, not a repeated one.
    """
    reads_with_copies = {
        "one": "AC",
        "two": "CG",
        "three": "GT",
        "one-dup": "AC",
        "two-dup": "CG",
        "three-dup": "GT",
    }
    merged = find_overlaps(reads_with_copies)
    assert merged == "ACGT"
def test_two_simple_sequences():
    """Check that two reads sharing one character are merged into one."""
    read_pair = {"one": "AC", "two": "CG"}
    merged = find_overlaps(read_pair)
    assert merged == "ACG"
def test_simple_deduplication():
    """Check that two identical reads collapse to one of them."""
    identical_reads = {"one": "A", "two": "A"}
    merged = find_overlaps(identical_reads)
    assert merged == "A"
def test_single_sequence():
    """Check that a lone read is returned unchanged."""
    lone_read = {"single_sequence": "ACGT"}
    merged = find_overlaps(lone_read)
    assert merged == "ACGT"
def test_find_overlaps():
    """Check the sample dataset against its known merged sequence.

    ``sample_dictionary`` is defined outside this test.
    """
    expected = "ATTAGACCTGCCGGAATAC"
    merged = find_overlaps(sample_dictionary)
    assert merged == expected