def test_match_variant_sequence_to_reference_context_exact_match():
    # With the helper's default arguments the variant sequence is an exact
    # match for the beginning of the TP53-201 transcript, so matching with
    # zero allowed mismatches must return the helper's expected result.
    inputs = make_inputs_for_tp53_201_variant()
    variant_seq, ref_context, expected_match = inputs

    match_kwargs = dict(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
    )
    observed = match_variant_sequence_to_reference_context(**match_kwargs)
    eq_(expected_match, observed)
def test_match_variant_sequence_to_reference_context_bad_start_nucleotide_trimming():
    # The inputs are built with a "CTG" prefix and one bad nucleotide at the
    # start. With a single trimming attempt permitted (and no mismatches),
    # the match is expected to succeed.
    #
    # NOTE(review): a function with this exact name is defined again later in
    # this file; the later definition rebinds the name, so only one of the
    # two copies is visible to the test runner.
    variant_seq, ref_context, expected_match = make_inputs_for_tp53_201_variant(
        cdna_prefix="CTG",
        n_bad_nucleotides_at_start=1,
    )

    observed = match_variant_sequence_to_reference_context(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=2,
        max_transcript_mismatches=0,
        max_trimming_attempts=1,
    )
    eq_(expected_match, observed)
def test_match_variant_sequence_to_reference_context_bad_start_nucleotide_trimming():
    # One bad nucleotide precedes the real sequence ("CTG" prefix); allowing
    # exactly one round of trimming should be enough for the match to succeed
    # even though no transcript mismatches are tolerated.
    #
    # NOTE(review): this repeats an earlier definition with the same name in
    # this file. Being the later one, it is the definition that the module
    # ends up exposing; consider removing one of the copies.
    helper_kwargs = dict(cdna_prefix="CTG", n_bad_nucleotides_at_start=1)
    variant_seq, ref_context, expected_match = \
        make_inputs_for_tp53_201_variant(**helper_kwargs)

    match_kwargs = dict(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=2,
        max_transcript_mismatches=0,
        max_trimming_attempts=1,
    )
    eq_(expected_match,
        match_variant_sequence_to_reference_context(**match_kwargs))
def test_match_variant_sequence_to_reference_context_include_mismatches_after_variant():
    # The suffix below is too long and also contains one mismatch
    # (G>A in the 3rd character).
    overlong_suffix = "AGAAGCCGCAGTCAGAT"
    variant_seq, ref_context, expected_match = make_inputs_for_tp53_201_variant(
        cdna_suffix=overlong_suffix,
        mismatches_after_variant=15,
    )

    # Both calls share everything except include_mismatches_after_variant.
    shared_kwargs = dict(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
    )

    # Mismatches after the variant are not counted, so a result is expected.
    ignoring_suffix = match_variant_sequence_to_reference_context(
        include_mismatches_after_variant=False, **shared_kwargs)
    eq_(expected_match, ignoring_suffix)

    # Once mismatches after the variant are counted, zero allowed mismatches
    # means no result is expected.
    counting_suffix = match_variant_sequence_to_reference_context(
        include_mismatches_after_variant=True, **shared_kwargs)
    eq_(None, counting_suffix)
def test_match_variant_sequence_to_reference_context_bad_start_nucleotide_no_trimming():
    # Same inputs as the trimming case ("CTG" prefix, one bad nucleotide at
    # the start), but with neither mismatches nor trimming rounds allowed the
    # match is expected to fail and return None.
    #
    # NOTE(review): a function with this exact name is defined again later in
    # this file; the later definition rebinds the name, so only one of the
    # two copies is visible to the test runner.
    variant_seq, ref_context, _ = make_inputs_for_tp53_201_variant(
        cdna_prefix="CTG",
        n_bad_nucleotides_at_start=1,
    )

    observed = match_variant_sequence_to_reference_context(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=2,
        max_transcript_mismatches=0,
        max_trimming_attempts=0,
    )
    eq_(None, observed)
def test_match_variant_sequence_to_reference_context_bad_start_nucleotide_no_trimming():
    # With zero allowed mismatches and zero trimming attempts, the single bad
    # nucleotide at the start of the "CTG"-prefixed sequence should prevent
    # any match, so None is expected.
    #
    # NOTE(review): this repeats an earlier definition with the same name in
    # this file. Being the later one, it is the definition that the module
    # ends up exposing; consider removing one of the copies.
    helper_kwargs = dict(cdna_prefix="CTG", n_bad_nucleotides_at_start=1)
    variant_seq, ref_context, _ = \
        make_inputs_for_tp53_201_variant(**helper_kwargs)

    match_kwargs = dict(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=2,
        max_transcript_mismatches=0,
        max_trimming_attempts=0,
    )
    eq_(None, match_variant_sequence_to_reference_context(**match_kwargs))
def test_match_variant_sequence_to_reference_context_not_enough_prefix():
    # The variant sequence is missing the first nucleotide of the start codon
    # ("TG" instead of "ATG") and the variant occurs immediately after the
    # start codon. Because min_transcript_prefix_length is 3 here, the match
    # function is expected to return None.
    truncated_prefix = "TG"
    variant_seq, ref_context, _ = make_inputs_for_tp53_201_variant(
        cdna_prefix=truncated_prefix,
        reference_context_size=2,
    )

    observed = match_variant_sequence_to_reference_context(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
    )
    eq_(observed, None)
def test_match_variant_sequence_to_reference_context_trim_1_bad_nucleotide():
    # The variant sequence carries an extra nucleotide at the beginning that
    # is supported by only 1 read, whereas the correct sequence is supported
    # by 2 reads. Allowing one trimming attempt should let the match function
    # drop the extra leading "G" and match the TP53-201 transcript sequence.
    helper_kwargs = dict(cdna_prefix="GATG", n_bad_nucleotides_at_start=1)
    variant_seq, ref_context, expected_match = \
        make_inputs_for_tp53_201_variant(**helper_kwargs)

    observed = match_variant_sequence_to_reference_context(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
        max_trimming_attempts=1,
    )
    eq_(expected_match, observed)
def test_match_variant_sequence_to_reference_context_ignore_extra_prefix():
    # Three "extra" nucleotides sit at the start of the sequence, but since
    # the reference context size is only 3 the sequence should still match,
    # even with no trimming attempts allowed.
    variant_seq, ref_context, expected_match = make_inputs_for_tp53_201_variant(
        cdna_prefix="GGGATG",
        n_bad_nucleotides_at_start=3,
        reference_context_size=3,
    )

    match_kwargs = dict(
        variant_sequence=variant_seq,
        reference_context=ref_context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
        max_trimming_attempts=0,
    )
    observed = match_variant_sequence_to_reference_context(**match_kwargs)
    eq_(expected_match, observed)

    # The leading "GGG" codon must have been ignored: translation should
    # start at the "ATG" that follows it.
    leading_codon = observed.cdna_sequence[:3]
    eq_(leading_codon, "ATG")