def test_match_variant_sequence_to_reference_context_exact_match():
    # The default TP53-201 inputs describe a variant sequence that is an
    # exact match for the beginning of the transcript, so matching with
    # zero allowed mismatches should give back the expected object.
    (seq,
     context,
     expected) = make_inputs_for_tp53_201_variant()

    match_kwargs = dict(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0)
    matched = match_variant_sequence_to_reference_context(**match_kwargs)
    eq_(expected, matched)
示例#2
0
def test_match_variant_sequence_to_reference_context_exact_match():
    # Exact-match case: the variant sequence agrees with the start of the
    # TP53-201 transcript, so no mismatches need to be tolerated.
    inputs = make_inputs_for_tp53_201_variant()
    seq, context, expected = inputs

    matched = match_variant_sequence_to_reference_context(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0)
    eq_(expected, matched)
def test_match_variant_sequence_to_reference_context_bad_start_nucleotide_trimming():
    # Inputs are built with a "CTG" prefix and one bad nucleotide at the
    # start. Allowing a single trimming attempt should be enough for the
    # match to succeed.
    input_kwargs = dict(
        cdna_prefix="CTG",
        n_bad_nucleotides_at_start=1)
    seq, context, expected = make_inputs_for_tp53_201_variant(**input_kwargs)

    matched = match_variant_sequence_to_reference_context(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=2,
        max_transcript_mismatches=0,
        max_trimming_attempts=1)
    eq_(expected, matched)
示例#4
0
def test_match_variant_sequence_to_reference_context_bad_start_nucleotide_trimming(
):
    # One round of trimming is permitted, which should let the sequence
    # with a bad leading nucleotide ("CTG" prefix) match successfully.
    (seq,
     context,
     expected) = make_inputs_for_tp53_201_variant(
        cdna_prefix="CTG",
        n_bad_nucleotides_at_start=1)

    match_kwargs = dict(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=2,
        max_transcript_mismatches=0,
        max_trimming_attempts=1)
    eq_(expected, match_variant_sequence_to_reference_context(**match_kwargs))
示例#5
0
def test_match_variant_sequence_to_reference_context_include_mismatches_after_variant(
):
    # The same inputs are matched twice, toggling only
    # include_mismatches_after_variant, with zero mismatches allowed.
    seq, context, expected = make_inputs_for_tp53_201_variant(
        # too long and also one mismatch: G>A in 3rd char
        cdna_suffix="AGAAGCCGCAGTCAGAT",
        mismatches_after_variant=15)

    # Arguments common to both match attempts.
    shared_kwargs = dict(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0)

    # Mismatches after the variant are not counted, so a result is expected.
    ignoring_suffix_mismatches = match_variant_sequence_to_reference_context(
        include_mismatches_after_variant=False,
        **shared_kwargs)
    eq_(expected, ignoring_suffix_mismatches)

    # Counting the mismatches after the variant should leave no result.
    counting_suffix_mismatches = match_variant_sequence_to_reference_context(
        include_mismatches_after_variant=True,
        **shared_kwargs)
    eq_(None, counting_suffix_mismatches)
def test_match_variant_sequence_to_reference_context_bad_start_nucleotide_no_trimming():
    # With neither mismatches nor trimming attempts allowed, the sequence
    # with a bad leading nucleotide is expected to fail to match.
    input_kwargs = dict(
        cdna_prefix="CTG",
        n_bad_nucleotides_at_start=1)
    # The third returned value (the expected match) is unused here.
    seq, context, _ = make_inputs_for_tp53_201_variant(**input_kwargs)

    matched = match_variant_sequence_to_reference_context(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=2,
        max_transcript_mismatches=0,
        max_trimming_attempts=0)
    eq_(None, matched)
示例#7
0
def test_match_variant_sequence_to_reference_context_bad_start_nucleotide_no_trimming(
):
    # No trimming rounds and no mismatches are permitted, so the bad
    # nucleotide at the start should prevent any match.
    (seq,
     context,
     _) = make_inputs_for_tp53_201_variant(
        cdna_prefix="CTG",
        n_bad_nucleotides_at_start=1)

    match_kwargs = dict(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=2,
        max_transcript_mismatches=0,
        max_trimming_attempts=0)
    eq_(None, match_variant_sequence_to_reference_context(**match_kwargs))
示例#8
0
def test_match_variant_sequence_to_reference_context_not_enough_prefix():
    # The variant sequence lacks the first nucleotide of the start codon
    # ("TG" rather than "ATG") and the variant occurs immediately after
    # the start codon. Because min_transcript_prefix_length is 3 here,
    # the match function is expected to return None.
    input_kwargs = dict(
        cdna_prefix="TG",
        reference_context_size=2)
    seq, context, _ = make_inputs_for_tp53_201_variant(**input_kwargs)

    matched = match_variant_sequence_to_reference_context(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0)
    eq_(matched, None)
def test_match_variant_sequence_to_reference_context_not_enough_prefix():
    # Only "TG" of the "ATG" start codon is present before the variant,
    # which occurs immediately after the start codon. A minimum transcript
    # prefix length of 3 cannot be met, so None is the expected result.
    (seq,
     context,
     _) = make_inputs_for_tp53_201_variant(
        cdna_prefix="TG",
        reference_context_size=2)

    match_kwargs = dict(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0)
    eq_(match_variant_sequence_to_reference_context(**match_kwargs), None)
示例#10
0
def test_match_variant_sequence_to_reference_context_trim_1_bad_nucleotide():
    # The variant sequence carries an extra nucleotide at its beginning
    # that is supported by only 1 read, while the correct sequence is
    # supported by 2 reads. With one trimming attempt allowed, the match
    # function should trim off the extra "G" and correctly match against
    # the TP53-201 transcript sequence.
    input_kwargs = dict(
        cdna_prefix="GATG",
        n_bad_nucleotides_at_start=1)
    seq, context, expected = make_inputs_for_tp53_201_variant(**input_kwargs)

    matched = match_variant_sequence_to_reference_context(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
        max_trimming_attempts=1)
    eq_(expected, matched)
def test_match_variant_sequence_to_reference_context_trim_1_bad_nucleotide():
    # An extra leading "G" (supported by only 1 read, versus 2 reads for
    # the correct sequence) precedes the start codon. A single trimming
    # attempt is allowed, which should remove that nucleotide so the
    # sequence matches the TP53-201 transcript.
    (seq,
     context,
     expected) = make_inputs_for_tp53_201_variant(
        cdna_prefix="GATG",
        n_bad_nucleotides_at_start=1)

    match_kwargs = dict(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
        max_trimming_attempts=1)
    eq_(expected, match_variant_sequence_to_reference_context(**match_kwargs))
示例#12
0
def test_match_variant_sequence_to_reference_context_ignore_extra_prefix():
    # Three "extra" nucleotides sit at the start of the sequence, but the
    # reference context size is only 3, so the sequence should still match
    # without any trimming attempts.
    input_kwargs = dict(
        cdna_prefix="GGGATG",
        n_bad_nucleotides_at_start=3,
        reference_context_size=3)
    seq, context, expected = make_inputs_for_tp53_201_variant(**input_kwargs)

    matched = match_variant_sequence_to_reference_context(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
        max_trimming_attempts=0)
    eq_(expected, matched)

    # The "GGG" codon must have been ignored: translation should start at
    # the "ATG" that follows it.
    leading_codon = matched.cdna_sequence[:3]
    eq_(leading_codon, "ATG")
def test_match_variant_sequence_to_reference_context_ignore_extra_prefix():
    # The cDNA prefix has three surplus nucleotides ("GGG") ahead of the
    # "ATG"; with a reference context size of 3 they fall outside the
    # compared region, so a match is expected.
    (seq,
     context,
     expected) = make_inputs_for_tp53_201_variant(
        cdna_prefix="GGGATG",
        n_bad_nucleotides_at_start=3,
        reference_context_size=3)

    match_kwargs = dict(
        variant_sequence=seq,
        reference_context=context,
        min_transcript_prefix_length=3,
        max_transcript_mismatches=0,
        max_trimming_attempts=0)
    matched = match_variant_sequence_to_reference_context(**match_kwargs)
    eq_(expected, matched)

    # Check that the "GGG" codon was dropped and the resulting cDNA
    # sequence starts at the "ATG" after it.
    eq_(matched.cdna_sequence[:3], "ATG")