Exemplo n.º 1
0
    def test_not_correctable_deletion(self):
        """ Toy read AAGA with a 1-bp deletion (CIGAR 2M1D2M, MD 2^A2) on
            chr1, run with the correction cutoff set to 0. The deletion is
            expected to stay in place and be logged as Uncorrected/TooLarge.
        """

        read_seq = "AAGA"
        read_cigar = "2M1D2M"
        sam_fields = [
            "test_read", "0", "chr1", "202892094", "255", read_cigar, "*", "0",
            "0", read_seq, "*", "NM:i:1", "MD:Z:2^A2", "jI:B:i,-1", "jM:B:c,-1"
        ]

        # Reference genome and per-read log record
        genome = Fasta("input_files/hg38_chr1.fa")
        log_info = TC.init_log_info(sam_fields)

        # Build the transcript object (no splice annotation)
        transcript = t2.Transcript(sam_fields, genome, None)

        # Attempt correction with no known variants and a max length of 0
        te_log = TC.correctDeletions(transcript, genome, {}, 0, log_info)

        # The read must come back unchanged: the deletion is still present
        assert transcript.SEQ == read_seq
        assert transcript.CIGAR == read_cigar

        # The TE log must report the deletion as left uncorrected
        expected_fields = (
            "test_read", "chr1_202892095_202892096", "Deletion", "1",
            "Uncorrected", "TooLarge",
        )
        assert te_log == "\t".join(expected_fields) + "\n"
Exemplo n.º 2
0
    def test_variant_deletion(self):
        """ Toy read AAGA with a 1-bp deletion (CIGAR 2M1D2M, MD 2^A2) on
            chr1, where the variant dict holds an entry for the same
            location. The deletion is expected to stay in place and be
            logged as Uncorrected/VariantMatch. """

        read_seq = "AAGA"
        read_cigar = "2M1D2M"
        deletion_id = "chr1_202892095_202892096"
        sam_fields = [
            "test_read", "0", "chr1", "202892094", "255", read_cigar, "*", "0",
            "0", read_seq, "*", "NM:i:1", "MD:Z:2^A2", "jI:B:i,-1", "jM:B:c,-1"
        ]

        # Reference genome, known variants, and per-read log record
        genome = Fasta("input_files/hg38_chr1.fa")
        known_variants = {deletion_id: 1}
        log_info = TC.init_log_info(sam_fields)

        # Build the transcript object (no splice annotation)
        transcript = t2.Transcript(sam_fields, genome, None)

        # Attempt correction with a max length of 5
        te_log = TC.correctDeletions(transcript, genome, known_variants, 5,
                                     log_info)

        # The read must come back unchanged: the deletion is still present
        assert transcript.SEQ == read_seq
        assert transcript.CIGAR == read_cigar

        # The TE log must attribute the skipped correction to the variant
        expected_fields = (
            "test_read", deletion_id, "Deletion", "1",
            "Uncorrected", "VariantMatch",
        )
        assert te_log == "\t".join(expected_fields) + "\n"
Exemplo n.º 3
0
    def test_correctable_deletion(self):
        """ Toy read with sequence AA-GA, where '-' marks a deleted 'A'
            (CIGAR 2M1D2M, MD 2^A2).
            Spans chr1: 202,892,094 - 202,892,098, deletion at 202,892,096.
            With a cutoff of 5 and no variants, the deleted base is expected
            to be restored, giving AAAGA / 5M. """

        sam_fields = [
            "test_read", "0", "chr1", "202892094", "255", "2M1D2M", "*", "0",
            "0", "AAGA", "*", "NM:i:1", "MD:Z:2^A2", "jI:B:i,-1", "jM:B:c,-1"
        ]

        # Reference genome and per-read log record
        genome = Fasta("input_files/hg38_chr1.fa")
        log_info = TC.init_log_info(sam_fields)

        # Build the transcript object (no splice annotation)
        transcript = t2.Transcript(sam_fields, genome, None)

        # Attempt correction with no known variants and a max length of 5
        te_log = TC.correctDeletions(transcript, genome, {}, 5, log_info)

        # The deletion must be gone from both the sequence and the CIGAR
        corrected_seq = "AAAGA"
        corrected_cigar = "5M"
        assert transcript.SEQ == corrected_seq
        assert transcript.CIGAR == corrected_cigar

        # The TE log must record a successful correction
        expected_fields = (
            "test_read", "chr1_202892095_202892096", "Deletion", "1",
            "Corrected", "NA",
        )
        assert te_log == "\t".join(expected_fields) + "\n"