Пример #1
0
    def setUp(self):
        """Write the two BAM fixture files used by the tests.

        A header with three reference sequences is built, then
        ``test/test_bam1.bam`` (four reads) and ``test/test_bam2.bam``
        (two reads) are written from SAM-formatted text lines that are
        converted with ``read_to_dict``.
        """
        header = {
            "HD": {
                "VN": "1.0",
                "SO": "unsorted"
            },
            "SQ": [
                {
                    "SN": "ref1|seq1",
                    "LN": 1000000
                },
                {
                    "SN": "ref1|seq2",
                    "LN": 1000000
                },
                {
                    "SN": "ref2|seq1",
                    "LN": 1000000
                },
            ],
        }

        bam1_reads = [
            "read1     0       ref1|seq1      24975   42      80M     *       0       0       TGGGCCAGAAAAAATGACTTCTCCATCTCGCTGCCGGTAGACCGACTCTCTTTTCTGCTGGCGGTTGCCACGCTGAGCGG        AAAAAF.A.FFAFFFFFAFFFFFFFFFFFFFF<FFFFAFFFFFFA.FFFFA<7FFFFFFFF<FFFFFF))<FFFFF.FFF        AS:i:-3 XN:i:0  XM:i:1  XO:i:0  XG:i:0  NM:i:1  MD:Z:76A3       YT:Z:UU",
            "read2     0       ref1|seq1      20984   42      80M     *       0       0       GTTTAAACAGTTGTTGTTGTTCTTCCTGCGATACTCCACTTCCAGAAGCCATAATCGTCATTTTGATAACAGCGTGGTTG        AAAAA.<FFAFFFFFFF<FFAFF)FFFFF<FFF.FFA)FFAF<F<F<.FF<F.FFAFFF7FAFFF.AF.<)F7FFAAFFF        AS:i:-6 XN:i:0  XM:i:2  XO:i:0  XG:i:0  NM:i:2  MD:Z:33A13T32   YT:Z:UU",
            "read3     0       ref2|seq1       3210    42      80M     *       0       0       ACCTACCACTTCACCGACATATTCATGGCCCACGACCATCGGCACCGGGATGGATTTTTGCGACCACTCATCCCAGTTAT        AAAA7FAFFFFF.FFFFF<FFFAA7FFFFFF7FFFFFFFA<FF7FFAF<F.FF.FFF7FFFFAF<FFFFAFFFFA77FFF        AS:i:-3 XN:i:0  XM:i:1  XO:i:0  XG:i:0  NM:i:1  MD:Z:53T26      YT:Z:UU",
            "read4     0       ref1|seq2       9298    23      79M     *       0       0       CAGCATCGCTTCCAAAAATAGTAGTGCAGTTGATCGGAGTAGGAGCGTAATGGATTGCCTGCGTGATTGGCTATCTGGC AAAAAF.A.FFAFFFFFAFFFFFFFFFFFFFF<FFFFAFFFFFFA.FFFFA<7FFFFFFFF<FFFFFF))<FFFFF.FF AS:i:-23        XN:i:0  XM:i:6  XO:i:0  XG:i:0  NM:i:6  MD:Z:19T8A0C2T4T10T30   YT:Z:UU",
        ]

        bam2_reads = [
            "read1     0       ref2|seq1      24975   50      80M     *       0       0       TGGGCCAGAAAAAATGACTTCTCCATCTCGCTGCCGGTAGACCGACTCTCTTTTCTGCTGGCGGTTGCCACGCTGAGCGG        AAAAAF.A.FFAFFFFFAFFFFFFFFFFFFFF<FFFFAFFFFFFA.FFFFA<7FFFFFFFF<FFFFFF))<FFFFF.FFF        AS:i:0 XN:i:0  XM:i:1  XO:i:0  XG:i:0  NM:i:1  MD:Z:76A3       YT:Z:UU",
            "read2     0       ref2|seq1      20984   30      80M     *       0       0       GTTTAAACAGTTGTTGTTGTTCTTCCTGCGATACTCCACTTCCAGAAGCCATAATCGTCATTTTGATAACAGCGTGGTTG        AAAAA.<FFAFFFFFFF<FFAFF)FFFFF<FFF.FFA)FFAF<F<F<.FF<F.FFAFFF7FAFFF.AF.<)F7FFAAFFF        AS:i:-12 XN:i:0  XM:i:2  XO:i:0  XG:i:0  NM:i:2  MD:Z:33A13T32   YT:Z:UU",
        ]

        # from_dict is a classmethod on both pysam types, so no throw-away
        # instances are needed to call it.
        aln_header = pysam.AlignmentHeader.from_dict(header)

        fixtures = (
            ("test/test_bam1.bam", bam1_reads),
            ("test/test_bam2.bam", bam2_reads),
        )
        for bam_path, sam_lines in fixtures:
            # The context manager closes the file even when converting or
            # writing a read raises, so a failing fixture leaks no handle.
            with pysam.AlignmentFile(bam_path, "wb", header=header) as bam:
                for sam_line in sam_lines:
                    fields = read_to_dict(sam_line)
                    bam.write(
                        pysam.AlignedSegment.from_dict(fields, aln_header))
Пример #2
0
    def build_read(self):
        '''Build an example read.

        Returns a ``pysam.AlignedSegment`` named ``read_12345`` with a
        40-base sequence, placed on the first of two references
        (``chr1``/``chr2``) at 0-based start 20.
        '''

        # The header is built with the from_references classmethod (as used
        # elsewhere with pysam); the plain AlignmentHeader() constructor is
        # not given the reference names/lengths.
        header = pysam.AlignmentHeader.from_references(
            ["chr1", "chr2"],
            [10000000, 10000000])

        a = pysam.AlignedSegment(header)
        a.query_name = "read_12345"
        a.query_sequence = "ATGC" * 10
        a.flag = 0
        a.reference_id = 0
        a.reference_start = 20
        a.mapping_quality = 20
        # (operation, length) pairs; query-consuming lengths
        # (10 + 9 + 1 + 20) add up to the 40-base sequence above.
        a.cigartuples = ((0, 10), (2, 1), (0, 9), (1, 1), (0, 20))
        a.next_reference_id = 0
        a.next_reference_start = 200
        a.template_length = 167
        # Qualities are assigned after the sequence; four values repeated
        # ten times give one quality per base.
        a.query_qualities = pysam.qualitystring_to_array("1234") * 10
        return a
Пример #3
0
def make_read(ref_id, name, start, cigar):
    """Build a SAM-format read placed at a given reference position.

    Args:
        ref_id: Index of the reference in the header built here
            (0 for ``chr1``, 1 for ``chr2``).
        name: Query name assigned to the read.
        start: Value assigned to ``reference_start``.
        cigar: CIGAR assigned to the read; presumably a sequence of
            ``(operation, length)`` tuples -- confirm against callers.

    Returns:
        A ``pysam.AlignedSegment`` carrying the tag ``FL`` set to 1.
    """
    # Reference lengths are plain integers: a float such as 1e6 would be
    # rendered as "1000000.0" in the header text, which is not a valid
    # integer sequence length.
    header = pysam.AlignmentHeader.from_dict({
        'HD': {
            'VN': '1.0'
        },
        'SQ': [{
            'LN': 1000000,
            'SN': 'chr1'
        }, {
            'LN': 1000000,
            'SN': 'chr2'
        }]
    })
    tgs_read = pysam.AlignedSegment(header)
    tgs_read.query_name = name
    tgs_read.reference_id = ref_id
    tgs_read.reference_start = start
    # cigartuples / set_tag are the current names of the deprecated
    # cigar / setTag aliases.
    tgs_read.cigartuples = cigar
    tgs_read.set_tag('FL', 1)
    return tgs_read
Пример #4
0
def vcf_from_fasta(args):
    """Entry point for calling variants by consensus sequence alignment.

    Alignments are read from ``args.bam`` when it is given. Otherwise they
    are produced by ``edlib_chunked_align_fastas`` from ``args.consensus``
    and ``args.ref_fasta`` and are additionally written to
    ``<out_prefix>.bam``, which is indexed once all alignments are written.

    Variants yielded for each alignment are written to ``<out_prefix>.vcf``
    (variants whose reference allele contains ``N`` are skipped). The
    merged alignment intervals and their complement are written to
    ``<out_prefix>_coverage.bed`` and ``<out_prefix>_coverage_gaps.bed``.

    :param args: namespace providing ``ref_fasta``, ``bam`` and
        ``out_prefix``; when ``bam`` is None also ``regions``,
        ``consensus``, ``chunk_size``, ``pad`` and ``mode``.
    """
    logger = medaka.common.get_named_logger('CONS2VCF')

    with pysam.FastaFile(args.ref_fasta) as fasta:
        ref_seqs = {name: fasta.fetch(name) for name in fasta.references}
        contig_lengths = dict(zip(fasta.references, fasta.lengths))
        total_bp = sum(fasta.lengths)
        ref_contigs = fasta.references
        h = pysam.AlignmentHeader().from_references(fasta.references,
                                                    fasta.lengths)

    # Both handles are tracked so that the finally-clause below can close
    # whichever one was opened, also when processing fails part-way.
    in_bam = None
    out_bam = None
    try:
        if args.bam is not None:
            in_bam = pysam.AlignmentFile(args.bam)
            alns = in_bam
        else:
            out_bam = pysam.AlignmentFile(
                args.out_prefix + '.bam', 'wb', header=h)
            if args.regions is not None:
                contigs = [r.ref_name for r in args.regions]
            else:
                contigs = None
            alns = edlib_chunked_align_fastas(args.consensus,
                                              args.ref_fasta,
                                              contigs,
                                              chunk_size=args.chunk_size,
                                              pad=args.pad,
                                              mode=args.mode,
                                              header=h)
        vcf_fp = args.out_prefix + '.vcf'
        trees = collections.defaultdict(intervaltree.IntervalTree)
        t_log = now()
        log_interval = 5
        msg = 'Processed {:.2%} of reference.'
        bp_done = collections.Counter()

        header_contigs = [
            '{},length={}'.format(c, contig_lengths[c]) for c in ref_contigs
        ]
        with medaka.vcf.VCFWriter(vcf_fp, contigs=header_contigs) as writer:
            for aln in alns:
                # reference_start is 0 based, reference_end points to one
                # past the last aligned residue, i.e. same as bed file
                ref = aln.reference_name
                rstart, rend = aln.reference_start, aln.reference_end
                if trees[ref].overlaps(rstart, rend) and args.bam is not None:
                    # We expect edlib alignments to overlap by 1 match so
                    # only apply this check for a user-provided bam.
                    logger.warning(
                        ('WARNING: alignment {}:{}-{} overlaps another ' +
                         'alignment, which could cause overlapping variants.' +
                         '\nCheck output bam and vcf for details.').format(
                             ref, rstart, rend))
                trees[ref].add(intervaltree.Interval(rstart, rend))
                for v in yield_variants_from_aln(aln, ref_seqs[ref]):
                    if 'N' in v.ref:
                        continue
                    writer.write_variant(v)
                    if now() - t_log > log_interval:
                        # total_bp spans every contig, so the numerator
                        # must count bases finished on all contigs, not
                        # only on the contig currently being processed.
                        done = sum(bp_done.values()) + v.pos - rstart
                        logger.info(msg.format(done / total_bp))
                        t_log = now()
                bp_done[ref] += rend - rstart
                if out_bam is not None:
                    out_bam.write(aln)
    finally:
        for handle in (in_bam, out_bam):
            if handle is not None:
                handle.close()

    # Indexing needs the completely written (closed) file.
    if out_bam is not None:
        pysam.index(out_bam.filename)

    bed_fp = args.out_prefix + '_coverage.bed'
    gap_bed_fp = args.out_prefix + '_coverage_gaps.bed'
    for tree in trees.values():
        # strict=False to merge abutting alignments.
        tree.merge_overlaps(strict=False)
    medaka.common.write_intervaltrees_to_bed(trees, bed_fp)
    gap_trees = medaka.common.complement_intervaltrees(trees, contig_lengths)
    medaka.common.write_intervaltrees_to_bed(gap_trees, gap_bed_fp)
    # loop over contigs for which we have alignments checking for gaps
    for contig in trees:
        if len(gap_trees[contig]):
            logger.info(('WARNING: There are alignment gaps for ref contig' +
                         ' {}, see bed files for details.').format(contig))
    if len(ref_contigs) != len(trees):
        logger.info('WARNING: Some contigs have no alignments, see bed files' +
                    ' for details.')
    # bp_done calculated above does not take account of overlapping alignments
    # hence recalculate here based on merged alignment intervals.
    aligned_bp = sum((i.length() for tree in trees.values() for i in tree))
    msg = 'Alignments spanned {:%} of the reference.'
    logger.info(msg.format(aligned_bp / total_bp))
    msg = 'Check bed files {} and {} for alignment coverage and gaps.'
    logger.info(msg.format(bed_fp, gap_bed_fp))
    logger.info('All done. VCF written to {}.'.format(vcf_fp))
Пример #5
0
    return clusters

def align_clusters(first, second):
    """Globally align two reads and summarise how they differ.

    The query sequences of *first* and *second* are aligned with
    ``sw.global_alignment``. A mismatch counts as high quality when the
    base quality at the mismatching position is above 20 in both reads.

    Returns:
        A tuple ``(alignment['XO'], number_of_high_quality_mismatches)``.
    """
    alignment = sw.global_alignment(first.query_sequence,
                                    second.query_sequence)

    hq_mismatch_count = sum(
        1
        for query_idx, target_idx in alignment['mismatches']
        if first.query_qualities[query_idx] > 20
        and second.query_qualities[target_idx] > 20
    )

    return alignment['XO'], hq_mismatch_count

def cell_key(al):
    """Return the value of the ``CELL_BC_TAG`` tag of alignment *al*."""
    return al.get_tag(CELL_BC_TAG)


def UMI_key(al):
    """Return the value of the ``UMI_TAG`` tag of alignment *al*."""
    return al.get_tag(UMI_TAG)


def loc_key(al):
    """Return the value of the ``LOC_TAG`` tag of alignment *al*."""
    return al.get_tag(LOC_TAG)


# Header without any reference sequences.
empty_header = pysam.AlignmentHeader()

def sort_cellranger_bam(bam_fn, sorted_fn, sort_key, filter_func, show_progress=False):
    Path(sorted_fn).parent.mkdir(exist_ok=True)

    bam_fh = pysam.AlignmentFile(str(bam_fn))

    als = bam_fh

    relevant = list(filter(filter_func, als))

    max_read_length = 0
    total_reads_out = 0
    
    chunk_fns = []
        
Пример #6
0
"""Test cases for the bam2fastq.py script"""
import unittest
import pysam
import bam2fastq
from io import StringIO

# SAM header used by the tests, parsed from text: one @HD line, a single
# @SQ reference (R00000042), two @RG read groups and a series of @CO
# comment lines.
# NOTE(review): two @CO lines contain "BaseCallsHN:" with no separator
# before "HN:" -- possibly a lost tab; confirm against the original data.
HEADER = pysam.AlignmentHeader().from_text("""@HD	VN:1.0	SO:coordinate
@SQ	SN:R00000042	LN:5231428	AS:gi|26111730|gb|AE014075.1|	SP:Ecol
@RG	ID:824f45e8-37f3-4cb9-8a05-63f0b7c9b959	PL:ILLUMINA	PU:160129_D00417_0381_AHJ2VGBCXX_2	LB:VAU2662A45	DT:2016-01-31T00:00:00+0000	SM:H125100459	CN:WTCHG
@RG	ID:7f568ff7-e0f6-4a55-ad17-6fe778ed8f83	PL:ILLUMINA	PU:160129_D00417_0381_AHJ2VGBCXX_1	LB:VAU2662A45	DT:2016-01-31T00:00:00+0000	SM:H125100459	CN:WTCHG
@CO	ID:stampy	TM:Mon, 07 Mar 2016 17:58:14 GMT	WD:/tmp/usecase3938954872417714817dir	HN:gel-pipeline3	UN:compass
@CO	ID:stampy	TM:Mon, 07 Mar 2016 20:24:40 GMT	WD:/tmp/usecase3938954872417714817dir	HN:gel-pipeline3	UN:compass
@CO	PN:stampy	ID:stampy	VN:1.0.23_(r2059)	CL:--substitutionrate=0.01 -g /tmp/usecase3938954872417714817dir/references/R00000042/R00000042 -h /tmp/usecase3938954872417714817dir/references/R00000042/R00000042 -M bam -o /tmp/usecase3938954872417714817dir/0564a575-a6f5-40bc-8898-b0b5e944c4d9.sam --logfile=/tmp/usecase3938954872417714817dir/0564a575-a6f5-40bc-8898-b0b5e944c4d9.sam.log --readgroup=ID:824f45e8-37f3-4cb9-8a05-63f0b7c9b959 --outputformat=sam -v 3
@CO	PN:stampy	ID:stampy	VN:1.0.23_(r2059)	CL:--substitutionrate=0.01 -g /tmp/usecase3938954872417714817dir/references/R00000042/R00000042 -h /tmp/usecase3938954872417714817dir/references/R00000042/R00000042 -M bam -o /tmp/usecase3938954872417714817dir/570a7dde-6c04-419c-a898-87f872dd4eda.sam --logfile=/tmp/usecase3938954872417714817dir/570a7dde-6c04-419c-a898-87f872dd4eda.sam.log --readgroup=ID:7f568ff7-e0f6-4a55-ad17-6fe778ed8f83 --outputformat=sam -v 3
@CO	PN:stampy	ID:stampy	VN:1.0.23_(r2059)	CL:--substitutionrate=0.01 -t 8 -g /tmp/R00000042 -h /tmp/R00000042 --readgroup=ID:WTCHG_246141_245101,SM:7c2f06_45,PL:ILLUMINA,PU:160129_D00417_0381_AHJ2VGBCXX_1,LB:VAU2662A45,DT:2016-01-31,CN:WTCHG --comment=@MISC/WTCHG_246141_245101.comments.txt -M FASTQ/WTCHG_246141_245101_1.fastq.gz,FASTQ/WTCHG_246141_245101_2.fastq.gz
@CO	ID:stampy	TM:Sun, 31 Jan 2016 13:04:28 GMT	WD:/data1/GA-DATA/160129_D00417_0381_AHJ2VGBCXX/Data/Intensities/BaseCallsHN:comp03.mgmt.cluster2	UN:johnb
@CO	PN:stampy	ID:stampy	VN:1.0.23_(r2059)	CL:--substitutionrate=0.01 -t 8 -g /tmp/R00000042 -h /tmp/R00000042 --readgroup=ID:WTCHG_246142_245101,SM:7c2f06_45,PL:ILLUMINA,PU:160129_D00417_0381_AHJ2VGBCXX_2,LB:VAU2662A45,DT:2016-01-31,CN:WTCHG --comment=@MISC/WTCHG_246142_245101.comments.txt -M FASTQ/WTCHG_246142_245101_1.fastq.gz,FASTQ/WTCHG_246142_245101_2.fastq.gz
@CO	ID:stampy	TM:Mon, 07 Mar 2016 21:33:54 GMT	WD:/tmp/usecase3938954872417714817dir	HN:gel-pipeline3	UN:compass
@CO	PN:stampy	ID:stampy	VN:1.0.23_(r2059)	CL:--substitutionrate=0.01 -g /tmp/usecase3938954872417714817dir/references/R00000042/R00000042 -h /tmp/usecase3938954872417714817dir/references/R00000042/R00000042 -M bam -o /tmp/usecase3938954872417714817dir/a00a7733-2cfc-46b3-a685-3657fdee6848.sam --logfile=/tmp/usecase3938954872417714817dir/a00a7733-2cfc-46b3-a685-3657fdee6848.sam.log --readgroup=ID:824f45e8-37f3-4cb9-8a05-63f0b7c9b959 --outputformat=sam -v 3
@CO	ID:stampy	TM:Sun, 31 Jan 2016 15:02:32 GMT	WD:/data1/GA-DATA/160129_D00417_0381_AHJ2VGBCXX/Data/Intensities/BaseCallsHN:comp01.mgmt.cluster2	UN:johnb
@CO	CMD:/home/compass/PIPELINE/mmmPipeline/compass/g4_stampy.py -b bam -r R00000042 -o output -ss seqstats -fs flagstats -g e865d957-12e5-479a-9a08-131dfa0e9a5e""")

# SAM records used as test input. The first eight records share one read
# name and differ in flag (99, 1123, 147, 1171) and, for the second record,
# in read group (RG:Z:test-test); the last record is a different read.
# NOTE(review): four quality strings contain the literal text
# "[email protected]", which looks like an e-mail-obfuscation artifact rather
# than real base qualities -- confirm against the original fixture.
# NOTE(review): the last record has "47901254790275" and "MQ:i:96XQ:i:186"
# with no separators inside them -- possibly lost tabs; confirm.
reads_string = """HISEQ2500-09:381:HJ2VGBCXX:2:1101:10005:7635	99	R00000042	1619836	99	151M	=	1620303	618	CCAGAACAGGCGCGGGAAATGTGCGATACCGCGCGCAAACTGGGCAAGGTGCTGGCCTACGACTTTCACCATCGTTTTGCGCTCGATACGCAACAGCTGCGTGAACAGGTGACCAACGGCGTTTTGGGAGAGATTTACGTTACCACCGCCC	DDDDDIIIIIIIIIIIIIIIHIHIIIIIIIIGIIIIIIIIGIIIHHIIIIGIIIIIIIIIIHIIIIIIIIIIIIIIIIIHIICGHIIIIHGIIIIHIIIGIGHIIIIIIIIIHHIIIIGIICHHHHIIHEHIIIIIIIIHHIIIIIHHIII	PQ:i:205	SM:i:96	UQ:i:78	MQ:i:96	XQ:i:270	NM:i:2	RG:Z:824f45e8-37f3-4cb9-8a05-63f0b7c9b959
HISEQ2500-09:381:HJ2VGBCXX:2:1101:10005:7635	99	R00000042	1619836	99	151M	=	1620303	618	CCAGAACAGGCGCGGGAAATGTGCGATACCGCGCGCAAACTGGGCAAGGTGCTGGCCTACGACTTTCACCATCGTTTTGCGCTCGATACGCAACAGCTGCGTGAACAGGTGACCAACGGCGTTTTGGGAGAGATTTACGTTACCACCGCCC	DDDDDIIIIIIIIIIIIIIIHIHIIIIIIIIGIIIIIIIIGIIIHHIIIIGIIIIIIIIIIHIIIIIIIIIIIIIIIIIHIICGHIIIIHGIIIIHIIIGIGHIIIIIIIIIHHIIIIGIICHHHHIIHEHIIIIIIIIHHIIIIIHHIII	PQ:i:205	SM:i:96	UQ:i:78	MQ:i:96	XQ:i:270	NM:i:2	RG:Z:test-test
HISEQ2500-09:381:HJ2VGBCXX:2:1101:10005:7635	1123	R00000042	1619836	99	151M	=	1620303	618	CCAGAACAGGCGCGGGAAATGTGCGATACCGCGCGCAAACTGGGCAAGGTGCTGGCCTACGACTTTCACCATCGTTTTGCGCTCGATACGCAACAGCTGCGTGAACAGGTGACCAACGGCGTTTTGGGAGAGATTTACGTTACCACCGCCC	DDDDDIIIIIIIIIIIIIIIHIHIIIIIIIIGIIIIIIIIGIIIHHIIIIGIIIIIIIIIIHIIIIIIIIIIIIIIIIIHIICGHIIIIHGIIIIHIIIGIGHIIIIIIIIIHHIIIIGIICHHHHIIHEHIIIIIIIIHHIIIIIHHIII	PQ:i:205	SM:i:96	UQ:i:78	MQ:i:96	XQ:i:270	NM:i:2	RG:Z:824f45e8-37f3-4cb9-8a05-63f0b7c9b959
HISEQ2500-09:381:HJ2VGBCXX:2:1101:10005:7635	1123	R00000042	1619836	99	151M	=	1620303	618	CCAGAACAGGCGCGGGAAATGTGCGATACCGCGCGCAAACTGGGCAAGGTGCTGGCCTACGACTTTCACCATCGTTTTGCGCTCGATACGCAACAGCTGCGTGAACAGGTGACCAACGGCGTTTTGGGAGAGATTTACGTTACCACCGCCC	DDDDDIIIIIIIIIIIIIIIHIHIIIIIIIIGIIIIIIIIGIIIHHIIIIGIIIIIIIIIIHIIIIIIIIIIIIIIIIIHIICGHIIIIHGIIIIHIIIGIGHIIIIIIIIIHHIIIIGIICHHHHIIHEHIIIIIIIIHHIIIIIHHIII	PQ:i:205	SM:i:96	UQ:i:78	MQ:i:96	XQ:i:270	NM:i:2	RG:Z:824f45e8-37f3-4cb9-8a05-63f0b7c9b959
HISEQ2500-09:381:HJ2VGBCXX:2:1101:10005:7635	147	R00000042	1620303	99	151M	=	1619836	-618	TATGAACGTCAGCTTTTGTGGTGATAAAGCTGGTGCGACGCTGTTTCCAGCACATATCTACACCGATAACAACGGTGAATTAATGACGCTGATGCAACGGGAAATGGCAGACGACAACCGCCATTTGCGCAGCATGGAAGCCTTTATCAAT	[email protected]@@.@HG?EHHGCEEIHHEIIHIIHHIHHDHC@CFHIH@F70HEHEHCIIHHGHHHEIIIIIHIHGIIIIIIIIIIIHHHIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIDDDDD	PQ:i:205	SM:i:96	UQ:i:270	MQ:i:96	XQ:i:78	NM:i:7	RG:Z:824f45e8-37f3-4cb9-8a05-63f0b7c9b959
HISEQ2500-09:381:HJ2VGBCXX:2:1101:10005:7635	147	R00000042	1620303	99	151M	=	1619836	-618	TATGAACGTCAGCTTTTGTGGTGATAAAGCTGGTGCGACGCTGTTTCCAGCACATATCTACACCGATAACAACGGTGAATTAATGACGCTGATGCAACGGGAAATGGCAGACGACAACCGCCATTTGCGCAGCATGGAAGCCTTTATCAAT	[email protected]@@.@HG?EHHGCEEIHHEIIHIIHHIHHDHC@CFHIH@F70HEHEHCIIHHGHHHEIIIIIHIHGIIIIIIIIIIIHHHIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIDDDDD	PQ:i:205	SM:i:96	UQ:i:270	MQ:i:96	XQ:i:78	NM:i:7	RG:Z:824f45e8-37f3-4cb9-8a05-63f0b7c9b959
HISEQ2500-09:381:HJ2VGBCXX:2:1101:10005:7635	1171	R00000042	1620303	99	151M	=	1619836	-618	TATGAACGTCAGCTTTTGTGGTGATAAAGCTGGTGCGACGCTGTTTCCAGCACATATCTACACCGATAACAACGGTGAATTAATGACGCTGATGCAACGGGAAATGGCAGACGACAACCGCCATTTGCGCAGCATGGAAGCCTTTATCAAT	[email protected]@@.@HG?EHHGCEEIHHEIIHIIHHIHHDHC@CFHIH@F70HEHEHCIIHHGHHHEIIIIIHIHGIIIIIIIIIIIHHHIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIDDDDD	PQ:i:205	SM:i:96	UQ:i:270	MQ:i:96	XQ:i:78	NM:i:7	RG:Z:824f45e8-37f3-4cb9-8a05-63f0b7c9b959
HISEQ2500-09:381:HJ2VGBCXX:2:1101:10005:7635	1171	R00000042	1620303	99	151M	=	1619836	-618	TATGAACGTCAGCTTTTGTGGTGATAAAGCTGGTGCGACGCTGTTTCCAGCACATATCTACACCGATAACAACGGTGAATTAATGACGCTGATGCAACGGGAAATGGCAGACGACAACCGCCATTTGCGCAGCATGGAAGCCTTTATCAAT	[email protected]@@.@HG?EHHGCEEIHHEIIHIIHHIHHDHC@CFHIH@F70HEHEHCIIHHGHHHEIIIIIHIHGIIIIIIIIIIIHHHIIIIIFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIDDDDD	PQ:i:205	SM:i:96	UQ:i:270	MQ:i:96	XQ:i:78	NM:i:7	RG:Z:824f45e8-37f3-4cb9-8a05-63f0b7c9b959
HISEQ2500-09:381:HJ2VGBCXX:2:2201:15073:80781	1121	R00000042	1	70	1M7I143M	=	47901254790275	ATTTTTCAGCTTTTCATTCTGACTGCAATGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTCTCTGACAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATA	DDDDDHEHIHIIIIGHIIIIHHIIIIGIIHHFHHIHIFECFHHIGGHIIIFHEHIIIIIIIIHIHEHHIIIIIHHIIHIGIGH?HEHHHIIFGHHFHHHIHEFHIIIIIIHIIIIGHHHHHEHFHHHHHHHHHHEHHGHIFHHIGCHGHHH	PQ:i:375	SM:i:70	UQ:i:217	MQ:i:96XQ:i:186	NM:i:10	RG:Z:824f45e8-37f3-4cb9-8a05-63f0b7c9b959"""