Пример #1
0
def make_joined_read(mate, reads, tags=None):
    """Build one pysam.AlignedRead by concatenating the segments in *reads*.

    The result is flagged as paired but not a proper pair, and all of its
    mate fields are set to the 'unmapped' values.

    Parameters:
        mate: 0 marks the result as read 1; any other value marks it as
            read 2.
        reads: non-empty sequence of read objects (``reads[0]`` is always
            consulted).  Each must expose ``seq`` and ``qual``; mapped
            segments must carry 'NM' and 'MD' tags and unmapped segments an
            'XM' tag, all read through ``opt()``.
        tags: optional iterable of ``(name, value)`` pairs placed ahead of
            the tags generated here.  It is copied, never modified.

    Returns:
        The newly created pysam.AlignedRead.
    """
    # Work on a private copy: appending to the caller's list would leak the
    # generated XM/NM/MD tags back into it (and into any later call that
    # reuses the same list).
    tags = [] if tags is None else list(tags)
    # flip reverse strand reads: the segments are re-ordered in descending
    # order using the read objects' own comparison
    if not reads[0].is_unmapped and reads[0].is_reverse:
        reads = sorted(reads, reverse=True)
    # make new reads
    a = pysam.AlignedRead()
    # create paired-end reads but do not mark them
    # as proper pairs and set all mate information
    # to 'unmapped'
    a.qname = reads[0].qname
    # seq is assigned before qual on purpose; keep this order
    a.seq = ''.join(r.seq for r in reads)
    a.qual = ''.join(r.qual for r in reads)
    a.is_paired = True
    a.is_proper_pair = False
    a.mate_is_unmapped = True
    a.mrnm = -1
    a.mpos = -1
    if mate == 0:
        a.is_read1 = True
        a.is_read2 = False
    else:
        a.is_read1 = False
        a.is_read2 = True
    a.isize = 0
    a.mapq = 255
    # the mapped/unmapped state of the whole joined read follows segment 0
    a.is_unmapped = reads[0].is_unmapped
    if a.is_unmapped:
        a.rname = -1
        a.pos = 0
        # add the XM tag from bowtie saying whether unmapped
        # due to multimapping or other reason
        xm_tag = min(r.opt('XM') for r in reads)
        tags.append(('XM', xm_tag))
    else:
        a.is_reverse = reads[0].is_reverse
        a.rname = reads[0].rname
        a.pos = reads[0].pos
        # one CIGAR operation with code 0 spanning the full joined sequence
        a.cigar = ((0, len(a.seq)), )
        # edit distance of the joined read is the sum over its segments
        tags.append(('NM', sum(r.opt('NM') for r in reads)))
        # compute mismatches to reference (MD)
        tags.append(('MD', merge_MD_tags([r.opt('MD') for r in reads])))
    a.tags = tags
    return a
Пример #2
0
def _make_untranslatable_placeholder(read):
    """Return a fresh unmapped read carrying only the name, bases and
    qualities of *read*, tagged with XM=0."""
    stub = pysam.AlignedRead()
    stub.qname = read.qname
    stub.seq = read.seq
    stub.is_unmapped = True
    stub.rname = -1
    stub.pos = -1
    stub.mapq = 0
    stub.mrnm = -1
    stub.mpos = -1
    stub.isize = 0
    # qualities are assigned after seq, as in the rest of this file
    stub.qual = read.qual
    stub.tags = [("XM", 0)]
    return stub


def translate_read(read, chrom, strand, intervals):
    """Rewrite an alignment so that it refers to genomic coordinates.

    Parameters:
        read: the alignment to translate; modified in place on every path
            except the ``chrom == -1`` one.
        chrom: target reference id; -1 means the alignment cannot be
            translated.
        strand: strand of the feature the read aligned to; compared against
            STRAND_REV and also used by truthiness to pick the XS value.
        intervals: intervals handed to
            translate_transcriptome_to_genomic_intervals, or None when the
            read already maps directly to a genomic reference.

    Returns:
        *read* itself (possibly modified), or a new unmapped placeholder
        read when ``chrom == -1``.
    """
    # unmapped reads pass through untouched
    if read.is_unmapped:
        return read
    # alignments that cannot be translated are thrown away by substituting
    # a dummy unmapped read
    if chrom == -1:
        return _make_untranslatable_placeholder(read)
    # read maps directly to a genomic reference: only the reference id has
    # to change so that it matches the new SAM header
    if (chrom >= 0) and (intervals is None):
        read.rname = chrom
        return read

    genome_ivs = translate_transcriptome_to_genomic_intervals(
        read, chrom, strand, intervals)
    is_spliced, genomic_cigar = get_cigar(genome_ivs)
    if is_spliced:
        xs_value = "-" if strand else "+"
        read.tags = read.tags + [("XS", xs_value)]
    # move the alignment onto the genomic reference
    read.rname = chrom
    read.pos = genome_ivs[0][0]
    read.cigar = genomic_cigar
    # reads that aligned to negative strand features are flipped
    if strand == STRAND_REV:
        # Capture the reversed qualities *before* touching seq.
        # NOTE(review): pysam documents that assigning seq invalidates the
        # stored qualities - keep this ordering.
        flipped_quals = read.qual[::-1]
        read.is_reverse = not read.is_reverse
        read.seq = DNA_reverse_complement(read.seq)
        read.qual = flipped_quals
        # only the MD tag needs rewriting; all other tags are kept verbatim
        read.tags = [
            (name, reverse_complement_MD_tag(val) if name == 'MD' else val)
            for name, val in read.tags
        ]
    return read
Пример #3
0
def copy_read(r):
    """Return a new pysam.AlignedRead whose fields are copied from *r*.

    The fields are transferred one at a time, in the order listed below;
    the source read is only read from, never written to.
    """
    # Transfer order is significant and mirrors the original sequence of
    # assignments (in particular seq is set before qual).
    field_order = (
        'qname', 'seq', 'flag', 'rname', 'pos', 'mapq',
        'cigar', 'mrnm', 'mpos', 'isize', 'qual', 'tags',
    )
    duplicate = pysam.AlignedRead()
    for field in field_order:
        setattr(duplicate, field, getattr(r, field))
    return duplicate
Пример #4
0
def make_unmapped_copy(r):
    """Return a new unmapped pysam.AlignedRead derived from *r*.

    Only the name, bases, qualities and the read1/read2 markers are taken
    from *r*.  Everything else is set to fixed values: the copy is flagged
    unmapped, paired, not a proper pair, with an unmapped mate, and it
    carries a single XM=0 tag.
    """
    unmapped = pysam.AlignedRead()
    # (attribute, value) pairs, applied strictly in this order
    assignments = (
        ('qname', r.qname),
        ('seq', r.seq),
        ('qual', r.qual),
        ('is_unmapped', True),
        ('is_qcfail', False),
        ('is_paired', True),
        ('is_proper_pair', False),
        ('mate_is_unmapped', True),
        ('mrnm', -1),
        ('mpos', -1),
        ('is_read1', r.is_read1),
        ('is_read2', r.is_read2),
        ('isize', 0),
        ('mapq', 255),
        ('is_reverse', False),
        ('rname', -1),
        ('pos', 0),
        ('cigar', ()),
        ('tags', (('XM', 0), )),
    )
    for attr, value in assignments:
        setattr(unmapped, attr, value)
    return unmapped
Пример #5
0
def fastq_to_bam(fastq_files, qual_format, bam_file):
    """Write the records of one or more FASTQ files to a BAM file.

    Records are interleaved: one record is taken from each FASTQ file in
    turn and written immediately.  Writing stops as soon as any of the
    files runs out of records.  Every record is written without a
    reference (``rname`` and ``mrnm`` are -1); records from the first file
    are marked read 1 and records from the second file read 2.

    Parameters:
        fastq_files: sequence of FASTQ file paths.  When it is empty, a
            BAM file containing only the header is produced.
        qual_format: passed to get_qual_conversion_func to obtain the
            function applied to every quality string.
        bam_file: path of the BAM file to create.
    """
    qual_func = get_qual_conversion_func(qual_format)
    header = {'HD': {'VN': '1.0', 'SO': 'unknown'}}
    fastq_handles = []
    bamfh = None
    try:
        for path in fastq_files:
            fastq_handles.append(open(path))
        fqiters = [parse_fastq(fh) for fh in fastq_handles]
        bamfh = pysam.Samfile(bam_file, "wb", header=header)
        try:
            # 'while fqiters' rather than 'while True': with no input files
            # the inner loop never raises StopIteration, so an
            # unconditional loop would spin forever.
            while fqiters:
                for i, fqiter in enumerate(fqiters):
                    read_id, seq, qual = next(fqiter)
                    a = pysam.AlignedRead()
                    a.rname = -1
                    a.mrnm = -1
                    a.qname = read_id
                    # seq is assigned before qual on purpose; keep this order
                    a.seq = seq
                    a.qual = qual_func(qual)
                    a.is_read1 = (i == 0)
                    a.is_read2 = (i == 1)
                    bamfh.write(a)
        except StopIteration:
            # the first exhausted FASTQ file ends the conversion
            pass
    finally:
        # release every handle even when parsing or writing failed
        try:
            if bamfh is not None:
                bamfh.close()
        finally:
            for fh in fastq_handles:
                fh.close()