def create_read_list(samfile):
    """ Read in a samfile and convert it to a list of reads for the sampler
    object.

    This function is for single end reads only. Every line of the samfile is
    parsed with sam_utils.SamAlignment and its aligned blocks are requested
    through get_aligned_blocks(). Only reads that come back as exactly one
    block are added to the sampler; gapped reads (more than one block) and
    reads without any block are logged and skipped.

    Args:
        samfile (fhandle): an open filehandle for reading of a samfile (any
            iterable yielding one alignment line at a time works)

    Returns:
        read_sampler(obj(ReadSampler)): final read sampler for the samfile
    """
    read_sampler = ReadSampler()
    for line in samfile:
        alignment = sam_utils.SamAlignment(line)
        blocks = alignment.get_aligned_blocks()
        # len() rather than truthiness: the container type returned by
        # get_aligned_blocks() is not visible from here.
        n_blocks = len(blocks)
        if n_blocks == 0:
            # Nothing to index below; skip instead of dying on blocks[0].
            logging.info("Skipping read with no aligned blocks %s",
                         alignment.QNAME)
            continue
        if n_blocks > 1:
            # The message announces a skip, so actually skip the read rather
            # than falling through and adding its first block.
            logging.info("Skipping gapped read %s %s",
                         alignment.QNAME, str(blocks))
            continue
        read_sampler.add_read(blocks[0])
    return read_sampler
Exemplo n.º 2
0
def create_read_list_paired(samfile):
    """ Read in a samfile and convert it to a list of reads for the sampler
    object.

    This function is for paired end reads only. Skips any reads that are gapped
    reads or are not properly paired. Assumes samfile is sorted by readname and
    only one alignment per pair is present in the file. If these assumptions
    are not met then this function will yield nonsense.

    Lines are consumed two at a time. Reading stops as soon as the second
    line of a pair is missing; a non-blank first line left over at that point
    is reported with a warning and ignored.

    Args:
        samfile (fhandle): an open filehandle for reading of a samfile

    Returns:
        read_sampler(obj(ReadSampler)): final read sampler for the samfile

    Raises:
        ValueError: If pair readnames don't match. Not considered a failsafe
        for catching violations of assumptions above but should catch most
        mistakes.
    """
    read_sampler = ReadSampler()
    while True:
        line1 = samfile.readline()
        line2 = samfile.readline()
        if not line2:
            if line1.strip():
                # Odd number of alignment lines: the last read has no mate.
                logging.warning("Ignoring final read without a mate at end "
                                "of input")
            break
        line1 = sam_utils.SamAlignment(line1)
        line2 = sam_utils.SamAlignment(line2)
        if line1.QNAME != line2.QNAME:
            # Adjacent string literals keep the message on one logical line;
            # backslash continuations inside a literal would embed the source
            # indentation in the message.
            raise ValueError(
                "Unpaired read or read with more than one pair encountered. "
                "Check your input file. File must be sorted by read name, "
                "every read must have a single pair and each pair must have "
                "one mapping. %s %s" % (line1.QNAME, line2.QNAME))
        try:
            read_sampler.add_read(get_paired_blocks(line1, line2))
        except (ValueError, RuntimeError) as err:
            # Best effort: a pair that cannot be converted is reported and
            # dropped, the rest of the file is still processed.
            logging.error("Skipping pair %s", err)
    return read_sampler
def create_read_list_paired(samfile):
    """ Read in a paired end samfile and convert it to a list of reads for
    the sampler object.

    The file is consumed two lines at a time and each pair of lines is parsed
    with sam_utils.SamAlignment. Both alignments must carry the same QNAME,
    so the input has to be arranged with mates on adjacent lines. The pair is
    turned into a single entry via get_paired_blocks() and added to the
    sampler. Pairs for which that step raises ValueError or RuntimeError are
    logged and skipped.

    Reading stops when the second line of a pair is missing; a non-blank
    first line left over at that point is reported with a warning and
    ignored.

    Args:
        samfile (fhandle): an open filehandle for reading of a samfile

    Returns:
        read_sampler(obj(ReadSampler)): final read sampler for the samfile

    Raises:
        ValueError: If the QNAMEs of two adjacent lines do not match.
    """
    read_sampler = ReadSampler()
    while True:
        first_line = samfile.readline()
        second_line = samfile.readline()
        if not second_line:
            if first_line.strip():
                # Odd number of alignment lines: the last read has no mate.
                logging.warning("Ignoring final read without a mate at end "
                                "of input")
            break
        mate1 = sam_utils.SamAlignment(first_line)
        mate2 = sam_utils.SamAlignment(second_line)
        if mate1.QNAME != mate2.QNAME:
            # Implicit literal concatenation avoids the runs of indentation
            # spaces that backslash continuations inside a string literal
            # would put into the message.
            raise ValueError(
                "Unpaired read or read with more than one pair encountered. "
                "Check your input file. File must be sorted by read name, "
                "every read must have a single pair and each pair must have "
                "one mapping. %s %s" % (mate1.QNAME, mate2.QNAME))
        try:
            read_sampler.add_read(get_paired_blocks(mate1, mate2))
        except (ValueError, RuntimeError) as err:
            # Best effort: report the unusable pair and keep going.
            logging.error("Skipping pair %s", err)
    return read_sampler
Exemplo n.º 4
0
def main():
    """ Write the names of reads from stdin that overlap known locations.

    Command line arguments:
        sys.argv[1]: prefix for the output file; names are written one per
            line to "<prefix>_bad_reads.txt"
        sys.argv[2]: passed to read_list_of_locations() together with a
            hard-coded sequence name to obtain the locations to test against

    Each line on stdin is parsed with sam_utils.SamAlignment. A read is
    assigned "+" when sense_to_ref(True, "R2") is truthy and "-" otherwise;
    reads for which that call raises are skipped. The read's (start, end)
    interval is then tested with read_overlap() against the locations stored
    for its RNAME and strand, and its QNAME is written out on a hit.
    """
    prefix = sys.argv[1]
    gff = sys.argv[2]
    locs = read_list_of_locations(gff,
                                  ["gi|48994873|gb|U00096.2|mod|ATCC.47076|"])
    with open(prefix + "_bad_reads.txt", mode="w") as f:
        for line in sys.stdin:
            read = sam_utils.SamAlignment(line)
            try:
                strand = "+" if read.sense_to_ref(True, "R2") else "-"
            except Exception:
                # Best effort: a read whose orientation cannot be determined
                # is skipped. Catching Exception (not a bare except) keeps
                # KeyboardInterrupt and SystemExit working.
                continue
            # The third returned value (gaps) is not needed here.
            start, end, _ = read.start_end_gaps(True)
            if read_overlap(locs[read.RNAME][strand], [(start, end)]):
                f.write(read.QNAME + "\n")
Exemplo n.º 5
0
def create_read_list(samfile):
    """ Read in a samfile and convert it to a list of reads for the sampler
    object

    This function is for single end reads only. Skips any reads that are
    gapped reads (more than one aligned block) as well as reads for which no
    aligned block is returned; each skipped read is logged.

    Args:
        samfile (fhandle): an open filehandle for reading of a samfile

    Returns:
        read_sampler(obj(ReadSampler)): final read sampler for the samfile

    """
    read_sampler = ReadSampler()
    for raw_line in samfile:
        read = sam_utils.SamAlignment(raw_line)
        vals = read.get_aligned_blocks()
        # Compare lengths explicitly; the container type returned by
        # get_aligned_blocks() is not visible from here.
        if len(vals) == 0:
            # Avoid an IndexError on vals[0] further down.
            logging.info("Skipping read with no aligned blocks %s",
                         read.QNAME)
            continue
        if len(vals) > 1:
            # Honour the documented contract: gapped reads are skipped and
            # not added to the sampler.
            logging.info("Skipping gapped read %s %s", read.QNAME, str(vals))
            continue
        read_sampler.add_read(vals[0])
    return read_sampler