def test_reverse(self):
    """Test reverse complement function.

    Checks that ``reverse_complement("ATGC")`` returns ``"GCAT"``.

    Raises:
        ValueError: if the returned sequence is anything other than "GCAT".
    """
    from mirtop.mirna.realign import reverse_complement

    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("Testing ATGC complement")
    complement = reverse_complement("ATGC")
    if complement != "GCAT":
        # Raise rather than only logging, so a wrong result actually fails
        # the test instead of passing silently.
        raise ValueError("ATGC complement is not: %s" % complement)
def test_reverse(self):
    """Verify that reverse_complement turns "ATGC" into "GCAT".

    Raises:
        ValueError: when the computed reverse complement is not "GCAT".
    """
    from mirtop.mirna.realign import reverse_complement

    print("Testing ATGC complement")
    # Happy path first: nothing more to do when the result is as expected.
    if reverse_complement("ATGC") == "GCAT":
        return
    raise ValueError(
        "ATGC complement is not: %s" % reverse_complement("ATGC"))
def _bed(handle, bed_fn):
    """Write the mapped reads of a SAM-formatted text stream to a BED-like file.

    Each non-header line is split on whitespace and read by position:
    column 0 is the read name, 1 the FLAG, 2 the reference name, 3 the
    position and 9 the read sequence. Column 14 is written out as the
    read count.
    NOTE(review): column 14 is assumed to hold the counts; this depends on
    the optional tags the upstream aligner emits -- confirm against caller.

    Lines starting with "@", reads whose reference name is "*" and reads
    whose sequence contains "N" are skipped.

    Args:
        handle: iterable of text lines in SAM format.
        bed_fn: path of the output file; it is opened for writing and any
            existing content is replaced.

    Output columns (tab separated): chrom, start, end, read name,
    read sequence, strand, counts.
    """
    with open(bed_fn, 'w') as outh:
        for line in handle:
            if line.startswith("@"):
                continue
            cols = line.strip().split()
            if cols[2] == "*":
                logger.debug("READ::Sequence not mapped: %s" % cols[0])
                continue
            query_name = cols[0]
            query_sequence = cols[9]
            counts = cols[14]
            start = int(cols[3])
            chrom = cols[2]
            # FLAG is a bit field: 0x10 marks a reverse-strand alignment and
            # can be combined with other bits (e.g. 272 = secondary +
            # reverse), so test the bit instead of comparing with "16".
            is_reverse = bool(int(cols[1]) & 0x10)
            strand = "-" if is_reverse else "+"
            if is_reverse:
                # Undo the reverse complement applied to reverse-strand
                # records so the sequence written is the one that was read.
                query_sequence = reverse_complement(query_sequence)
            if query_sequence and query_sequence.find("N") > -1:
                continue
            end = start + len(query_sequence) - 1
            fields = [chrom, start, end, query_name, query_sequence,
                      strand, counts]
            outh.write("\t".join(map(str, fields)) + '\n')
def _analyze_line(line, reads, precursors, handle, args):
    """Register one alignment record and its isomiR hit in ``reads``.

    The record is ignored (``reads`` returned untouched) when it is
    unmapped, has no CIGAR string, has no sequence for a read not seen
    before, contains "N", maps to the reverse strand while
    ``args.genomic`` is false, or extends beyond the precursor.

    Args:
        line: alignment record; the attributes read here are reference_id,
            cigarstring, cigartuples, query_name, query_sequence,
            is_reverse and reference_start.
        reads: mapping from read name to read object. Entries for unseen
            names are obtained by plain indexing, so the mapping presumably
            creates them on lookup (e.g. a defaultdict) -- verify against
            caller.
        precursors: mapping from reference name to precursor sequence.
        handle: open alignment file, used to resolve the reference name.
        args: options object; only ``args.genomic`` is read.

    Returns:
        The same ``reads`` mapping, possibly updated.
    """
    # TODO if args.quant set to 0
    # TODO if args.quant increase by 1
    if line.reference_id < 0:
        logger.debug("READ::Sequence not mapped: %s" % line.reference_id)
        return reads
    if not line.cigarstring:
        logger.debug("READ::Sequence malformed: %s" % line)
        return reads

    query_name = line.query_name
    already_seen = query_name in reads
    raw_sequence = line.query_sequence
    if not (already_seen or raw_sequence):
        return reads

    if line.is_reverse:
        sequence = reverse_complement(raw_sequence)
    else:
        sequence = raw_sequence
    logger.debug(("READ::Read name:{0} and Read sequence:{1}").format(
        line.query_name, sequence))
    if raw_sequence and "N" in raw_sequence:
        return reads

    if not already_seen:
        # First time this read name shows up: store sequence and counts.
        new_read = reads[query_name]
        new_read.set_sequence(sequence)
        new_read.counts = _get_freq(query_name)

    if line.is_reverse and not args.genomic:
        logger.debug("READ::Sequence is reverse: %s" % line.query_name)
        return reads

    chrom = handle.getrname(line.reference_id)
    start = line.reference_start
    cigar = line.cigartuples
    read = reads[query_name]

    iso = isomir()
    iso.align = line
    iso.set_pos(start, len(read.sequence))
    logger.debug("READ::From BAM start %s end %s at chrom %s" %
                 (iso.start, iso.end, chrom))

    # Allow the read to run up to 3 positions past the precursor once its
    # "N" characters are removed; anything longer is discarded.
    precursor = precursors[chrom]
    allowed_end = len(precursor.replace("N", "")) + 3
    read_end = start + len(read.sequence)
    if allowed_end < read_end:
        logger.debug("READ::%s start + %s sequence size are bigger than"
                     " size precursor %s" % (line.reference_id,
                                             len(read.sequence),
                                             len(precursor)))
        return reads

    tuned = filter.tune(read.sequence, precursor, start, cigar)
    iso.subs, iso.add, iso.cigar = tuned
    logger.debug("READ::After tune start %s end %s" % (iso.start, iso.end))
    logger.debug("READ::iso add %s iso subs %s" % (iso.add, iso.subs))

    read.set_precursor(chrom, iso)
    return reads
def _analyze_quick_line(line, reads):
    """Record the sequence and counts of one alignment record in ``reads``.

    The record is ignored (``reads`` returned untouched) when it is
    unmapped, has no CIGAR string, has no sequence for a read not seen
    before, or contains "N". Reads already present are left as they are.

    Args:
        line: alignment record; the attributes read here are reference_id,
            cigarstring, query_name, query_sequence and is_reverse.
        reads: mapping from read name to read object. Entries for unseen
            names are obtained by plain indexing, so the mapping presumably
            creates them on lookup (e.g. a defaultdict) -- verify against
            caller.

    Returns:
        The same ``reads`` mapping, possibly updated.
    """
    if line.reference_id < 0:
        logger.debug("READ::Sequence not mapped: %s" % line.reference_id)
        return reads
    if not line.cigarstring:
        logger.debug("READ::Sequence malformed: %s" % line)
        return reads

    query_name = line.query_name
    already_seen = query_name in reads
    raw_sequence = line.query_sequence
    if not (already_seen or raw_sequence):
        return reads

    if line.is_reverse:
        sequence = reverse_complement(raw_sequence)
    else:
        sequence = raw_sequence
    logger.debug(("READ::Read name:{0} and Read sequence:{1}").format(
        line.query_name, sequence))
    if raw_sequence and "N" in raw_sequence:
        return reads

    if not already_seen:
        # First time this read name shows up: store sequence and counts.
        new_read = reads[query_name]
        new_read.set_sequence(sequence)
        new_read.counts = _get_freq(query_name)
    return reads
def _bed(bam_fn, bed_fn):
    """Convert the mapped reads of a SAM/BAM file into a BED-like file.

    If ``bed_fn`` already exists nothing is read or written. Otherwise
    every usable record is written as one tab-separated line with the
    columns: chrom, start, end, read name, read sequence, strand.

    Records are skipped when they are unmapped, have no CIGAR string,
    contain "N", or start a new read name without carrying a sequence.
    Consecutive records with the same read name reuse the sequence taken
    from the first of them.

    Args:
        bam_fn: input path; opened in text mode when it ends with "sam",
            in binary mode otherwise.
        bed_fn: output path.

    Returns:
        ``bed_fn``, whether the file was just written or already existed.
    """
    # Check for an existing output before opening the alignment file, so
    # the early return does not leave an open handle behind.
    if os.path.exists(bed_fn):
        return bed_fn

    mode = "r" if bam_fn.endswith("sam") else "rb"
    handle = pysam.Samfile(bam_fn, mode)
    current = None
    try:
        with open(bed_fn, 'w') as outh:
            for line in handle:
                if line.reference_id < 0:
                    logger.debug("READ::Sequence not mapped: %s" %
                                 line.reference_id)
                    continue
                if not line.cigarstring:
                    logger.debug("READ::Sequence malformed: %s" % line)
                    continue
                query_name = line.query_name
                is_new_read = not current or query_name != current
                if is_new_read and not line.query_sequence:
                    continue
                if is_new_read:
                    # Only the first record of a read name refreshes the
                    # sequence; later records of the same name reuse it.
                    if line.is_reverse:
                        sequence = reverse_complement(line.query_sequence)
                    else:
                        sequence = line.query_sequence
                    logger.debug(
                        ("READ::Read name:{0} and Read sequence:{1}").format(
                            line.query_name, sequence))
                if line.query_sequence and line.query_sequence.find("N") > -1:
                    continue
                chrom = handle.getrname(line.reference_id)
                start = line.reference_start
                end = start + len(sequence) - 1
                current = query_name
                strand = "-" if line.is_reverse else "+"
                fields = [chrom, start, end, query_name, sequence, strand]
                outh.write("\t".join(map(str, fields)) + '\n')
    finally:
        # Release the alignment file even if reading or writing fails.
        handle.close()
    return bed_fn