# Index every read sequence by its name so it can be looked up per alignment group.
reads = dict((str(rec.name), str(rec.seq)) for rec in fastaIterator(rfh))
sys.stderr.write("Loaded reads\n")

alignmentIt = getNucmerAlignmentIterator(afh)
sys.stderr.write("Loaded Alignments\n")

counter = 0
# groupby only merges adjacent items, so this relies on the alignment stream
# already being ordered by sname.
for name, group in groupby(alignmentIt, lambda aln: aln.sname):
    # coverage vector for this group's alignments
    cov = getCoverageFromNucAlignments(group)

    # invert the vector: 1 where coverage is zero (no coverage), 0 elsewhere
    cov_inv = [int(depth == 0) for depth in cov]

    # ranges with zero coverage
    zero_cov_ranges = getMarkedRanges(cov_inv)

    seq = reads[name]

    # GC % for windows of GC_WINDOW_SIZE
    gc_sliding_window = getGCSlidingWindow(seq, GC_WINDOW_SIZE)

    # filter gaps that are at > MIN_COV_GAP
    # and have at least one base > GC_THRESHOLD
# Report, per alignment subject (sname), the stretches that no alignment covers.
# NOTE(review): `fh` is expected to be opened before this point -- confirm.

# An uncovered stretch must be longer than this (second CLI argument) to be kept.
COV_GAP_MIN = int(sys.argv[2])

# All outputs share the prefix given as the third CLI argument.
fhist = open(sys.argv[3] + ".uncov.hist", "w")
freg = open(sys.argv[3] + ".uncov.regions", "w")
ftbases = open(sys.argv[3] + ".uncov.total.bases", "w")

pcov = []  # pct cov
total_bases = 0
total_uncovered_bases = 0

# groupby only merges adjacent items, so this relies on the alignment stream
# already being ordered by sname.
for pbname, alignments in groupby(getNucmerAlignmentIterator(fh), lambda x: x.sname):
    # materialize the group: the groupby sub-iterator can only be consumed once
    a = list(alignments)
    cov = getCoverageFromNucAlignments(a)

    # mark the 0 coverage regions (1 where coverage is zero, 0 elsewhere)
    zcov = [1 if c == 0 else 0 for c in cov]

    # ranges with 0 coverage
    zcov_ranges = getMarkedRanges(zcov)

    # only look at the gaps larger than the min gap size; a list comprehension
    # replaces the tuple-unpacking lambda, which is a syntax error on Python 3,
    # and yields a real list that can be traversed more than once
    zcov_ranges_filt = [
        (beg, end) for beg, end in zcov_ranges if end - beg > COV_GAP_MIN
    ]

    # write out the regions that pass filter to region file:
    # "<name>\t<beg,end> <beg,end> ..."
    region_fields = " ".join(["%d,%d" % t for t in zcov_ranges_filt])
    freg.write("\t".join([pbname, region_fields]) + "\n")
fh = open(sys.argv[1]) COV_GAP_MIN = int(sys.argv[2]) fhist = open(sys.argv[3] + ".uncov.hist", "w") freg = open(sys.argv[3] + ".uncov.regions", "w") ftbases = open(sys.argv[3] + ".uncov.total.bases", "w") pcov = [] # pct cov total_bases = 0 total_uncovered_bases = 0 for pbname, alignments in groupby(getNucmerAlignmentIterator(fh), lambda x: x.sname): a = list(alignments) cov = getCoverageFromNucAlignments(a) # mark the 0 coverage regions zcov = map(lambda c: 1 if c == 0 else 0, cov) # ranges with 0 coverage zcov_ranges = getMarkedRanges(zcov) # only look at the gaps larger than the min gap size zcov_ranges_filt = filter(lambda (x, y): y - x > COV_GAP_MIN, zcov_ranges) # write out the regions that pass filter to region file freg.write("\t".join([pbname, " ".join(map(lambda t: "%d,%d" % t, zcov_ranges_filt))]) + "\n") total_bases += a[0].slen for rbeg, rend in zcov_ranges_filt:
# Build a name -> sequence lookup for all reads before walking the alignments.
reads = dict((str(rec.name), str(rec.seq)) for rec in fastaIterator(rfh))
sys.stderr.write("Loaded reads\n")

alignmentIt = getNucmerAlignmentIterator(afh)
sys.stderr.write("Loaded Alignments\n")

counter = 0
# groupby only merges adjacent items, so this relies on the alignment stream
# already being ordered by sname.
for name, group in groupby(alignmentIt, lambda aln: aln.sname):
    # build coverage vector
    cov = getCoverageFromNucAlignments(group)

    # flag positions with 0 (no) coverage as 1 and set everything else to 0
    cov_inv = [int(depth == 0) for depth in cov]

    # ranges with zero coverage
    zero_cov_ranges = getMarkedRanges(cov_inv)

    seq = reads[name]

    # calculate GC % for windows of GC_WINDOW_SIZE
    gc_sliding_window = getGCSlidingWindow(seq, GC_WINDOW_SIZE)

    # filter gaps that are at > MIN_COV_GAP
    # and have at least one base > GC_THRESHOLD