def trimMatchOverlapsInBoth(inpfile, outfile, trim_subtype):
    """Trim match overlaps along X and then along Y.

    The stages run in this order: sort inpfile in X order, coalesce the
    matches, trim overlaps in X, re-sort in Y order, and finally trim
    overlaps in Y with the result written to outfile.

    inpfile      -- source of matches, handed to MatchRecord.sortInXorderAP.
    outfile      -- destination handed to trimMatchOverlapsInY.
    trim_subtype -- forwarded unchanged to both trimming steps; it also
                    selects the flag given to coalesceMatches.
    """
    # coalesceMatches receives a true flag only for subtypes 'x' and 'u'.
    coalesceFlag = trim_subtype in ('x', 'u')

    # Two scratch names are rebound alternately: each stage reads what
    # the previous stage wrote, and an older scratch file loses its last
    # reference as soon as its name is reused.
    ping = MyFile.myfile()
    MatchRecord.sortInXorderAP(inpfile, ping)

    # The following coalescing assumes perfect runs.
    pong = MyFile.myfile()
    coalesceMatches(ping, pong, coalesceFlag)

    ping = MyFile.myfile()
    trimMatchOverlapsInX(pong, ping, trim_subtype)

    pong = MyFile.myfile()
    MatchRecord.sortInYorderAP(ping, pong)

    trimMatchOverlapsInY(pong, outfile, trim_subtype)
def trimMatchOverlapsInBoth(inpfile, outfile, trim_subtype):
    """Run the X-then-Y overlap trimming pipeline on a match file.

    Steps, in order: MatchRecord.sortInXorderAP, coalesceMatches,
    trimMatchOverlapsInX, MatchRecord.sortInYorderAP and
    trimMatchOverlapsInY. Every intermediate result lives in a fresh
    MyFile.myfile(); only the last step writes to outfile.

    inpfile      -- matches to process (read by the first sort).
    outfile      -- receives the output of trimMatchOverlapsInY.
    trim_subtype -- passed through to both trim steps; the values 'x'
                    and 'u' additionally turn on the coalesceMatches flag.
    """
    wantCoalesceFlag = trim_subtype in ('x', 'u')

    # Only two scratch names are used and they are rebound in turn, so
    # an intermediate file is dropped once the stage after it has run.
    scratchA = MyFile.myfile()
    MatchRecord.sortInXorderAP(inpfile, scratchA)

    # The following coalescing assumes perfect runs.
    scratchB = MyFile.myfile()
    coalesceMatches(scratchA, scratchB, wantCoalesceFlag)

    scratchA = MyFile.myfile()
    trimMatchOverlapsInX(scratchB, scratchA, trim_subtype)

    scratchB = MyFile.myfile()
    MatchRecord.sortInYorderAP(scratchA, scratchB)

    trimMatchOverlapsInY(scratchB, outfile, trim_subtype)
def _dumpDebugLines(lines, debugnum):
    """Copy every line of *lines* into the file ``debugfile.<debugnum>``.

    The debug file is closed before returning, even when iterating or
    writing raises, so repeated dumps do not leak file handles.
    """
    with open("debugfile.%d" % debugnum, "w") as debugfile:
        for line in lines:
            # str() mirrors the conversion the former print statement
            # applied; nothing is appended, so lines keep their own
            # terminators.
            debugfile.write(str(line))


def applyBothKeepMasks(inpfile, outfile):
    """Apply the first-axis keep mask, then the second-axis keep mask.

    For each axis the function builds a keep mask with
    findCoverageIntervals, sorts the current matches for that axis
    (MatchRecord.sortInXorderAP for the first, sortInYorderAP for the
    second) and filters them through applyOneKeepMask. The first pass
    reads inpfile and writes a scratch file; the second pass reads that
    scratch file and writes outfile.

    inpfile -- seekable file of matches; rewound before use.
    outfile -- seekable file that receives the final result; rewound
               before use.

    Setting the local ``debug`` flag to a true value makes every stage
    dump its file to debugfile.1, debugfile.2, ... in the current
    directory.
    """
    # Maybe we can think of a masking implementation where each ATAC match
    # is treated atomically.  Assume that the keep mask intervals are
    # sorted by start position.  Assume that the ATAC matches are sorted
    # by start position.  Assert that all keep mask intervals are
    # non-overlapping and were cut from only one ATAC match.  Thus the
    # mapping from keep mask intervals is a function.  Note that this
    # requires that we do not coalesce abutting keep mask intervals that
    # originate from multiple matches.  Note this still allows an ATAC
    # match to overlap more than one keep mask interval.  Ignore all keep
    # mask intervals with zero length; their creation has tie breaking
    # problems.  See notes on 2003 Jul 29.

    debug = 0
    debugnum = 0

    inpfile.seek(0)
    outfile.seek(0)

    # Apply the keepMask for the first axis.
    # Build the sorted keep mask intervals for the first axis.
    processFirstAxis = 1
    keepMaskFile = MyFile.myfile()
    tmpfile2 = inpfile
    tmpfile3 = MyFile.myfile()
    tmpfile4 = MyFile.myfile()
    findCoverageIntervals(inpfile, keepMaskFile, processFirstAxis)
    if debug:
        # NOTE(review): unlike the other dumps, the mask file is not
        # rewound first; this matches the previous behavior -- confirm
        # that findCoverageIntervals leaves it positioned at the start.
        debugnum += 1
        _dumpDebugLines(keepMaskFile, debugnum)
    MatchRecord.sortInXorderAP(tmpfile2, tmpfile3)
    if debug:
        tmpfile3.seek(0)
        debugnum += 1
        _dumpDebugLines(tmpfile3, debugnum)
    applyOneKeepMask(tmpfile3, tmpfile4, keepMaskFile, processFirstAxis)
    if debug:
        tmpfile4.seek(0)
        debugnum += 1
        _dumpDebugLines(tmpfile4, debugnum)

    # Apply the keepMask for the second axis.
    # Build the sorted keep mask intervals for the second axis.
    processFirstAxis = 0
    keepMaskFile = MyFile.myfile()
    tmpfile2 = tmpfile4   # first-axis result feeds the second pass
    tmpfile3 = MyFile.myfile()
    tmpfile4 = outfile    # second pass writes straight to the caller's file
    # NOTE(review): the second-axis mask is computed from the original
    # inpfile, not from the first-axis result -- presumably intentional;
    # confirm.
    findCoverageIntervals(inpfile, keepMaskFile, processFirstAxis)
    if debug:
        debugnum += 1
        _dumpDebugLines(keepMaskFile, debugnum)
    MatchRecord.sortInYorderAP(tmpfile2, tmpfile3)
    if debug:
        tmpfile3.seek(0)
        debugnum += 1
        _dumpDebugLines(tmpfile3, debugnum)
    applyOneKeepMask(tmpfile3, tmpfile4, keepMaskFile, processFirstAxis)
    if debug:
        tmpfile4.seek(0)
        debugnum += 1
        _dumpDebugLines(tmpfile4, debugnum)
def _dumpDebugLines(lines, debugnum):
    """Copy every line of *lines* into the file ``debugfile.<debugnum>``.

    The debug file is closed before returning, even when iterating or
    writing raises, so repeated dumps do not leak file handles.
    """
    with open("debugfile.%d" % debugnum, "w") as debugfile:
        for line in lines:
            # str() mirrors the conversion the former print statement
            # applied; nothing is appended, so lines keep their own
            # terminators.
            debugfile.write(str(line))


def applyBothKeepMasks(inpfile, outfile):
    """Apply the first-axis keep mask, then the second-axis keep mask.

    For each axis the function builds a keep mask with
    findCoverageIntervals, sorts the current matches for that axis
    (MatchRecord.sortInXorderAP for the first, sortInYorderAP for the
    second) and filters them through applyOneKeepMask. The first pass
    reads inpfile and writes a scratch file; the second pass reads that
    scratch file and writes outfile.

    inpfile -- seekable file of matches; rewound before use.
    outfile -- seekable file that receives the final result; rewound
               before use.

    Setting the local ``debug`` flag to a true value makes every stage
    dump its file to debugfile.1, debugfile.2, ... in the current
    directory.
    """
    # Maybe we can think of a masking implementation where each ATAC match
    # is treated atomically.  Assume that the keep mask intervals are
    # sorted by start position.  Assume that the ATAC matches are sorted
    # by start position.  Assert that all keep mask intervals are
    # non-overlapping and were cut from only one ATAC match.  Thus the
    # mapping from keep mask intervals is a function.  Note that this
    # requires that we do not coalesce abutting keep mask intervals that
    # originate from multiple matches.  Note this still allows an ATAC
    # match to overlap more than one keep mask interval.  Ignore all keep
    # mask intervals with zero length; their creation has tie breaking
    # problems.  See notes on 2003 Jul 29.

    debug = 0
    debugnum = 0

    inpfile.seek(0)
    outfile.seek(0)

    # Apply the keepMask for the first axis.
    # Build the sorted keep mask intervals for the first axis.
    processFirstAxis = 1
    keepMaskFile = MyFile.myfile()
    tmpfile2 = inpfile
    tmpfile3 = MyFile.myfile()
    tmpfile4 = MyFile.myfile()
    findCoverageIntervals(inpfile, keepMaskFile, processFirstAxis)
    if debug:
        # NOTE(review): unlike the other dumps, the mask file is not
        # rewound first; this matches the previous behavior -- confirm
        # that findCoverageIntervals leaves it positioned at the start.
        debugnum += 1
        _dumpDebugLines(keepMaskFile, debugnum)
    MatchRecord.sortInXorderAP(tmpfile2, tmpfile3)
    if debug:
        tmpfile3.seek(0)
        debugnum += 1
        _dumpDebugLines(tmpfile3, debugnum)
    applyOneKeepMask(tmpfile3, tmpfile4, keepMaskFile, processFirstAxis)
    if debug:
        tmpfile4.seek(0)
        debugnum += 1
        _dumpDebugLines(tmpfile4, debugnum)

    # Apply the keepMask for the second axis.
    # Build the sorted keep mask intervals for the second axis.
    processFirstAxis = 0
    keepMaskFile = MyFile.myfile()
    tmpfile2 = tmpfile4   # first-axis result feeds the second pass
    tmpfile3 = MyFile.myfile()
    tmpfile4 = outfile    # second pass writes straight to the caller's file
    # NOTE(review): the second-axis mask is computed from the original
    # inpfile, not from the first-axis result -- presumably intentional;
    # confirm.
    findCoverageIntervals(inpfile, keepMaskFile, processFirstAxis)
    if debug:
        debugnum += 1
        _dumpDebugLines(keepMaskFile, debugnum)
    MatchRecord.sortInYorderAP(tmpfile2, tmpfile3)
    if debug:
        tmpfile3.seek(0)
        debugnum += 1
        _dumpDebugLines(tmpfile3, debugnum)
    applyOneKeepMask(tmpfile3, tmpfile4, keepMaskFile, processFirstAxis)
    if debug:
        tmpfile4.seek(0)
        debugnum += 1
        _dumpDebugLines(tmpfile4, debugnum)