def cropGFF(gffs, options):
    """Crop the intervals of a gff stream and write the result.

    The regions listed in the file ``options.crop`` are cut out of every
    record in ``gffs``:

    * a record without any overlapping region is written unchanged,
    * a record that is partially covered is written once for each
      remaining segment,
    * a record that is covered completely is not written at all.

    ``gffs`` is an iterable of records with ``contig``, ``start`` and
    ``end`` attributes. ``start``/``end`` of a cropped record are
    overwritten in place before each write.

    ``options`` must provide ``crop`` (name of the file with the regions
    to remove), ``stdout`` (receives the records), ``loglevel`` and
    ``stdlog`` (summary counts are written there if ``loglevel >= 1``).
    """

    # read regions to crop with and convert intervals to intersectors
    E.info("reading gff for cropping: started.")

    # The result is a per-contig mapping that is indexed right below, so
    # the file is presumably consumed completely by readAsIntervals and
    # the handle can be released as soon as it returns.
    with IOTools.openFile(options.crop, "r") as crop_file:
        cropper = GTF.readAsIntervals(GTF.iterator(crop_file))

    ntotal = 0
    # iterate over a snapshot of the keys as the values are replaced
    for contig in list(cropper):
        intersector = bx.intervals.intersection.Intersecter()
        for start, end in cropper[contig]:
            intersector.add_interval(bx.intervals.Interval(start, end))
            ntotal += 1
        cropper[contig] = intersector

    E.info("reading gff for cropping: finished.")
    E.info("reading gff for cropping: %i contigs with %i intervals." %
           (len(cropper), ntotal))

    ninput, noutput, ncropped, ndeleted = 0, 0, 0, 0

    # do the actual cropping
    for gff in gffs:
        ninput += 1

        overlaps = None
        if gff.contig in cropper:
            start, end = gff.start, gff.end
            overlaps = cropper[gff.contig].find(start, end)

        if not overlaps:
            # nothing to remove: pass the record through untouched
            noutput += 1
            options.stdout.write("%s\n" % gff)
            continue

        # Mask with one entry per position of the record: 1 = keep,
        # 0 = removed. Overlaps are clipped to the record boundaries.
        length = end - start
        keep = numpy.ones(length)
        for overlap in overlaps:
            first = max(0, overlap.start - start)
            last = min(length, overlap.end - start)
            keep[first:last] = 0

        # runs of kept positions, relative to the record start
        segments = Intervals.fromArray(keep)
        if len(segments) == 0:
            ndeleted += 1
        else:
            ncropped += 1

        for seg_start, seg_end in segments:
            gff.start, gff.end = seg_start + start, seg_end + start
            noutput += 1
            options.stdout.write("%s\n" % gff)

    if options.loglevel >= 1:
        options.stdlog.write(
            "# ninput=%i, noutput=%i, ncropped=%i, ndeleted=%i\n" %
            (ninput, noutput, ncropped, ndeleted))
def cropGFF(gffs, options):
    """Remove the regions given in ``options.crop`` from gff records.

    Records from ``gffs`` are written to ``options.stdout``. A record
    that does not overlap any crop region is written as is. Otherwise the
    covered positions are removed and the record is written once per
    remaining segment (with ``start``/``end`` overwritten in place); a
    record with no remaining segment is dropped.

    If ``options.loglevel >= 1`` a line with the number of records read,
    written, cropped and deleted is written to ``options.stdlog``.
    """

    # read regions to crop with and convert intervals to intersectors
    E.info("reading gff for cropping: started.")

    # Use a context manager so that the crop file is closed again. The
    # returned mapping is keyed by contig and indexed below, so reading
    # is presumably complete when readAsIntervals returns.
    with IOTools.openFile(options.crop, "r") as infile:
        cropper = GTF.readAsIntervals(GTF.iterator(infile))

    ntotal = 0
    # snapshot of the keys: the interval lists are replaced while looping
    for contig in list(cropper):
        intersector = bx.intervals.intersection.Intersecter()
        for start, end in cropper[contig]:
            intersector.add_interval(bx.intervals.Interval(start, end))
            ntotal += 1
        cropper[contig] = intersector

    E.info("reading gff for cropping: finished.")
    E.info("reading gff for cropping: %i contigs with %i intervals." %
           (len(cropper), ntotal))

    ninput, noutput, ncropped, ndeleted = 0, 0, 0, 0

    # do the actual cropping
    for gff in gffs:

        ninput += 1

        if gff.contig in cropper:

            start, end = gff.start, gff.end
            overlaps = cropper[gff.contig].find(start, end)

            if overlaps:
                # per-position mask of the record, 1 = position is kept
                size = end - start
                mask = numpy.ones(size)
                for hit in overlaps:
                    # clip the overlap to the extent of the record
                    lo = max(0, hit.start - start)
                    hi = min(size, hit.end - start)
                    mask[lo:hi] = 0

                segments = Intervals.fromArray(mask)
                if len(segments) == 0:
                    ndeleted += 1
                else:
                    ncropped += 1

                # segment coordinates are relative to the record start
                for lo, hi in segments:
                    gff.start, gff.end = lo + start, hi + start
                    noutput += 1
                    options.stdout.write("%s\n" % gff)

                # cropped records have been dealt with completely
                continue

        noutput += 1
        options.stdout.write("%s\n" % gff)

    if options.loglevel >= 1:
        options.stdlog.write(
            "# ninput=%i, noutput=%i, ncropped=%i, ndeleted=%i\n" %
            (ninput, noutput, ncropped, ndeleted))
def testArray2(self):
    """Check a mask with three runs and the inverse of that mask."""
    mask = [1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]
    inverted = [not flag for flag in mask]

    # runs of truthy entries come back as half-open (start, end) pairs
    expected_runs = [(0, 3), (6, 9), (12, 15)]
    expected_gaps = [(3, 6), (9, 12)]

    self.assertEqual(Intervals.fromArray(mask), expected_runs)
    self.assertEqual(Intervals.fromArray(inverted), expected_gaps)
def testArray1(self):
    """Check a short mask with two runs and the inverse of that mask."""
    mask = [1, 1, 1, 0, 0, 0, 1, 1, 1]
    inverted = [not flag for flag in mask]

    # two runs of ones separated by a single gap
    self.assertEqual(Intervals.fromArray(mask), [(0, 3), (6, 9)])
    # inverting the mask turns the gap into the only run
    self.assertEqual(Intervals.fromArray(inverted), [(3, 6)])
def testEmpty(self):
    """An empty array yields an empty list of intervals."""
    result = Intervals.fromArray([])
    self.assertEqual(result, [])
def testArray2(self):
    """Longer array: three runs of ones separated by two gaps."""
    values = [1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]

    runs = Intervals.fromArray(values)
    self.assertEqual(runs, [(0, 3), (6, 9), (12, 15)])

    # the negated array has its runs where the original has its gaps
    negated = [not value for value in values]
    gaps = Intervals.fromArray(negated)
    self.assertEqual(gaps, [(3, 6), (9, 12)])
def cropGFF(gffs, filename_gff):
    """Crop the intervals of a gff stream; generator over the result.

    The regions listed in the file ``filename_gff`` are cut out of every
    record in ``gffs``:

    * a record without any overlapping region is yielded unchanged,
    * a record that is partially covered is yielded once for each
      remaining segment,
    * a record that is covered completely is not yielded.

    ``gffs`` is an iterable of records with ``contig``, ``start`` and
    ``end`` attributes.

    NOTE: for a cropped record the *same* object is yielded for every
    segment, with ``start``/``end`` overwritten in place before each
    yield. Consumers have to use (or copy) a record before requesting the
    next one.

    The summary counts are only logged once the generator is exhausted.
    """

    # read regions to crop with and convert intervals to intersectors
    E.info("reading gff for cropping: started.")

    # The result is a per-contig mapping that is indexed right below, so
    # the file is presumably consumed completely by readAsIntervals and
    # the handle can be released before the first record is yielded.
    with IOTools.open_file(filename_gff, "r") as crop_file:
        cropper = GTF.readAsIntervals(GTF.iterator(crop_file))

    ntotal = 0
    # iterate over a snapshot of the keys as the values are replaced
    for contig in list(cropper.keys()):
        tree = quicksect.IntervalTree()
        for start, end in cropper[contig]:
            tree.add(start, end)
            ntotal += 1
        cropper[contig] = tree

    E.info("reading gff for cropping: finished.")
    E.info("reading gff for cropping: %i contigs with %i intervals." %
           (len(cropper), ntotal))

    ninput, noutput, ncropped, ndeleted = 0, 0, 0, 0

    # do the actual cropping
    for gff in gffs:
        ninput += 1

        overlaps = None
        if gff.contig in cropper:
            start, end = gff.start, gff.end
            overlaps = cropper[gff.contig].find(
                quicksect.Interval(start, end))

        if not overlaps:
            # nothing to remove: pass the record through untouched
            noutput += 1
            yield gff
            continue

        # Mask with one entry per position of the record: 1 = keep,
        # 0 = removed. Overlaps are clipped to the record boundaries.
        length = end - start
        keep = numpy.ones(length)
        for overlap in overlaps:
            first = max(0, overlap.start - start)
            last = min(length, overlap.end - start)
            keep[first:last] = 0

        # runs of kept positions, relative to the record start
        segments = Intervals.fromArray(keep)
        if len(segments) == 0:
            ndeleted += 1
        else:
            ncropped += 1

        for seg_start, seg_end in segments:
            gff.start, gff.end = seg_start + start, seg_end + start
            noutput += 1
            yield gff

    E.info("ninput=%i, noutput=%i, ncropped=%i, ndeleted=%i" %
           (ninput, noutput, ncropped, ndeleted))