from gtfIO import GTFReader, GTFWriter
from optparse import OptionParser

# Filter a GTF annotation: an entry is written to --output only when its
# length (end - start + 1, i.e. inclusive coordinates) is at least --minlen
# and its score is strictly below --maxdiv.

parser = OptionParser()
parser.add_option(
    "--input",
    dest="input",
    help="A gtf file containing the RepeatMasked gtf annotation")
parser.add_option("--minlen", dest="minleng", help="minimum length")
parser.add_option("--output", dest="output", help="A gtf output file")
# The help text used to read "minimum length" (copy-paste slip); the value is
# used as an exclusive upper bound on the entry score.
parser.add_option(
    "--maxdiv",
    dest="maxdiv",
    help="maximum divergence (entries are kept only if their score is "
    "below this value)",
    default=99999999999999999)
(options, args) = parser.parse_args()

# int(None) would raise an unhelpful TypeError; report the missing option
# through the parser's usage message instead.
if options.minleng is None:
    parser.error("--minlen is required")

minleng = int(options.minleng)
# Converted once: the threshold is loop-invariant, and a malformed value is
# now reported before the writer is created.
maxdiv = float(options.maxdiv)

w = GTFWriter(options.output)
try:
    for e in GTFReader(options.input):
        leng = (e.end - e.start) + 1
        if leng >= minleng and e.score < maxdiv:
            w.write(e)
finally:
    # Close the writer even when reading or writing fails part-way.
    w.close()
from gtfIO import GTFReader, GTFWriter
import sys
import random
from optparse import OptionParser, OptionGroup
import collections

# Copy the entries of --input to --output, dropping every entry whose length
# (end - start + 1) is smaller than --min-leng.

parser = OptionParser()
parser.add_option(
    "--input",
    dest="input",
    help="A gtf file containing the RepeatMasked gtf annotation")
parser.add_option("--min-leng", dest="minleng", help="minimum length")
parser.add_option("--output", dest="output", help="A gtf output file")
(options, args) = parser.parse_args()

min_length = int(options.minleng)
writer = GTFWriter(options.output)
for entry in GTFReader(options.input):
    # Coordinates are treated as inclusive, hence the +1.
    span = entry.end - entry.start + 1
    if span < min_length:
        continue
    writer.write(entry)
writer.close()
# Fragment of a larger script: its line breaks and indentation were lost, and
# chrlist, chrentries, finallist, maxdist, comparecount, chr and options are
# defined outside this excerpt.
# What the visible statements do, in order:
#   * pop the first element of chrlist as `active`;
#   * pop every remaining element as `totest`, count the comparison and call
#     get_distance(active, totest); of the two returned values only the first
#     (testdist) is used here;
#   * if testdist < maxdist, replace `active` with
#     mergeNucmer(active, totest, testdist) and set activeUnmodified to False,
#     otherwise append `totest` to `tested`;
#   * `tested` becomes the new chrlist, and the `while(True)` loop is left via
#     `break` when activeUnmodified is still True;
#   * `active` is appended to chrentries, a progress line is printed with a
#     Python 2 print statement, and chrentries is appended to finallist;
#   * the lists returned by get_refgtflist / get_querygtflist are written with
#     GTFWriter.write_all, and print_distances is called when
#     options.outputdist is set.
# NOTE(review): the loop nesting implied above is inferred from statement order
# only; the first statement presumably sits inside an outer loop whose header
# is not visible -- confirm against a correctly indented copy.
# NOTE(review): as stored, everything after the first '#' on the line below is
# a comment to the interpreter; the line must be re-broken before it can run.
active=chrlist.pop(0) while(True): tested=[] activeUnmodified=True while(len(chrlist)>0): totest=chrlist.pop(0) #if(comparecount%100000==0): # print "Finished {0} comparision; len chrlist {1}".format(comparecount,len(chrlist)) comparecount+=1 testdist,narrowdist=get_distance(active,totest) if(testdist < maxdist): active = mergeNucmer(active,totest,testdist) activeUnmodified=False else: tested.append(totest) chrlist=tested if activeUnmodified: break chrentries.append(active) print "Finished {0} with {1} fragments".format(chr,len(chrentries)) finallist.extend(chrentries) refgtf=get_refgtflist(finallist) querygtf=get_querygtflist(finallist) GTFWriter.write_all(options.outputref,refgtf) # (cls,file,gtfentries): GTFWriter.write_all(options.outputquery,querygtf) if(options.outputdist): print_distances(options.outputdist,finallist)
# Fragment of a larger script: its line breaks and indentation were lost, and
# chrlist, chrentries, finallist, maxdist, comparecount, chr and options are
# defined outside this excerpt.
# What the visible statements do, in order:
#   * while chrlist is non-empty, pop its first element as `active`;
#   * pop every remaining element as `totest`, count the comparison and call
#     get_distance(active, totest); of the two returned values only the first
#     (testdist) is used here;
#   * if testdist < maxdist, replace `active` with
#     mergeNucmer(active, totest, testdist) and set activeUnmodified to False,
#     otherwise append `totest` to `tested`;
#   * `tested` becomes the new chrlist, and the `while (True)` loop is left via
#     `break` when activeUnmodified is still True;
#   * `active` is appended to chrentries, a progress line is printed with a
#     Python 2 print statement, and chrentries is appended to finallist;
#   * the lists returned by get_refgtflist / get_querygtflist are written with
#     GTFWriter.write_all, and print_distances is called when
#     options.outputdist is set.
# NOTE(review): the loop nesting implied above is inferred from statement order
# only; the reporting and output statements presumably sit outside the loops
# that precede them -- confirm against a correctly indented copy.
# NOTE(review): as stored, everything after the first '#' on the line below is
# a comment to the interpreter; the line must be re-broken before it can run.
while (len(chrlist) > 0): active = chrlist.pop(0) while (True): tested = [] activeUnmodified = True while (len(chrlist) > 0): totest = chrlist.pop(0) #if(comparecount%100000==0): # print "Finished {0} comparision; len chrlist {1}".format(comparecount,len(chrlist)) comparecount += 1 testdist, narrowdist = get_distance(active, totest) if (testdist < maxdist): active = mergeNucmer(active, totest, testdist) activeUnmodified = False else: tested.append(totest) chrlist = tested if activeUnmodified: break chrentries.append(active) print "Finished {0} with {1} fragments".format(chr, len(chrentries)) finallist.extend(chrentries) refgtf = get_refgtflist(finallist) querygtf = get_querygtflist(finallist) GTFWriter.write_all(options.outputref, refgtf) # (cls,file,gtfentries): GTFWriter.write_all(options.outputquery, querygtf) if (options.outputdist): print_distances(options.outputdist, finallist)
# Fragment of a larger script: the line below starts in the middle of a call
# (the trailing `help=...` argument, presumably of the --reference option
# declared just before this excerpt), and its line breaks were lost.
# What the visible statements do, in order:
#   * register the --totest and --output options and parse the command line;
#   * read both GTF files with GTFReader.readall and pass each result through
#     convert_chrhash (defined outside this excerpt; presumably groups the
#     entries by chromosome -- verify);
#   * open a GTFWriter on options.output;
#   * for each of the hard-coded names "X", "2L", "2R", "3L", "3R", "4", index
#     both converted structures by that name, call novelentries(cref, ctotest)
#     and write every returned entry;
#   * close the writer.
# NOTE(review): both structures are indexed directly with the chromosome name;
# whether a name missing from one input raises depends on what convert_chrhash
# returns -- confirm.
# NOTE(review): as stored, everything after the first '#' on the line below is
# a comment to the interpreter; the line must be re-broken before it can run.
help="A gtf file containing the reference annotation") parser.add_option( "--totest", dest="totest", help="A gtf file containing the annotation that should be tested") parser.add_option( "--output", dest="output", help= "The output file containing novel TE insertions not in the reference genome" ) (options, args) = parser.parse_args() # chromosomes with incomplete reference annotation: 2L, 2R, 3L # fine: 3R, X, 4 rawref = GTFReader.readall(options.reference) rawtotest = GTFReader.readall(options.totest) chref = convert_chrhash(rawref) chtotest = convert_chrhash(rawtotest) ofh = GTFWriter(options.output) for chr in ["X", "2L", "2R", "3L", "3R", "4"]: cref = chref[chr] ctotest = chtotest[chr] ne = novelentries(cref, ctotest) for n in ne: ofh.write(n) ofh.close()
# Command line: a reference annotation, an annotation to test, and the file
# that receives the entries novelentries() reports for each chromosome.
parser = OptionParser()
parser.add_option(
    "--reference",
    dest="reference",
    help="A gtf file containing the reference annotation",
)
parser.add_option(
    "--totest",
    dest="totest",
    help="A gtf file containing the annotation that should be tested",
)
parser.add_option(
    "--output",
    dest="output",
    help="The output file containing novel TE insertions not in the reference genome",
)
(options, args) = parser.parse_args()

# chromosomes with incomplete reference annotation: 2L, 2R, 3L
# fine: 3R, X, 4
reference_entries = GTFReader.readall(options.reference)
candidate_entries = GTFReader.readall(options.totest)
reference_by_chrom = convert_chrhash(reference_entries)
candidates_by_chrom = convert_chrhash(candidate_entries)

writer = GTFWriter(options.output)
for chrom in ("X", "2L", "2R", "3L", "3R", "4"):
    # Both lookups happen before novelentries() is called, reference first,
    # exactly as in the original statement order.
    novel = novelentries(reference_by_chrom[chrom], candidates_by_chrom[chrom])
    for entry in novel:
        writer.write(entry)
writer.close()