def Main(): ''' IO TEMPLATE ''' global args,out args=ParseArg() fin=IO.fopen(args.input,"r") out=IO.fopen(args.output,"w") ''' END OF IO TEMPLATE ''' print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION, print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )" print >>out,"# Date: ",time.asctime() print >>out,"# The command line is :" print >>out,"#\t"," ".join(sys.argv) dbi=DBI.init(args.db,Tools.guess_format(args.db)) references=dbi.bamfiles[0].references for i in TableIO.parse(fin,args.format): print i n=0 c_count=0 reads=dbi.query(i,args.method) for read in reads: compatible=Tools.compatible_with_transcript(read,i,references=references,strand=args.strand) print "HT:" for i0,r in enumerate(TableIO.parse(read.reads,"bam2bed12",references=references)): print "READ"+str(i0)+"\t",r print "COMPATIBLE:",compatible,"\n\n" if compatible: c_count+=1 n+=1 print "COMPATIBLE / ALL OVERLAP READS = ",c_count,"/",n print "RATIO\t%.4f"%float(c_count)/n
def fopen(file,mode="r",**kwargs):
    '''
    Open *file* in the requested mode and return the resulting handle.

    mode "r": a pysam.Samfile opened with "rb" when
              Tools.guess_format(file) reports "bam"; otherwise the
              result of open_input(file).
    mode "w": the result of open_output(file).
    other   : None.

    Extra keyword arguments are accepted but not used.
    '''
    is_bam=Tools.guess_format(file)=="bam"
    if mode=="r":
        if is_bam:
            return pysam.Samfile(file,"rb")
        return open_input(file)
    if mode=="w":
        return open_output(file)
    return None
def fopen(file, mode="r", **kwargs):
    '''
    Return a handle for *file*, chosen by *mode* and the guessed format.

    Writing ("w") is delegated to open_output(file).  Reading ("r") gives
    a pysam.Samfile opened with "rb" when Tools.guess_format(file) is
    "bam", and the result of open_input(file) for anything else.  Any
    other mode yields None.  **kwargs is accepted but ignored.
    '''
    guessed = Tools.guess_format(file)
    if mode == "w":
        return open_output(file)
    if mode != "r":
        return None
    if guessed == "bam":
        return pysam.Samfile(file, "rb")
    return open_input(file)
def Main():
    '''
    This program is a test for TableIO.parse(file.bam,"bam2bed")

    Parses args.input with TableIO.parse and prints every resulting
    record to the output handle, one per line.  When args.format is
    "guess" the format is taken from Tools.guess_format(args.input).
    '''
    global args,out
    args=ParseArg()
    # The input handle is opened here, but the parser below is given the
    # path (args.input) rather than this handle.
    fin=IO.fopen(args.input,"r")
    out=IO.fopen(args.output,"w")
    if args.format=="guess":
        args.format=Tools.guess_format(args.input)
    for record in TableIO.parse(args.input,args.format):
        print >>out,record
def Main(): ''' IO TEMPLATE ''' global args, out args = ParseArg() fin = IO.fopen(args.input, "r") out = IO.fopen(args.output, "w") ''' END OF IO TEMPLATE ''' print >> out, "# This data was generated by program ", sys.argv[ 0], " (version: %s)" % VERSION, print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )" print >> out, "# Date: ", time.asctime() print >> out, "# The command line is :" print >> out, "#\t", " ".join(sys.argv) dbi = DBI.init(args.db, Tools.guess_format(args.db)) references = dbi.bamfiles[0].references for i in TableIO.parse(fin, args.format): print i n = 0 c_count = 0 reads = dbi.query(i, args.method) for read in reads: compatible = Tools.compatible_with_transcript( read, i, references=references, strand=args.strand) print "HT:" for i0, r in enumerate( TableIO.parse(read.reads, "bam2bed12", references=references)): print "READ" + str(i0) + "\t", r print "COMPATIBLE:", compatible, "\n\n" if compatible: c_count += 1 n += 1 print "COMPATIBLE / ALL OVERLAP READS = ", c_count, "/", n print "RATIO\t%.4f" % float(c_count) / n
def Main():
    '''
    Query a database with every entry of an input table and write the hits.

    Settings come from ParseArg().  A "#" provenance header is written to
    the output first.  Each input entry is echoed on a "QR" line and
    followed by "HT" lines built from what dbi.query() returns for it:
      * numpy array or list : one "HT" line with the values comma separated
      * str                 : one "HT" line holding the string
      * anything else       : one "HT" line per item of the result
    A "#" summary with the query and hit counters closes the report.
    Progress is reported on stderr every 100 entries.
    '''
    global args,out
    args=ParseArg()
    query_kwargs={}   # keyword arguments forwarded to dbi.query()
    init_kwargs={}    # keyword arguments forwarded to DBI.init()

    # Output: the named file, or stdout when asked for or when the file
    # cannot be opened for writing.
    out=sys.stdout
    if args.output!="stdout":
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"

    # cmd is sys.argv itself, so trimming the directory part of cmd[0]
    # also changes sys.argv[0].
    cmd=sys.argv
    cmd[0]=cmd[0].split("/")[-1]
    print >>out,"# This data was generated by program ",cmd[0],"(version %s)"%VERSION,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :\n#\t"," ".join(cmd)

    # Resolve a "guess" database format; a ".gz" suffix is treated as tabix
    # and the guessed format is then used as the tabix payload format.
    if args.dbformat=="guess":
        if Tools.suffix(args.db)=="gz":
            args.dbformat="tabix"
            args.tabix_format=Tools.guess_format(args.db)
        else:
            args.dbformat=Tools.guess_format(args.db)
    if args.query_method:
        query_kwargs["method"]=args.query_method
    if args.tabix_format:
        init_kwargs["tabix"]=args.tabix_format
    dbi=DBI.init(args.db,args.dbformat,**init_kwargs)

    source=sys.stdin if args.input=="stdin" else args.input
    if args.input_format=="guess":
        args.input_format=Tools.guess_format(args.input)

    query=0          # entries queried
    hits=0           # entries with at least one hit
    query_length=0   # sum of len(entry) over all entries
    hits_number=0    # "HT" results counted over all entries
    for (idx,entry) in enumerate(TableIO.parse(source,args.input_format)):
        if idx%100==0:
            print >>sys.stderr,"query ",idx," entries\r",
        print >>out,"QR\t",entry
        found=0
        query+=1
        query_length+=len(entry)
        results=dbi.query(entry,**query_kwargs)
        if isinstance(results,(numpy.ndarray,list)):
            # The whole sequence is reported on one line and counted once.
            print >>out,"HT\t",
            for value in results:
                print >>out,str(value)+",",
            print >>out,""
            found=1
            hits_number+=1
        elif isinstance(results,str):
            print >>out,"HT\t",results
            found=1
            hits_number+=1
        else:
            for item in results:
                print >>out,"HT\t",item
                found=1
                hits_number+=1
            if args.dbformat=="tabix":
                # Query again with every "chr" removed from the chromosome
                # name; this modifies the entry in place.
                entry.chr=entry.chr.replace("chr","")
                for item in dbi.query(entry,**query_kwargs):
                    print >>out,"HT\t",item
                    found=1
                    hits_number+=1
        hits+=found
    print >>out,"# Query Number:",query,"\n# Query Have Hits:",hits
    print >>out,"# Query Length:",query_length
    print >>out,"# Hits Number:",hits_number
def Main():
    '''
    Run every entry of the input table against a database and report hits.

    ParseArg() supplies the settings.  The report starts with a "#"
    provenance header, then has one "QR" line per input entry followed by
    its "HT" lines, and ends with "#" summary counters.  How the "HT"
    lines look depends on the type dbi.query() returns: a numpy array or
    list is printed comma separated on one line, a str is printed as is,
    and any other result is iterated with one line per item.  A progress
    message goes to stderr every 100 entries.
    '''
    global args, out
    args = ParseArg()

    # Fall back to stdout when the output file cannot be opened.
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout

    # command_line aliases sys.argv, so sys.argv[0] loses its directory
    # part as well.
    command_line = sys.argv
    command_line[0] = command_line[0].split("/")[-1]
    print >> out, "# This data was generated by program ", command_line[0], "(version %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :\n#\t", " ".join(command_line)

    # A database format of "guess" becomes "tabix" for ".gz" files (the
    # guessed format then describes the tabix content), else the guess.
    if args.dbformat == "guess":
        if Tools.suffix(args.db) == "gz":
            args.dbformat = "tabix"
            args.tabix_format = Tools.guess_format(args.db)
        else:
            args.dbformat = Tools.guess_format(args.db)

    query_options = {}
    if args.query_method:
        query_options["method"] = args.query_method
    init_options = {}
    if args.tabix_format:
        init_options["tabix"] = args.tabix_format
    dbi = DBI.init(args.db, args.dbformat, **init_options)

    if args.input == "stdin":
        table = sys.stdin
    else:
        table = args.input
    if args.input_format == "guess":
        args.input_format = Tools.guess_format(args.input)

    # Counters for the closing summary.
    hits = 0
    query = 0
    query_length = 0
    hits_number = 0
    for (count, x) in enumerate(TableIO.parse(table, args.input_format)):
        if count % 100 == 0:
            print >> sys.stderr, "query ", count, " entries\r",
        print >> out, "QR\t", x
        query += 1
        query_length += len(x)
        hit = 0
        results = dbi.query(x, **query_options)
        if isinstance(results, (numpy.ndarray, list)):
            # One line for the whole sequence; it counts as a single hit.
            print >> out, "HT\t",
            for value in results:
                print >> out, str(value) + ",",
            print >> out, ""
            hit = 1
            hits_number += 1
        elif isinstance(results, str):
            print >> out, "HT\t", results
            hit = 1
            hits_number += 1
        else:
            for record in results:
                print >> out, "HT\t", record
                hit = 1
                hits_number += 1
            if args.dbformat == "tabix":
                # Second attempt with "chr" stripped from the chromosome
                # name (x is changed in place).
                x.chr = x.chr.replace("chr", "")
                for record in dbi.query(x, **query_options):
                    print >> out, "HT\t", record
                    hit = 1
                    hits_number += 1
        hits += hit
    print >> out, "# Query Number:", query, "\n# Query Have Hits:", hits
    print >> out, "# Query Length:", query_length
    print >> out, "# Hits Number:", hits_number
def Main():
    '''
    Annotate every input record with the IDs of the gene features it hits.

    Seven databases are used: the gene table itself (args.genetab) plus
    six built from it - upstream and downstream regions, exons, introns,
    3' UTRs and 5' UTRs.  After a "#" provenance header and a column
    header chosen from the type of the first record, each input record is
    written followed by seven tab-introduced fields, one per database,
    holding toIDs() of that database's query result.
    '''
    global args,out
    args=ParseArg()

    # Output handle: fall back to stdout if the file cannot be opened.
    if args.output=="stdout":
        out=sys.stdout
    else:
        try:
            out=open(args.output,"w")
        except IOError:
            print >>sys.stderr,"can't open file ",args.output,"to write. Using stdout instead"
            out=sys.stdout

    # Input handle: a "gz" extension is read through gzip; an unreadable
    # file falls back to stdin.
    if args.input=="stdin":
        fin=sys.stdin
    else:
        try:
            x=args.input.split(".")
            opener=gzip.open if x[-1]=="gz" else open
            fin=opener(args.input,"r")
        except IOError:
            print >>sys.stderr,"can't read file",args.input
            fin=sys.stdin

    print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION,"in bam2x ( https://github.com/nimezhu/bam2x )"
    print >>out,"# Date: ",time.asctime()
    print >>out,"# The command line is :"
    print >>out,"#\t"," ".join(sys.argv)

    # Collect the per-gene sub-features that back the derived databases.
    gene=DBI.init(args.genetab,args.gene_format)
    upstream_list=[]
    downstream_list=[]
    exons_list=[]
    introns_list=[]
    utr3_list=[]
    utr5_list=[]
    for g in gene:
        upstream_list.append(g.upstream(args.upstream))
        downstream_list.append(g.downstream(args.downstream))
        exons_list.extend(g.Exons())
        introns_list.extend(g.Introns())
        # Genes without a UTR report None and are skipped.
        if g.utr3() is not None:
            utr3_list.append(g.utr3())
        if g.utr5() is not None:
            utr5_list.append(g.utr5())
    upstream=DBI.init(upstream_list,"bed")
    downstream=DBI.init(downstream_list,"bed")
    exons=DBI.init(exons_list,"bed")
    introns=DBI.init(introns_list,"bed")
    utr3=DBI.init(utr3_list,"genebed")
    utr5=DBI.init(utr5_list,"genebed")
    # Order of the annotation columns in every output row.
    databases=(gene,upstream,downstream,exons,introns,utr3,utr5)

    if args.format=="guess":
        args.format=Tools.guess_format(args.input)
    for (i0,rec) in enumerate(TableIO.parse(fin,args.format)):
        if i0==0:
            # The column header depends on the type of the first record.
            if isinstance(rec,Bed12):
                header="#chr\tstart\tend\tname\tscore\tstrand\tthick_start\tthick_end\titem_rgb\tblock_count\tblock_sizes\tblock_starts\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            elif isinstance(rec,GeneBed):
                header="#name\tchr\tstrand\tstart\tend\tcds_start\texon_count\texon_starts\texont_ends\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            else:
                header="#chr\tstart\tend\tname\tscore\tstrand\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            print >>out,header
        # Trailing commas keep the record and its seven fields on one line;
        # the bare print ends the row.
        print >>out,rec,
        for db in databases:
            print >>out,"\t",toIDs(db.query(rec)),
        print >>out
def Main():
    """
    Write each input record together with the gene features it hits.

    The gene table (args.genetab) is loaded as a database, and six more
    databases are derived from its genes: upstream regions, downstream
    regions, exons, introns, 3' UTRs and 5' UTRs.  The output starts with
    a "#" provenance header and a column header that depends on the type
    of the first record; every following row is the input record plus
    seven fields with toIDs() of the query result from each database.
    """
    global args, out
    args = ParseArg()

    # Output handle, with stdout as the fallback when opening fails.
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
        except IOError:
            print >>sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout

    # Input handle: gzip for a "gz" extension, stdin when unreadable.
    if args.input == "stdin":
        fin = sys.stdin
    else:
        try:
            x = args.input.split(".")
            if x[-1] != "gz":
                fin = open(args.input, "r")
            else:
                fin = gzip.open(args.input, "r")
        except IOError:
            print >>sys.stderr, "can't read file", args.input
            fin = sys.stdin

    print >> out, "# This data was generated by program ", sys.argv[0], " (version: %s)" % VERSION,
    print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
    print >> out, "# Date: ", time.asctime()
    print >> out, "# The command line is :"
    print >> out, "#\t", " ".join(sys.argv)

    gene = DBI.init(args.genetab, args.gene_format)
    upstream_list, downstream_list = [], []
    exons_list, introns_list = [], []
    utr3_list, utr5_list = [], []
    for g in gene:
        upstream_list.append(g.upstream(args.upstream))
        downstream_list.append(g.downstream(args.downstream))
        exons_list.extend(g.Exons())
        introns_list.extend(g.Introns())
        # A UTR of None means the gene has none; leave it out.
        if g.utr3() is not None:
            utr3_list.append(g.utr3())
        if g.utr5() is not None:
            utr5_list.append(g.utr5())
    upstream = DBI.init(upstream_list, "bed")
    downstream = DBI.init(downstream_list, "bed")
    exons = DBI.init(exons_list, "bed")
    introns = DBI.init(introns_list, "bed")
    utr3 = DBI.init(utr3_list, "genebed")
    utr5 = DBI.init(utr5_list, "genebed")

    if args.format == "guess":
        args.format = Tools.guess_format(args.input)
    for (i0, i) in enumerate(TableIO.parse(fin, args.format)):
        if i0 == 0:
            # Pick the column header that matches the first record's type.
            if isinstance(i, Bed12):
                columns = "#chr\tstart\tend\tname\tscore\tstrand\tthick_start\tthick_end\titem_rgb\tblock_count\tblock_sizes\tblock_starts\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            elif isinstance(i, GeneBed):
                columns = "#name\tchr\tstrand\tstart\tend\tcds_start\texon_count\texon_starts\texont_ends\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            else:
                columns = "#chr\tstart\tend\tname\tscore\tstrand\tgene\tupstream\tdownstream\texon\tintron\tutr3\tutr5"
            print >> out, columns
        # One print statement per row; items are evaluated and written
        # left to right, so the databases are queried in column order.
        print >> out, i, \
            "\t", toIDs(gene.query(i)), \
            "\t", toIDs(upstream.query(i)), \
            "\t", toIDs(downstream.query(i)), \
            "\t", toIDs(exons.query(i)), \
            "\t", toIDs(introns.query(i)), \
            "\t", toIDs(utr3.query(i)), \
            "\t", toIDs(utr5.query(i))