def Main(): ''' IO TEMPLATE ''' global args,out args=ParseArg() fin=IO.fopen(args.input,"r") out=IO.fopen(args.output,"w") ''' END OF IO TEMPLATE ''' print >>out,"# This data was generated by program ",sys.argv[0]," (version: %s)"%VERSION, print >>out,"in bam2x ( https://github.com/nimezhu/bam2x )" print >>out,"# Date: ",time.asctime() print >>out,"# The command line is :" print >>out,"#\t"," ".join(sys.argv) dbi=DBI.init(args.db,Tools.guess_format(args.db)) references=dbi.bamfiles[0].references for i in TableIO.parse(fin,args.format): print i n=0 c_count=0 reads=dbi.query(i,args.method) for read in reads: compatible=Tools.compatible_with_transcript(read,i,references=references,strand=args.strand) print "HT:" for i0,r in enumerate(TableIO.parse(read.reads,"bam2bed12",references=references)): print "READ"+str(i0)+"\t",r print "COMPATIBLE:",compatible,"\n\n" if compatible: c_count+=1 n+=1 print "COMPATIBLE / ALL OVERLAP READS = ",c_count,"/",n print "RATIO\t%.4f"%float(c_count)/n
def Main(): ''' IO TEMPLATE ''' global args, out args = ParseArg() fin = IO.fopen(args.input, "r") out = IO.fopen(args.output, "w") ''' END OF IO TEMPLATE ''' print >> out, "# This data was generated by program ", sys.argv[ 0], " (version: %s)" % VERSION, print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )" print >> out, "# Date: ", time.asctime() print >> out, "# The command line is :" print >> out, "#\t", " ".join(sys.argv) dbi = DBI.init(args.db, Tools.guess_format(args.db)) references = dbi.bamfiles[0].references for i in TableIO.parse(fin, args.format): print i n = 0 c_count = 0 reads = dbi.query(i, args.method) for read in reads: compatible = Tools.compatible_with_transcript( read, i, references=references, strand=args.strand) print "HT:" for i0, r in enumerate( TableIO.parse(read.reads, "bam2bed12", references=references)): print "READ" + str(i0) + "\t", r print "COMPATIBLE:", compatible, "\n\n" if compatible: c_count += 1 n += 1 print "COMPATIBLE / ALL OVERLAP READS = ", c_count, "/", n print "RATIO\t%.4f" % float(c_count) / n
def Main():
    '''
    Query the database once per input annotation and write the hits.

    Settings come from ParseArg() (output, input, input_format, db,
    dbformat, query_method, silence); `args` and `out` are stored as
    module globals.

    For each query a "QR" line is written, followed (unless
    args.silence is set) by its "HT" hit lines.  Iterable results that
    are neither a list, a numpy array nor a str additionally get an
    "HN" line with the hit count and, when any hit is compatible with
    the query, a "CP" line.  A four-line summary closes the output.

    An output file opened here is closed before returning; sys.stdout
    is left open.
    '''
    global args, out
    args = ParseArg()
    query_options = {}

    opened_here = False
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
            opened_here = True
        except IOError:
            print >>sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout

    try:
        # Same list object as sys.argv: the program name is shortened in place.
        argv = sys.argv
        argv[0] = argv[0].split("/")[-1]
        print >>out, "# This data was generated by program ", argv[0], "(version %s)" % VERSION,
        print >>out, "in bam2x ( https://github.com/nimezhu/bam2x )"
        print >>out, "# Date: ", time.asctime()
        print >>out, "# The command line is :\n#\t", " ".join(argv)

        if args.query_method:
            query_options["method"] = args.query_method
        dbi = DBI.init(args.db, args.dbformat)

        if args.input == "stdin":
            source = sys.stdin
        else:
            source = args.input

        hits = 0            # queries with at least one hit
        query = 0           # queries processed
        query_length = 0    # sum of len(x) over all queries
        hits_number = 0     # hits counted over all queries

        for (i0, x) in enumerate(TableIO.parse(source, args.input_format)):
            if i0 % 10 == 0:
                # Progress indicator; "\r" keeps it on one terminal line.
                print >>sys.stderr, "query ", i0, " entries\r",
            print >>out, "QR\t", x
            hit = 0
            query += 1
            query_length += len(x)
            results = dbi.query(x, **query_options)
            compatible = 0

            if isinstance(results, (numpy.ndarray, list)):
                # A list/array result counts as a single hit.
                if not args.silence:
                    print >>out, "HT\t",
                    for value in results:
                        print >>out, str(value) + ",",
                    print >>out, ""
                hit = 1
                hits_number += 1
            elif isinstance(results, str):
                if not args.silence:
                    print >>out, "HT\t",
                    print >>out, results
                hit = 1
                hits_number += 1
            else:
                this_query_hits = 0
                for j in results:
                    if not args.silence:
                        print >>out, "HT\t", j,
                    hit = 1
                    hits_number += 1
                    this_query_hits += 1
                    if isinstance(j, xplib.Annotation.Bed12) and isinstance(x, xplib.Annotation.Bed12):
                        compatible_binary = Tools.compatible_with_transcript(j, x)
                        if not args.silence:
                            print >>out, "\tCompatible:", compatible_binary
                        if compatible_binary:
                            compatible += 1
                    else:
                        # Terminates the "HT" line left open above.
                        if not args.silence:
                            print >>out, ""
                print >>out, "HN\t", this_query_hits
                if compatible > 0:
                    print >>out, "CP\t", compatible

            if args.dbformat == "tabix":
                # Query a second time with "chr" removed from the
                # chromosome name.  These hits are written even when
                # args.silence is set, as before.
                x.chr = x.chr.replace("chr", "")
                for j in dbi.query(x, **query_options):
                    print >>out, "HT\t", j
                    hit = 1
                    hits_number += 1
            hits += hit

        print >>out, "# Query Number:", query, "\n# Query Have Hits:", hits
        print >>out, "# Query Length:", query_length
        print >>out, "# Hits Number:", hits_number
    finally:
        # Close (and thereby flush) only a file this function opened.
        if opened_here:
            out.close()
def compare_reads(isoforms):
    '''
    Estimate the relative proportion of each isoform in one transcript
    region from the reads overlapping it, and print the result to `out`.

    Uses the module-level globals `dbi` (read database), `out` (output
    handle) and `args` (BYY, threshold, hits_only).

    isoforms: non-empty sequence of annotations exposing `chr`, `start`,
        `stop` and `score`.  An empty sequence raises IndexError.

    Output: a "REGION" line, an "ISOFORM_INPUT_NUMBER" line, one "HT"
    line for every isoform whose proportion exceeds args.threshold and,
    unless args.hits_only is set, one "NT" line for every other isoform,
    then a closing "//" line.  A printed isoform whose score is 0.0 has
    its score replaced by the estimated proportion.
    '''
    isoforms_set = list(isoforms)
    chrom = isoforms_set[0].chr
    min_start = min(i.start for i in isoforms_set)
    max_stop = max(i.stop for i in isoforms_set)

    transcript_region = Bed([chrom, min_start, max_stop])
    print >>out, "REGION\t", chrom, "\t", min_start, "\t", max_stop
    print >>out, "ISOFORM_INPUT_NUMBER\t", len(isoforms_set)

    # Read every read in this transcript region.
    reads_set = list(dbi.query(transcript_region, method="fetch12"))
    reads_num = len(reads_set)

    # Encode each read as an integer with one bit per isoform (the first
    # isoform ends up in the highest bit) and count reads per code.
    l = len(isoforms_set)
    bincodes = {}
    for read in reads_set:
        bincode = 0
        for isoform in isoforms_set:
            bincode <<= 1
            if Tools.compatible_with_transcript(read, isoform):
                bincode += 1
        bincodes[bincode] = bincodes.get(bincode, 0) + 1

    # Avoid dividing by zero when the region has no reads.
    total = reads_num
    if total == 0:
        total = 0.001

    # Isoform indices selected by get_bit_n for each code.  Computed once
    # because the codes never change during the iterations.
    # NOTE(review): presumably get_bit_n(j, l, code) tests isoform j's
    # bit and has no side effects -- confirm against its definition.
    code_members = {}
    for code in bincodes:
        code_members[code] = [j for j in range(l) if get_bit_n(j, l, code)]

    # EM: start from a uniform distribution.
    proportion = [1.0 / l for _ in range(l)]
    iterate_time = 0
    while True:
        # E step: share each code's read count among its isoforms in
        # proportion to the current estimates.
        totals = [0.0] * l
        for code in bincodes:
            members = code_members[code]
            row_total = 0.0
            for j in members:
                row_total += proportion[j]
            for j in members:
                totals[j] += bincodes[code] * proportion[j] / row_total
        # M step.  A new list is built every iteration: reusing one list
        # made `proportion` and `new_proportion` the same object, so the
        # distance was always 0 from the second iteration on and the
        # loop stopped before converging.
        new_proportion = [t / total for t in totals]
        dis = distance(proportion, new_proportion)
        proportion = new_proportion
        iterate_time += 1
        if dis < 1e-05:
            break
        if args.BYY and iterate_time > 10:
            break

    # BYY hard cut: give each code's whole count to its currently
    # largest isoform (the first one wins ties).  The cap is a safety
    # net in case the hard assignment oscillates instead of converging.
    max_byy_rounds = 1000
    byy_rounds = 0
    while args.BYY and byy_rounds < max_byy_rounds:
        totals = [0.0] * l
        for code in bincodes:
            maxj = -1
            for j in code_members[code]:
                if maxj == -1 or proportion[j] > proportion[maxj]:
                    maxj = j
            if maxj != -1:
                totals[maxj] += bincodes[code]
        new_proportion = [t / total for t in totals]
        dis = distance(proportion, new_proportion)
        proportion = new_proportion
        byy_rounds += 1
        if dis < 1e-05:
            break

    # Print the isoforms.
    for i, x in enumerate(isoforms_set):
        if proportion[i] > args.threshold:
            tag = "HT\t"
        elif not args.hits_only:
            tag = "NT\t"
        else:
            continue
        if x.score == 0.0:
            x.score = proportion[i]
        print >>out, tag, x, "\t", proportion[i]
    print >>out, "//"
def Main():
    '''
    Query the database once per input annotation and write the hits.

    Settings come from ParseArg() (output, input, input_format, db,
    dbformat, query_method, silence); `args` and `out` are stored as
    module globals.

    For each query a "QR" line is written, followed (unless
    args.silence is set) by its "HT" hit lines.  Iterable results that
    are neither a list, a numpy array nor a str additionally get an
    "HN" line with the hit count and, when any hit is compatible with
    the query, a "CP" line.  A four-line summary closes the output.

    An output file opened here is closed before returning; sys.stdout
    is left open.
    '''
    global args, out
    args = ParseArg()
    query_options = {}

    opened_here = False
    if args.output == "stdout":
        out = sys.stdout
    else:
        try:
            out = open(args.output, "w")
            opened_here = True
        except IOError:
            print >> sys.stderr, "can't open file ", args.output, "to write. Using stdout instead"
            out = sys.stdout

    try:
        # Same list object as sys.argv: the program name is shortened in place.
        argv = sys.argv
        argv[0] = argv[0].split("/")[-1]
        print >> out, "# This data was generated by program ", argv[0], "(version %s)" % VERSION,
        print >> out, "in bam2x ( https://github.com/nimezhu/bam2x )"
        print >> out, "# Date: ", time.asctime()
        print >> out, "# The command line is :\n#\t", " ".join(argv)

        if args.query_method:
            query_options["method"] = args.query_method
        dbi = DBI.init(args.db, args.dbformat)

        if args.input == "stdin":
            source = sys.stdin
        else:
            source = args.input

        hits = 0            # queries with at least one hit
        query = 0           # queries processed
        query_length = 0    # sum of len(x) over all queries
        hits_number = 0     # hits counted over all queries

        for (i0, x) in enumerate(TableIO.parse(source, args.input_format)):
            if i0 % 10 == 0:
                # Progress indicator; "\r" keeps it on one terminal line.
                print >> sys.stderr, "query ", i0, " entries\r",
            print >> out, "QR\t", x
            hit = 0
            query += 1
            query_length += len(x)
            results = dbi.query(x, **query_options)
            compatible = 0

            if isinstance(results, (numpy.ndarray, list)):
                # A list/array result counts as a single hit.
                if not args.silence:
                    print >> out, "HT\t",
                    for value in results:
                        print >> out, str(value) + ",",
                    print >> out, ""
                hit = 1
                hits_number += 1
            elif isinstance(results, str):
                if not args.silence:
                    print >> out, "HT\t",
                    print >> out, results
                hit = 1
                hits_number += 1
            else:
                this_query_hits = 0
                for j in results:
                    if not args.silence:
                        print >> out, "HT\t", j,
                    hit = 1
                    hits_number += 1
                    this_query_hits += 1
                    if isinstance(j, xplib.Annotation.Bed12) and isinstance(
                            x, xplib.Annotation.Bed12):
                        compatible_binary = Tools.compatible_with_transcript(j, x)
                        if not args.silence:
                            print >> out, "\tCompatible:", compatible_binary
                        if compatible_binary:
                            compatible += 1
                    else:
                        # Terminates the "HT" line left open above.
                        if not args.silence:
                            print >> out, ""
                print >> out, "HN\t", this_query_hits
                if compatible > 0:
                    print >> out, "CP\t", compatible

            if args.dbformat == "tabix":
                # Query a second time with "chr" removed from the
                # chromosome name.  These hits are written even when
                # args.silence is set, as before.
                x.chr = x.chr.replace("chr", "")
                for j in dbi.query(x, **query_options):
                    print >> out, "HT\t", j
                    hit = 1
                    hits_number += 1
            hits += hit

        print >> out, "# Query Number:", query, "\n# Query Have Hits:", hits
        print >> out, "# Query Length:", query_length
        print >> out, "# Hits Number:", hits_number
    finally:
        # Close (and thereby flush) only a file this function opened.
        if opened_here:
            out.close()