def dbORF( inFileName, nb=0, size=0, outFileName="" ):
    """Write the longest ORF candidates of every sequence of a file to a map file.

    Sequences are read one after another from ``inFileName`` with
    ``Bioseq.read``. Each one is upper-cased, scanned with
    ``Bioseq.findORF`` on the sequence as read and again after replacing it
    with ``Bioseq.complement()``, and the resulting candidates are sorted
    from longest to shortest. For each kept candidate the tuple
    ``(length, frame, start, end)`` is printed and one tab-separated line is
    written to the output file::

        ORF|<frame>|<length>    <header>    <start>    <end>

    ``frame`` is the ``findORF`` key plus one, negated for candidates found
    on the complemented sequence.

    Args:
        inFileName: path of the sequence file to read.
        nb: maximum number of candidates kept per sequence; 0 keeps all of
            them. A negative value keeps none.
        size: minimum length (``end - start``, or ``start - end`` on the
            complemented sequence) a candidate must reach to be kept.
        outFileName: path of the output file; when empty, defaults to
            ``inFileName + ".orf.map"``.

    Returns:
        Always 0.
    """
    if outFileName == "":
        outFileName = inFileName + ".orf.map"

    # try/finally rather than relying on the trailing close() calls, so both
    # handles are released even when reading or scanning a sequence raises.
    inFile = open( inFileName )
    try:
        outFile = open( outFileName, "w" )
        try:
            seq = Bioseq()
            numseq = 0
            while True:
                seq.read( inFile )
                # Bioseq.read leaving `sequence` as None is the loop's
                # end-of-input signal.
                if seq.sequence is None:
                    break
                seq.upCase()
                numseq += 1
                # A single pre-formatted string prints identically under the
                # Python 2 print statement and the Python 3 print function.
                print("sequence # %s = %s [ %s ...]"
                      % (numseq, seq.getLength(), seq.header[0:40]))

                best_orf = _collect_orfs(seq, size)
                best_orf.sort(reverse=True)

                # Work on a per-sequence copy of the limit: overwriting `nb`
                # itself would make the first sequence's ORF count cap every
                # following sequence.
                limit = len(best_orf)
                if nb != 0 and nb < limit:
                    limit = nb

                for entry in best_orf[:max(limit, 0)]:
                    print(entry)
                    length, frame, start, end = entry
                    name = "ORF|" + str(frame) + "|" + str(length)
                    outFile.write("%s\t%s\t%d\t%d\n"
                                  % (name, seq.header, start, end))
        finally:
            outFile.close()
    finally:
        inFile.close()
    return 0


def _adjacent_pairs(positions):
    """Yield (previous, current) for every two neighbouring items of positions."""
    previous = None
    have_previous = False
    for current in positions:
        if have_previous:
            yield previous, current
        previous = current
        have_previous = True


def _collect_orfs(seq, size):
    """Return the (length, frame, start, end) candidates of seq at least size long.

    NOTE: this replaces ``seq.sequence`` with ``seq.complement()`` and does
    not restore it.
    """
    candidates = []

    # Sequence as read. Each pair of neighbouring positions reported by
    # findORF for a frame delimits one candidate (presumably these are
    # stop-codon positions -- confirm against Bioseq.findORF).
    orf = seq.findORF()
    for frame in orf.keys():
        for previous, current in _adjacent_pairs(orf[frame]):
            start = previous + 4
            end = current + 3
            if end - start >= size:
                candidates.append((end - start, frame + 1, start, end))

    # Complemented sequence. Positions are converted back using the sequence
    # length, so start > end here, and the frame number is negated.
    seq.sequence = seq.complement()
    orf = seq.findORF()
    seqlen = seq.getLength()
    for frame in orf.keys():
        for previous, current in _adjacent_pairs(orf[frame]):
            start = seqlen - previous - 3
            end = seqlen - current - 2
            if start - end >= size:
                candidates.append((start - end, -(frame + 1), start, end))

    return candidates