def sortSequencesByIncreasingLength( inFile, outFile, verbose=0 ):
    """
    Save sequences in 'inFile' into 'outFile' sorted by their length in increasing order.

    Each sequence is first saved in its own temporary file in the current
    working directory, named '<length>bp_<rank>nb' where <rank> is the
    1-based position of the sequence in 'inFile'. These files are then
    concatenated into 'outFile' by increasing length and removed.
    Sequences of equal length keep their order of appearance in 'inFile'.

    @param inFile: name of the input file (read via Bioseq.read())
    @param outFile: name of the output file; removed first if it already
                    exists, and not created if 'inFile' holds no sequence
    @param verbose: >0 prints a progress message, >1 also prints one line
                    per saved sequence
    @return: 0 on success

    The program exits with status 1 (sys.exit) if 'inFile' doesn't exist
    or if a temporary file can't be appended to 'outFile'.
    """
    # local import: only this function needs it
    import shutil

    if verbose > 0:
        print( "sort sequences by increasing length" )
        sys.stdout.flush()
    if not os.path.exists( inFile ):
        print( "ERROR: file '%s' doesn't exist" % ( inFile ) )
        sys.exit(1)

    # read each seq one by one
    # save them in distinct temporary files
    # with their length in the name
    # Remember exactly which files are created here: globbing the working
    # directory afterwards would also pick up (and delete) unrelated files
    # that happen to match the naming pattern.
    lFiles = []
    bs = Bioseq()
    countSeq = 0
    with open( inFile, "r" ) as inFileHandler:
        while True:
            bs.read( inFileHandler )
            # a header left at None is the end-of-input signal
            if bs.header is None:
                break
            countSeq += 1
            seqLength = bs.getLength()
            tmpFile = "%ibp_%inb" % ( seqLength, countSeq )
            # Drop any leftover file of the same name (e.g. from an
            # interrupted run) so that its content can't end up in the
            # output should Bioseq.save() append rather than overwrite.
            if os.path.exists( tmpFile ):
                os.remove( tmpFile )
            bs.save( tmpFile )
            lFiles.append( tmpFile )
            if verbose > 1:
                print( "%s (%i bp) saved in '%s'" % ( bs.header, seqLength, tmpFile ) )
            # reset the record before the next read
            bs.header = ""
            bs.sequence = ""

    # sort temporary file names
    # concatenate them into the output file
    if os.path.exists( outFile ):
        os.remove( outFile )
    # list.sort() is stable: equal lengths stay in input order
    lFiles.sort( key=lambda s: int( s.split("bp_")[0] ) )
    for fileName in lFiles:
        # Byte-for-byte append done in Python instead of a shell 'cat', so
        # file names with spaces or shell metacharacters are handled safely.
        try:
            with open( outFile, "ab" ) as outFileHandler:
                with open( fileName, "rb" ) as tmpFileHandler:
                    shutil.copyfileobj( tmpFileHandler, outFileHandler )
        except ( IOError, OSError ):
            print( "ERROR while concatenating '%s' with '%s'" % ( fileName, outFile ) )
            sys.exit(1)
        os.remove( fileName )

    return 0