示例#1
0
def _appendFileContent( srcFile, dstFile ):
	"""
	Append the raw bytes of 'srcFile' at the end of 'dstFile' ('dstFile' is created if missing).
	"""
	# local import: the file's import block is not known to provide shutil
	import shutil
	srcHandler = open( srcFile, "rb" )
	try:
		dstHandler = open( dstFile, "ab" )
		try:
			shutil.copyfileobj( srcHandler, dstHandler )
		finally:
			dstHandler.close()
	finally:
		srcHandler.close()


def sortSequencesByIncreasingLength( inFile, outFile, verbose=0 ):
	"""
	Save sequences in 'inFile' into 'outFile' sorted by their length in increasing order.
	
	Each sequence is read with a Bioseq instance and saved in its own temporary
	file in the current working directory (named '<length>bp_<rank>nb'). The
	temporary files are then appended to 'outFile' from the shortest sequence
	to the longest one, and removed. Sequences of identical length keep the
	order they have in 'inFile'.
	
	Parameters:
	  inFile  -- name of the input sequence file
	  outFile -- name of the output file; if it already exists it is removed first,
	             and it is not created when 'inFile' contains no sequence
	  verbose -- verbosity level (> 0: progress message, > 1: one line per sequence)
	
	Return 0 on success. Call sys.exit(1) when 'inFile' doesn't exist or when a
	temporary file can't be appended to 'outFile'.
	"""
	if verbose > 0:
		print( "sort sequences by increasing length" )
		sys.stdout.flush()
	if not os.path.exists( inFile ):
		print( "ERROR: file '%s' doesn't exist" % ( inFile ) )
		sys.exit(1)
		
	# Keep track of every temporary file we create, with the length of its
	# sequence, instead of searching the directory afterwards: unrelated or
	# stale files matching '*bp_*nb' must be neither concatenated nor deleted.
	lTmpFiles = []
	try:
		# read each seq one by one
		# save them in distinct temporary files
		# with their length in the name
		inFileHandler = open( inFile, "r" )
		try:
			bs = Bioseq()
			countSeq = 0
			while True:
				bs.read( inFileHandler )
				if bs.header is None:
					break
				countSeq += 1
				seqLength = bs.getLength()
				tmpFile = "%ibp_%inb" % ( seqLength, countSeq )
				# registered before saving so that a partially written file
				# is still cleaned up if save() fails
				lTmpFiles.append( ( seqLength, tmpFile ) )
				bs.save( tmpFile )
				if verbose > 1:
					print( "%s (%i bp) saved in '%s'" % ( bs.header, seqLength, tmpFile ) )
				bs.header = ""
				bs.sequence = ""
		finally:
			inFileHandler.close()
			
		# sort temporary files by sequence length
		# concatenate them into the output file
		if os.path.exists( outFile ):
			os.remove( outFile )
		# list.sort() is stable: equal lengths stay in input order
		lTmpFiles.sort( key=lambda lengthAndName: lengthAndName[0] )
		for seqLength, fileName in lTmpFiles:
			try:
				# plain byte copy rather than a shell 'cat': no quoting issue
				# with unusual file names and no dependency on a Unix shell
				_appendFileContent( fileName, outFile )
			except EnvironmentError:
				print( "ERROR while concatenating '%s' with '%s'" % ( fileName, outFile ) )
				sys.exit(1)
			os.remove( fileName )
	finally:
		# on any failure (including sys.exit above), don't leave temporary files behind
		for seqLength, fileName in lTmpFiles:
			if os.path.exists( fileName ):
				os.remove( fileName )
				
	return 0