示例#1
0
def dbTraduit(inFileName,phase=0,complement='T',pep_filename=""):
	"""
	deprecated

	Read every sequence of 'inFileName' with Bioseq.read() and write the result
	of Bioseq.traduit() for the requested frames into 'pep_filename'
	(presumably the translation into peptides -- confirm against Bioseq).

	@param inFileName: path of the input sequence file
	@param phase: 0 for frames 1, 2 and 3 (plus -1, -2 and -3 when 'complement'
	is 'T'); 1, 2 or 3 for that single frame; -1, -2 or -3 for that single frame
	on the sequence returned by Bioseq.complement()
	@param complement: 'T' to also process the complemented sequence when
	'phase' is 0
	@param pep_filename: path of the output file, by default 'inFileName' + '.pep'

	Each written header is the input header followed by " (phase N)".
	"""
	if pep_filename=="":
		pep_filename=inFileName+'.pep'

	# the input is opened first so that a missing input does not create the output;
	# 'with' guarantees both handles are closed even if Bioseq raises
	with open(inFileName) as file_db:
		with open(pep_filename,'w') as file_pep:
			seq=Bioseq()
			seq_out=Bioseq()
			numseq=0
			while True:
				seq.read(file_db)
				# the loop relies on Bioseq.read() leaving 'sequence' to None at end of file
				if seq.sequence is None:
					break
				numseq+=1
				print("sequence # %s = %s [ %s ...]" % (numseq,seq.getLength(),seq.header[0:40]))

				if phase>=0:
					for frame in (1,2,3):
						if phase in (0,frame):
							seq_out.sequence=seq.traduit(frame)
							seq_out.header=seq.header+" (phase %d)" % frame
							seq_out.write(file_pep)

				if complement=='T' or phase<0:
					# 'seq' is overwritten by the next read(), so it can hold the complement
					seq.sequence=seq.complement()
					for frame in (1,2,3):
						if phase in (0,-frame):
							seq_out.sequence=seq.traduit(frame)
							seq_out.header=seq.header+" (phase -%d)" % frame
							seq_out.write(file_pep)
示例#2
0
def sortSequencesByIncreasingLength( inFile, outFile, verbose=0 ):
	"""
	Save sequences in 'inFile' into 'outFile' sorted by their length in increasing order.

	Sequences of equal length keep their order of appearance in 'inFile'.
	Each sequence is first saved in its own temporary file, inside a private
	temporary directory created in the current working directory, so that only
	one sequence is handled by the Bioseq instance at a time.

	@param inFile: path of the input sequence file
	@param outFile: path of the output file (replaced if it already exists;
	not created when 'inFile' contains no sequence)
	@param verbose: verbosity level (>0: progress message, >1: one line per sequence)
	@return: 0 on success; the process exits with status 1 if 'inFile' is
	missing or if a temporary file cannot be appended to 'outFile'
	"""
	# imported here so that the function does not depend on the file-level imports
	import shutil
	import tempfile

	if verbose > 0:
		print( "sort sequences by increasing length" )
		sys.stdout.flush()
	if not os.path.exists( inFile ):
		print( "ERROR: file '%s' doesn't exist" % ( inFile ) )
		sys.exit(1)

	# a private directory avoids picking up (and deleting) unrelated files
	# of the working directory whose name looks like a temporary file
	tmpDir = tempfile.mkdtemp( prefix="sortSeqByLength_", dir=os.getcwd() )
	try:
		# read each seq one by one
		# save them in distinct temporary files
		# with their length in the name
		lTmpFiles = []
		inFileHandler = open( inFile, "r" )
		try:
			bs = Bioseq()
			countSeq = 0
			while True:
				bs.read( inFileHandler )
				# the loop relies on Bioseq.read() leaving 'header' to None at end of file
				if bs.header is None:
					break
				countSeq += 1
				seqLength = bs.getLength()
				tmpFile = os.path.join( tmpDir, "%ibp_%inb" % ( seqLength, countSeq ) )
				bs.save( tmpFile )
				lTmpFiles.append( ( seqLength, countSeq, tmpFile ) )
				if verbose > 1:
					print( "%s (%i bp) saved in '%s'" % ( bs.header, seqLength, tmpFile ) )
				bs.header = ""
				bs.sequence = ""
		finally:
			inFileHandler.close()

		# sort temporary files by (length, rank in the input)
		# concatenate them into the output file
		if os.path.exists( outFile ):
			os.remove( outFile )
		lTmpFiles.sort()
		for seqLength, rank, tmpFile in lTmpFiles:
			try:
				tmpFileHandler = open( tmpFile, "rb" )
				try:
					# append mode: 'outFile' only appears once there is something to write
					outFileHandler = open( outFile, "ab" )
					try:
						shutil.copyfileobj( tmpFileHandler, outFileHandler )
					finally:
						outFileHandler.close()
				finally:
					tmpFileHandler.close()
			except ( IOError, OSError ):
				print( "ERROR while concatenating '%s' with '%s'" % ( tmpFile, outFile ) )
				sys.exit(1)
			os.remove( tmpFile )
	finally:
		# also reached on sys.exit(): never leave temporary files behind
		shutil.rmtree( tmpDir, ignore_errors=True )

	return 0
示例#3
0
def dbComplement(inFileName,comp_filename=""):
	"""
	deprecated

	Read every sequence of 'inFileName' with Bioseq.read(), replace it by the
	result of Bioseq.complement() and write it into 'comp_filename' with
	" (complement!)" appended to its header.

	@param inFileName: path of the input sequence file
	@param comp_filename: path of the output file, by default 'inFileName' + '.comp'
	"""
	if comp_filename=="":
		comp_filename=inFileName+'.comp'

	# the input is opened first so that a missing input does not create the output;
	# 'with' guarantees both handles are closed even if Bioseq raises
	with open(inFileName) as file_db:
		with open(comp_filename,'w') as file_comp:
			seq=Bioseq()
			numseq=0
			while True:
				seq.read(file_db)
				# the loop relies on Bioseq.read() leaving 'sequence' to None at end of file
				if seq.sequence is None:
					break
				numseq+=1
				print("sequence # %s = %s [ %s ...]" % (numseq,seq.getLength(),seq.header[0:40]))
				seq.sequence=seq.complement()
				seq.header=seq.header+" (complement!)"
				seq.write(file_comp)
示例#4
0
def dbConsensus(filename,consensus_filename,max_set_size=20,max_len=20000,min_len=50,min_base_nb=1):
	"""
	deprecated

	Build consensus sequences for 'filename' by driving external programs
	through the shell ("orienter", "malign", "consensusFastaAli.py", "cp",
	"cat", "rm").

	The sequences of 'filename'.oriented (expected to be produced by
	"orienter") are split into sets; each set is aligned with "malign", its
	consensus is computed with "consensusFastaAli.py" and appended to
	'consensus_filename'. The alignment of set N is copied to
	'filename'.malignN.fa.

	@param filename: path of the input sequence file
	@param consensus_filename: path of the output file (overwritten)
	@param max_set_size: the number of sequences is halved until it is not
	greater than this value; the result is the size of a set
	@param max_len: sequences must be strictly shorter to be aligned; longer
	ones are appended to 'consensus_filename' with " too long, not aligned"
	added to their header
	@param min_len: sequences must be strictly longer to be aligned
	@param min_base_nb: value given to the "-n" option of "consensusFastaAli.py"
	@return: number of sets which have been processed

	If 'filename'.oriented holds a single sequence (according to dbSize()), it
	is written with the header "not a consensus" and the process exits with
	status 1.

	NOTE(review): file names are pasted unquoted into shell commands, so they
	must not contain spaces or shell metacharacters.
	NOTE(review): a sequence whose length is exactly 'min_len' or 'max_len' is
	dropped without any message.
	"""
	too_long_tag=" too long, not aligned"

	os.system("orienter "+filename)
	oriented_filename=filename+".oriented"
	tmp_consensus_filename=filename+".oriented.consensus.tmp"
	size_db=dbSize(oriented_filename)
	file_in=open(oriented_filename)
	file_out=open(consensus_filename,'w')
	seq=Bioseq()

	if size_db==1:
		seq.read(file_in)
		seq.header="not a consensus"
		seq.write(file_out)
		file_out.close()
		file_in.close()
		os.system("cp "+oriented_filename+" "+filename+".malign.fa")
		os.system("cp "+oriented_filename+" "+filename+".malign.fa.cons")
		# kept for backward compatibility: callers see the process exit here
		sys.exit(1)

	# the consensus file now exists and is empty; from here on it is only
	# appended to (by "cat >>" and by the writes below), so no handle opened
	# in 'w' mode may stay around or it would overwrite the appended data
	file_out.close()

	seq_in_set=0
	count_set=0

	set_size=size_db
	while set_size>max_set_size:
		set_size=set_size//2

	tmp_file_out=open(tmp_consensus_filename,'w')
	try:
		while True:
			# read subset of sequences
			seq.read(file_in)
			# the loop relies on Bioseq.read() leaving 'sequence' to None at end of file
			last_seq=seq.sequence is None
			if last_seq:
				if seq_in_set==0:
					# nothing left to align: go to the common clean-up
					break
			else:
				length=seq.getLength()
				if length<max_len and length>min_len:
					seq.write(tmp_file_out)
					seq_in_set+=1
				else:
					if length>max_len:
						print(seq.header+" too long!!")
						# str.find() returns -1 (true) when the tag is absent,
						# hence the explicit membership test
						if too_long_tag not in seq.header:
							seq.header=seq.header+too_long_tag
							file_out=open(consensus_filename,'a')
							try:
								seq.write(file_out)
							finally:
								file_out.close()
					if length<min_len:
						print(seq.header+" too short!!")

			# align subset
			if seq_in_set==set_size or last_seq:
				count_set+=1
				print("aligning the set # %s  of  %s  sequences" % (count_set,seq_in_set))
				# flush the set on disk before the external programs read it
				tmp_file_out.close()
				set_suffix=".malign"+str(count_set)+".fa"
				aligned_filename=tmp_consensus_filename+set_suffix
				if seq_in_set>1:
					os.system("nice malign "+tmp_consensus_filename
						  +" 20 -8 16 4 > "+aligned_filename)
					os.system("nice consensusFastaAli.py -n "
						  +str(min_base_nb)+" "+aligned_filename+" ")
					os.system("cp "+aligned_filename+" "+filename+set_suffix)
				else:
					# a single sequence is its own alignment and its own consensus
					os.system("cp "+tmp_consensus_filename+" "+filename+set_suffix)
					os.system("cp "+tmp_consensus_filename+" "+aligned_filename+".cons")

				os.system("cat "+aligned_filename+".cons >> "+consensus_filename)
				seq_in_set=0
				# reopening in 'w' mode empties the file for the next set
				tmp_file_out=open(tmp_consensus_filename,'w')
				if set_size==size_db or last_seq:
					break
	finally:
		tmp_file_out.close()
		file_in.close()
	os.system("rm "+tmp_consensus_filename+"* "+oriented_filename)
	return count_set