示例#1
0
def dbORF( inFileName, nb=0, size=0, outFileName="" ):
	"""
	Find ORFs in every sequence of a file and write the longest ones to a map file.

	Each sequence read with Bioseq.read() is upper-cased and scanned with
	Bioseq.findORF(), first as read and then again after seq.sequence has
	been replaced by seq.complement(). Candidates are sorted by decreasing
	length and written one per line as:

		ORF|<frame>|<length> TAB <sequence header> TAB <start> TAB <end>

	where <frame> is key+1 for the first scan and -(key+1) for the second.

	NOTE(review): findORF() is assumed to return a mapping from a frame index
	to an ordered list of integer positions (presumably stop codons), each
	pair of consecutive positions delimiting one ORF - confirm against Bioseq.

	@param inFileName: path of the sequence file to read
	@param nb: maximum number of ORFs reported per sequence; 0 reports all
	@param size: minimum length (difference between the two bounds) of a kept ORF
	@param outFileName: output path; defaults to inFileName + ".orf.map"
	@return: 0
	"""
	if outFileName == "":
		outFileName = inFileName + ".orf.map"

	# 'with' guarantees both files are closed even if Bioseq raises mid-file.
	with open( inFileName ) as inFile:
		with open( outFileName, "w" ) as outFile:
			seq = Bioseq()
			numseq = 0

			while True:
				seq.read( inFile )
				if seq.sequence is None:
					break
				seq.upCase()
				numseq += 1
				# Single-argument print(...) gives the same output under Python 2 and 3.
				print( "sequence # %s = %s [ %s ...]" % ( numseq, seq.getLength(), seq.header[0:40] ) )

				best_orf = []

				# First scan: sequence as read from the file.
				orf = seq.findORF()
				for frame in orf.keys():
					positions = orf[frame]
					for j in range( 1, len(positions) ):
						# Offsets kept from the original code; presumably they skip the
						# delimiting codon and shift to 1-based coordinates - TODO confirm.
						start = positions[j-1] + 4
						end = positions[j] + 3
						if end - start >= size:
							best_orf.append( ( end-start, frame+1, start, end ) )

				# Second scan: complemented sequence, negative frame numbers.
				seq.sequence = seq.complement()
				orf = seq.findORF()
				seqlen = seq.getLength()
				for frame in orf.keys():
					positions = orf[frame]
					for j in range( 1, len(positions) ):
						# Positions are subtracted from the sequence length, so the
						# length of the ORF is start-end here (start is the larger bound).
						start = seqlen - positions[j-1] - 3
						end = seqlen - positions[j] - 2
						if start - end >= size:
							best_orf.append( ( start-end, (frame+1)*-1, start, end ) )

				# Longest first (the tuples start with the length).
				best_orf.sort( reverse=True )

				# Per-sequence limit kept in a local: the original overwrote nb here,
				# so the count of one sequence capped the output of all later ones.
				total = len(best_orf)
				if nb == 0 or nb > total:
					limit = total
				else:
					limit = nb

				for k in range( limit ):
					length, frame, start, end = best_orf[k]
					print( best_orf[k] )
					outFile.write( "%s\t%s\t%d\t%d\n" % ( "ORF|" + str(frame) + "|" + str(length),
														  seq.header, start, end ) )

	return 0
示例#2
0
def dbTraduit(inFileName,phase=0,complement='T',pep_filename=""):
	"""
	deprecated

	Run Bioseq.traduit() on every sequence of a file and write the results
	to pep_filename, one output record per requested phase.

	For each input sequence, phases 1..3 call seq.traduit(1..3) on the
	sequence as read; phases -1..-3 call seq.traduit(1..3) after
	seq.sequence has been replaced by seq.complement(). Each record's
	header is the input header followed by " (phase N)".

	NOTE(review): traduit(n) presumably translates the sequence in reading
	frame n - confirm against Bioseq.

	@param inFileName: path of the sequence file to read
	@param phase: 0 for all phases, 1..3 for one phase on the sequence as
		read, -1..-3 for one phase on the complemented sequence
	@param complement: when 'T', the complemented sequence is also processed
		(only produces output if phase is 0 or negative)
	@param pep_filename: output path; defaults to inFileName + '.pep'
	"""
	if pep_filename=="":
		pep_filename=inFileName+'.pep'

	# 'with' guarantees both files are closed even if Bioseq raises mid-file.
	with open(inFileName) as file_db:
		with open(pep_filename,'w') as file_pep:
			seq=Bioseq()
			seq_out=Bioseq()
			numseq=0
			while True:
				seq.read(file_db)
				if seq.sequence is None:
					break
				numseq+=1
				# Single-argument print(...) gives the same output under Python 2 and 3.
				print("sequence # %s = %s [ %s ...]" % (numseq,seq.getLength(),seq.header[0:40]))

				if phase>=0:
					for frame in (1,2,3):
						if phase==frame or phase==0:
							seq_out.sequence=seq.traduit(frame)
							seq_out.header=seq.header+" (phase %d)"%frame
							seq_out.write(file_pep)

				if complement=='T' or phase<0:
					# seq is overwritten by the next read(), so replacing its
					# sequence in place is safe.
					seq.sequence=seq.complement()
					for frame in (1,2,3):
						if phase==-frame or phase==0:
							seq_out.sequence=seq.traduit(frame)
							seq_out.header=seq.header+" (phase -%d)"%frame
							seq_out.write(file_pep)
示例#3
0
def dbComplement(inFileName,comp_filename=""):
	"""
	deprecated

	Write the complement of every sequence of a file to comp_filename.

	Each sequence read with Bioseq.read() has its sequence replaced by the
	result of seq.complement() and " (complement!)" appended to its header
	before being written with Bioseq.write().

	@param inFileName: path of the sequence file to read
	@param comp_filename: output path; defaults to inFileName + '.comp'
	"""
	if comp_filename=="":
		comp_filename=inFileName+'.comp'

	# 'with' guarantees both files are closed even if Bioseq raises mid-file.
	with open(inFileName) as file_db:
		with open(comp_filename,'w') as file_comp:
			seq=Bioseq()
			numseq=0
			while True:
				seq.read(file_db)
				if seq.sequence is None:
					break
				numseq+=1
				# Single-argument print(...) gives the same output under Python 2 and 3.
				print("sequence # %s = %s [ %s ...]" % (numseq,seq.getLength(),seq.header[0:40]))
				seq.sequence=seq.complement()
				seq.header=seq.header+" (complement!)"
				seq.write(file_comp)