Exemplo n.º 1
0
def run_cmfinder(Q, nodes_to_index, sets_for_nodes, prefix, fasta_filename):
	"""
	Run CMfinder on the sub-sequences selected by Q, then scan
	fasta_filename with the best-scoring motif.

	A fresh working directory is created under MOTIF_DIR (its name starts
	with `prefix`) and a FASTA file <dirname>.fna is written into it with
	one record per element of Q.  cmfinder.pl, rank_cmfinder.pl and
	miscCMF.pfold_pscore are then run inside that directory.

	Q               -- iterable of keys into sets_for_nodes
	nodes_to_index  -- maps a record's 'nodes_ind' to the sequence id
	                   used to look the sequence up in the fasta file
	sets_for_nodes  -- maps each key of Q to a record with the entries
	                   'nodes_ind', 'start' and 'end' ('end' is inclusive)
	prefix          -- name prefix for the working directory
	fasta_filename  -- fasta file the sequences are read from; also the
	                   file handed to run_infernal for scanning

	Returns (basename of the working directory, scan_result), where
	scan_result is whatever run_infernal returned, or None if
	pfold_pscore produced no motifs.

	The caller's working directory is restored even if a step fails.
	"""
	import tempfile, miscCMF
	old_dir = os.getcwd()
	MOTIF_DIR = '../../MOTIF_DIR/experiment'
	prefix_dir = tempfile.mkdtemp(dir=MOTIF_DIR, prefix=prefix)

	# the reader is created before the chdir so that a relative
	# fasta_filename is interpreted against the caller's directory
	F = FastaReader(fasta_filename)
	os.chdir(prefix_dir)
	try:
		fna_name = os.path.basename(prefix_dir) + '.fna'
		with open(fna_name, 'w') as handle:
			for q in Q:
				m = sets_for_nodes[q]
				acc_id = nodes_to_index[m['nodes_ind']]
				handle.write(">{0}({1}-{2})\n".format(acc_id, m['start'], m['end']))
				# m['end'] is inclusive, hence the +1 on the slice bound
				handle.write("{0}\n".format(F[acc_id].seq[m['start']:(m['end']+1)]))

		# run CMfinder
		os.system("cmfinder.pl -def " + fna_name)
		# rank the motifs with rank_cmfinder.pl
		os.system("rank_cmfinder.pl -w -rank \"{0}.*.motif.*\" {0}.summary".format(fna_name))
		# run pfold_pscore, the summary is written to <fna_name>.pscore.summary
		# and in addition we get back a descending sorted list of (lod,motif)
		pscores = miscCMF.pfold_pscore(motif_dir='.')
		sys.stderr.write("prefix dir is {0}\n".format(prefix_dir))
		# take the highest pscore motif, cmbuild then cmsearch the original fasta
		# TODO: change behaviour later?
		# NOTE(review): fasta_filename is passed on unchanged although the
		# working directory has changed -- a relative path may no longer
		# resolve here; confirm that callers pass an absolute path.
		scan_result = None
		if len(pscores) > 0:
			best_motif = pscores[0][1]
			scan_result = run_infernal(motif=best_motif, scan_filename=fasta_filename,
					output_prefix=best_motif+'.Rfam')
			sys.stderr.write("scan result is... {0}\n".format(scan_result))
		sys.stderr.write("prefix dir is {0}\n".format(prefix_dir))
	finally:
		# always return to where the caller was, even on failure
		os.chdir(old_dir)
	return os.path.basename(prefix_dir), scan_result
Exemplo n.º 2
0
def _copy_for_web(src, dst):
	"""
	Best-effort copy of file `src` to `dst` (a file path or an existing
	directory).  A failed copy is reported on stderr and otherwise ignored,
	so one missing file does not abort the whole ranking run.
	"""
	import shutil
	try:
		shutil.copy(src, dst)
	except EnvironmentError as err:
		# EnvironmentError covers IOError/OSError (and shutil.Error)
		sys.stderr.write("WARNING: could not copy {0} to {1}: {2}\n".format(src, dst, err))


def rank_motifs(dir_of_motif_dir, webdir, use_pscore=True, use_rankpl=False):
	"""
	Rank the motifs found in every sub-directory of dir_of_motif_dir and
	publish the top-ranked one of each to webdir.

	For each sub-directory <d> the file <d>.fna is evaluated with
	eval_original_fna and the motifs are ranked with either
	miscCMF.pfold_pscore (use_pscore) or miscCMF.rank_cmfinder_score
	(use_rankpl).  Only the first entry of the ranking is used: it is
	rendered to HTML with external scripts, and the HTML plus the
	unblocked alignment are copied to webdir.  A directory without any
	ranked motif is recorded (with MotifRankInfo.NO_MOTIF_RANK, and its
	.fna copied to webdir) only when eval_original_fna reported a family
	for it; otherwise it is skipped.

	dir_of_motif_dir -- directory whose sub-directories hold the motifs
	webdir           -- existing directory that receives the web files
	use_pscore, use_rankpl -- exactly one of them must be True

	Returns the list of MotifRankInfo records, one per directory kept.
	Raises Exception if use_pscore/use_rankpl are not mutually exclusive.
	"""
	assert os.path.isdir(webdir)
	if not operator.xor(use_pscore, use_rankpl):
		raise Exception("exactly one of use_pscore/use_rankpl should be True!")

	total_ranks = []

	# Both paths are used after changing into each motif directory, so they
	# must not depend on the current working directory.
	dir_of_motif_dir = os.path.abspath(dir_of_motif_dir)
	webdir = os.path.abspath(webdir)

	for d in os.listdir(dir_of_motif_dir):
		dd = os.path.join(dir_of_motif_dir, d)
		if not os.path.isdir(dd):
			continue
		sys.stderr.write("ranking motifs for directory {0}...\n".format(d))

		# presumably ToDirAndBack changes into dd for the duration of the
		# block -- the relative file names below rely on that
		with ToDirAndBack(dd):
			fasta_name = d + '.fna'
			fam_o, count_o, clique_size, _ids_hit = eval_original_fna(fasta_name)

			if use_rankpl:
				ranks = miscCMF.rank_cmfinder_score(dd)
			else: # use pscore
				ranks = miscCMF.pfold_pscore(dd)

			if len(ranks) == 0:
				if fam_o is not None:
					# no motif was found, but the original clique was an
					# ncRNA clique: keep it in total_ranks, marked with
					# the special "no motif" rank
					total_ranks.append(MotifRankInfo(fam='NA',
							count=0,
							motif_size=0,
							motif_filename=fasta_name,
							fam_o=fam_o,
							count_o=count_o,
							clique_size=clique_size,
							rank=MotifRankInfo.NO_MOTIF_RANK))
					sys.stderr.write("copying fastafile {0} to webdir {1}\n".format(fasta_name, webdir))
					_copy_for_web(fasta_name, webdir)
				continue

			rank_index, motif_filename = ranks[0] # for now take just the top rank motif
			motif_basename = os.path.basename(motif_filename)
			fam, count, motif_size = read_cmfinder_motif(motif_filename)
			total_ranks.append(MotifRankInfo(fam=fam,
					count=count,
					motif_size=motif_size,
					motif_filename=motif_basename+'.html',
					fam_o=fam_o,
					count_o=count_o,
					clique_size=clique_size,
					rank=rank_index))

			# use Zasha's script to create a colorful alignment html, then copy it to web directory
			os.system("perl ~/lib/perl/StockholmUnblock.pl {0} tmp.unblocked".format(motif_filename))
			os.system("cmzasha --GSC-weighted-consensus tmp.unblocked tmp 3 0.97 0.9 0.75 4 0.97 0.9 0.75 0.5 0.05")
			os.system("perl -I/homes/gws/lachesis/lib/perl ~/lib/perl/FancifyStockholm.pl "\
					"-noWarnParseHitId -forNewMotifsWeb -noURL -highlightCovarying tmp {0}.html".format(motif_filename))
			_copy_for_web(motif_filename + '.html', webdir)
			_copy_for_web('tmp.unblocked', os.path.join(webdir, motif_basename + '.unblocked'))

	return total_ranks