def run_cmfinder(Q, nodes_to_index, sets_for_nodes, prefix, fasta_filename):
    """
    Dump the sequences for the nodes in Q into a fresh motif directory, run
    CMfinder on them, rank the resulting motifs and scan the original fasta
    with the best-scoring motif.

    Q              -- iterable of keys into sets_for_nodes
    nodes_to_index -- maps m['nodes_ind'] to the accession id used to look
                      up the sequence in the fasta file
    sets_for_nodes -- maps each q to a record with 'nodes_ind', 'start' and
                      'end' entries ('end' is inclusive: the slice written
                      out is seq[start:end+1])
    prefix         -- prefix for the temporary directory that is created
                      under MOTIF_DIR
    fasta_filename -- fasta file the sequences are read from, and which is
                      scanned afterwards with the top motif

    Returns (basename of the created directory, scan_result), where
    scan_result is whatever run_infernal returned, or None if no motif
    was scored.

    The working directory is changed to the new motif directory while the
    external tools run and is always restored, even when a step fails.
    """
    import tempfile
    import miscCMF

    MOTIF_DIR = '../../MOTIF_DIR/experiment'
    old_dir = os.getcwd()
    prefix_dir = tempfile.mkdtemp(dir=MOTIF_DIR, prefix=prefix)
    F = FastaReader(fasta_filename)

    os.chdir(prefix_dir)
    try:
        fna_name = os.path.basename(prefix_dir) + '.fna'
        with open(fna_name, 'w') as handle:
            for q in Q:
                m = sets_for_nodes[q]
                acc_id = nodes_to_index[m['nodes_ind']]
                handle.write(">{0}({1}-{2})\n".format(acc_id, m['start'], m['end']))
                handle.write("{0}\n".format(F[acc_id].seq[m['start']:(m['end'] + 1)]))

        # run CMfinder
        os.system("cmfinder.pl -def " + fna_name)
        # rank the motifs with rank_cmfinder.pl
        os.system("rank_cmfinder.pl -w -rank \"{0}.*.motif.*\" {0}.summary".format(fna_name))
        # run pfold_pscore, the summary is written to <fna_name>.pscore.summary
        # and in addition we get back a descending sorted list of (lod,motif)
        pscores = miscCMF.pfold_pscore(motif_dir='.')
        sys.stderr.write("prefix dir is {0}\n".format(prefix_dir))

        # take the highest pscore motif, cmbuild then cmsearch the original fasta
        # TODO: change behaviour later?
        scan_result = None
        if len(pscores) > 0:
            best_motif = pscores[0][1]
            scan_result = run_infernal(motif=best_motif,
                                       scan_filename=fasta_filename,
                                       output_prefix=best_motif + '.Rfam')
        sys.stderr.write("scan result is... {0}\n".format(scan_result))
        sys.stderr.write("prefix dir is {0}\n".format(prefix_dir))
    finally:
        # never leave the process stranded inside the motif directory
        os.chdir(old_dir)

    return os.path.basename(prefix_dir), scan_result
def rank_motifs(dir_of_motif_dir, webdir, use_pscore=True, use_rankpl=False):
    """
    Rank the motifs found in every sub-directory of dir_of_motif_dir and
    publish the top motif of each one to webdir.

    Exactly one of use_pscore/use_rankpl should be True.

    dir_of_motif_dir -- directory whose sub-directories each hold one
                        CMfinder run (a <name>.fna plus its motif files)
    webdir           -- existing directory that receives the rendered
                        .html alignments, the .unblocked files and, for
                        cliques without any motif, the original fasta
    use_pscore       -- rank with miscCMF.pfold_pscore
    use_rankpl       -- rank with miscCMF.rank_cmfinder_score

    Returns a list of MotifRankInfo, one per motif directory that either
    has a ranked motif or had an original family hit but no motif.

    Raises Exception when the two use_* flags are both set or both unset.
    """
    assert os.path.isdir(webdir)
    if bool(use_pscore) == bool(use_rankpl):
        raise Exception("exactly use_pscore/use_rankpl should be True!")

    # The copy commands below run from inside each motif directory, so both
    # paths must be absolute to keep pointing at what the caller meant.
    webdir = os.path.abspath(webdir)
    dir_of_motif_dir = os.path.abspath(dir_of_motif_dir)

    total_ranks = []
    for d in os.listdir(dir_of_motif_dir):
        dd = os.path.join(dir_of_motif_dir, d)
        if not os.path.isdir(dd):
            continue
        sys.stderr.write("ranking motifs for directory {0}...\n".format(d))
        fna_name = d + '.fna'
        with ToDirAndBack(dd):
            fam_o, count_o, clique_size, ids_hit = eval_original_fna(fna_name)
            if use_rankpl:
                ranks = miscCMF.rank_cmfinder_score(dd)
            else:  # use pscore
                ranks = miscCMF.pfold_pscore(dd)

            if len(ranks) == 0:
                if fam_o is not None:
                    # this clique originally was a ncRNA clique, so we still
                    # put it in total_ranks, flagged with the NO_MOTIF_RANK
                    # sentinel (the original comment says -1.0)
                    total_ranks.append(MotifRankInfo(fam='NA',
                                                     count=0,
                                                     motif_size=0,
                                                     motif_filename=fna_name,
                                                     fam_o=fam_o,
                                                     count_o=count_o,
                                                     clique_size=clique_size,
                                                     rank=MotifRankInfo.NO_MOTIF_RANK))
                    # NOTE(review): source indentation was lost; the copy is
                    # assumed to belong with the placeholder entry -- confirm.
                    sys.stderr.write("copying fastafile {0} to webdir {1}\n".format(fna_name, webdir))
                    os.system("cp {fasta} {webdir}".format(fasta=fna_name, webdir=webdir))
                # nothing to rank either way; ranks[0] below would fail
                continue

            # for now take just the top rank motif
            rank_index, motif_filename = ranks[0]
            motif_base = os.path.basename(motif_filename)
            fam, count, motif_size = read_cmfinder_motif(motif_filename)
            total_ranks.append(MotifRankInfo(fam=fam,
                                             count=count,
                                             motif_size=motif_size,
                                             motif_filename=motif_base + '.html',
                                             fam_o=fam_o,
                                             count_o=count_o,
                                             clique_size=clique_size,
                                             rank=rank_index))

            # use Zasha's script to create a colorful alignment html, then copy it to web directory
            os.system("perl ~/lib/perl/StockholmUnblock.pl {0} tmp.unblocked".format(motif_filename))
            os.system("cmzasha --GSC-weighted-consensus tmp.unblocked tmp 3 0.97 0.9 0.75 4 0.97 0.9 0.75 0.5 0.05")
            os.system("perl -I/homes/gws/lachesis/lib/perl ~/lib/perl/FancifyStockholm.pl "
                      "-noWarnParseHitId -forNewMotifsWeb -noURL "
                      "-highlightCovarying tmp {0}.html".format(motif_filename))
            os.system("cp {motif}.html {webdir}".format(motif=motif_filename, webdir=webdir))
            os.system("cp tmp.unblocked {webdir}/{motif}.unblocked".format(motif=motif_base, webdir=webdir))

    return total_ranks