def performMutualEnrichment(unique_inp_events, event_inp_dictionary, unique_ref_events, event_ref_dictionary, background_size=88000):
    """Print concordant/discordant overlap statistics for input vs. reference comparisons.

    Every (comparison, 'inclusion') key of event_inp_dictionary is paired with
    every (reference, 'inclusion') key of event_ref_dictionary. For each pair,
    events shared in the same direction (concordant) and in opposite directions
    (discordant) are counted and scored with Zscore() and
    mappfinder.FishersExactTest(). One tab-delimited row is printed per pair:

        comparison, reference, 'inclusion', concordant z, discordant z,
        discordant count, n, concordant p, discordant p

    Args:
        unique_inp_events: accepted for interface compatibility; not used.
        event_inp_dictionary: mapping keyed by (comparison, direction) tuples,
            direction being 'inclusion' or 'exclusion'; values are sized
            iterables of hashable event identifiers.
        unique_ref_events: accepted for interface compatibility; not used.
        event_ref_dictionary: same layout as event_inp_dictionary, for the
            reference comparisons.
        background_size: the N handed to Zscore() and FishersExactTest().
            Defaults to 88000, the value that was previously hard-coded.

    Returns:
        None. Results are written to stdout only.
    """
    ### Only inclusion-keyed entries drive the analysis; the exclusion events
    ### are looked up as the partner of each inclusion entry.
    inclusion_comparisons = [comp for (comp, direction) in event_inp_dictionary if direction == 'inclusion']
    inclusion_references = [ref for (ref, direction) in event_ref_dictionary if direction == 'inclusion']
    if not inclusion_comparisons or not inclusion_references:
        return

    ### Deferred import: only needed once there is at least one pair to score
    import mappfinder

    N = background_size

    ### Build the reference sets once rather than once per input comparison
    references = []
    for reference_comp in inclusion_references:
        ref_inclusion = event_ref_dictionary[(reference_comp, 'inclusion')]
        ### A reference without exclusion events is treated as having none
        ref_exclusion = event_ref_dictionary.get((reference_comp, 'exclusion'), [])
        n = len(ref_inclusion) + len(ref_exclusion)
        references.append((reference_comp, set(ref_inclusion), set(ref_exclusion), n))

    for comparison in inclusion_comparisons:
        inp_inclusion = event_inp_dictionary[(comparison, 'inclusion')]
        ### A comparison without exclusion events is treated as having none
        inp_exclusion = event_inp_dictionary.get((comparison, 'exclusion'), [])
        R = len(inp_inclusion) + len(inp_exclusion)
        inp_inclusion_set = set(inp_inclusion)
        inp_exclusion_set = set(inp_exclusion)

        for (reference_comp, ref_inclusion_set, ref_exclusion_set, n) in references:
            ### Same direction in both datasets
            r1 = len(inp_inclusion_set & ref_inclusion_set) + len(inp_exclusion_set & ref_exclusion_set)
            ### Opposite direction in the two datasets
            r2 = len(inp_inclusion_set & ref_exclusion_set) + len(inp_exclusion_set & ref_inclusion_set)

            try:
                z_concordant = Zscore(r1, n, N, R)
            except ZeroDivisionError:
                z_concordant = 0.0
            try:
                z_discordant = Zscore(r2, n, N, R)
            except ZeroDivisionError:
                z_discordant = 0.0

            ### Calculate a Fisher's Exact P-value
            pval1 = mappfinder.FishersExactTest(r1, n, R, N)
            pval2 = mappfinder.FishersExactTest(r2, n, R, N)

            print('\t'.join([comparison, reference_comp, 'inclusion',
                             str(z_concordant), str(z_discordant), str(r2),
                             str(n), str(pval1), str(pval2)]))
def performEventEnrichment(output_dir, eventDir, species):
    """Import significant splicing events from metaDataAnalysis.py comparisons
    and test for their statistical enrichment relative to the Splicing Factor
    correlated events.

    Args:
        output_dir: directory holding the splicing-factor correlated event
            files ('.txt' files with '_' in the name). Results are written to
            output_dir + '/SFEnrichmentResults/SF-correlated_SignatureScores.txt'.
        eventDir: directory holding the signature event files ('.txt' files
            with 'PSI.' in the name). Their first row must contain an
            'Event-Direction' column.
        species: passed to gene_associations.getGeneToUid() when a file name
            contains 'ENS' and the ID has to be translated to a symbol.

    Returns:
        None. One row is written per (splicing factor, signature) pair with
        Z-score > 1.96, more than 2 changed events and PermuteP < 0.05; a
        splicing factor with no such pair gets a single row of 'NONE' values.
    """
    import collections
    import mappfinder
    import UI

    ### Import the splice-ICGS significant splicing events per signature
    event_db = collections.OrderedDict()
    for filename in UI.read_directory(eventDir):
        if '.txt' not in filename or 'PSI.' not in filename:
            continue
        signature_events = []
        event_db[filename[:-4]] = signature_events
        inclusion_only = 'U2AF1-like' in filename
        with open(eventDir + '/' + filename, 'rU') as event_file:
            for row_number, line in enumerate(event_file):
                t = line.rstrip().split('\t')
                if row_number == 0:
                    ### Header row: raises ValueError when the expected column is absent
                    t.index('Event-Direction')
                    continue
                ### NOTE(review): the direction is read from column 1, not from the
                ### 'Event-Direction' column located above - confirm they coincide.
                if inclusion_only and t[1] != "inclusion":
                    continue
                signature_events.append(t[0])

    ### Import the splicing-factor correlated splicing events to identify associated signatures
    splicing_factor_correlated_scores = {}
    gene_to_symbol = None
    N = 80000
    for filename in UI.read_directory(output_dir):
        if '.txt' not in filename or '_' not in filename:
            continue
        if 'ENS' in filename:
            if gene_to_symbol is None:  ### Import only once
                import gene_associations
                gene_to_symbol = gene_associations.getGeneToUid(species, ('hide', 'Ensembl-Symbol'))
            sf = 'ENS' + filename[:-4].split('ENS')[1]
            splicing_factor = sf.split('_')[0]
            if splicing_factor in gene_to_symbol:
                splicing_factor = gene_to_symbol[splicing_factor][0]
        else:
            splicing_factor = filename[:-4].split('_')[0]

        ### Every line, including the first, contributes an event ID (no header row is skipped)
        with open(output_dir + '/' + filename, 'rU') as correlated_file:
            R_ls = [line.rstrip().split('\t')[0] for line in correlated_file]
        R = len(R_ls)
        correlated_events = set(R_ls)

        for signature in event_db:
            n_ls = event_db[signature]
            n = len(n_ls)
            r_ls = correlated_events.intersection(n_ls)
            r = len(r_ls)

            ### Calculate a Z-score
            try:
                z = Zscore(r, n, N, R)
            except ZeroDivisionError:
                z = 0.0000
            ### Calculate a Z-score assuming zero matching entries
            try:
                null_z = Zscore(0, n, N, R)
            except ZeroDivisionError:
                null_z = 0.000
            ### Calculate a Fisher's Exact P-value
            pval = mappfinder.FishersExactTest(r, n, R, N)

            ### Store these data in an object
            zsd = mappfinder.ZScoreData(signature, r, n, z, null_z, n)
            zsd.SetP(pval)
            zsd.setAssociatedIDs(r_ls)
            ### {splicing factor: {signature: zsd}} - necessary format for the permutation function
            splicing_factor_correlated_scores.setdefault(splicing_factor, {})[signature] = zsd

    results_dir = output_dir + '/SFEnrichmentResults'
    result_file = results_dir + '/SF-correlated_SignatureScores.txt'
    try:
        os.mkdir(results_dir)
    except OSError:
        ### Typically the directory already exists; any other problem surfaces at open() below
        pass

    with open(result_file, 'w') as eo:
        eo.write('\t'.join([
            'Splicing Factor', 'Signature', 'Number Changed', 'Number Measured',
            'Z-score', 'FisherExactP', 'AdjustedP'
        ]) + '\n')

        ### Perform a permutation analysis to get BH adjusted p-values
        for splicing_factor in splicing_factor_correlated_scores:
            signature_db = splicing_factor_correlated_scores[splicing_factor]
            ### Updates the adjusted p-value instances
            mappfinder.adjustPermuteStats(signature_db)

            sorted_results = []
            for signature in signature_db:
                zsd = signature_db[signature]
                if float(zsd.ZScore()) > 1.96 and float(zsd.Changed()) > 2 and float(zsd.PermuteP()) < 0.05:
                    results = [
                        splicing_factor, signature, zsd.Changed(), zsd.Measured(),
                        zsd.ZScore(), zsd.PermuteP(), zsd.AdjP()
                    ]
                    sorted_results.append([float(zsd.PermuteP()), results])
            sorted_results.sort()  ### Sort by p-value

            for (p, values) in sorted_results:
                eo.write('\t'.join(values) + '\n')
            if not sorted_results:
                eo.write('\t'.join([splicing_factor] + ['NONE'] * 6) + '\n')
def Enrichment(Inputfile, mutdict, mutfile, Expand, header):
    """Score the overlap between each mutation's samples and each sample cluster.

    NOTE(review): this file contains another function named Enrichment; the
    definition that appears later is the one bound at import time - confirm
    which variant callers are meant to reach.

    Args:
        Inputfile: tab-delimited cluster file. With Expand == "yes" the first
            row is skipped and a sample (column 0) joins the cluster named by
            header_file() for every column holding '1'. Otherwise every row is
            read and column 0 is added to the cluster named in column 2.
        mutdict: maps a mutation name to the collection of samples carrying it.
        mutfile: accepted for interface compatibility; not used.
        Expand: "yes" selects the matrix layout described above.
        header: sized collection whose length is used as N (background size).

    Returns:
        dict mapping a cluster name to the first mutation (in sorted order)
        whose row satisfies (column 5 >= 0.5 and column 6 >= 0.5) or
        column 5 >= 0.6.

    Side effects:
        Writes Enrichment_Results.txt and Enrichment_tophits.txt into
        <parent dir of Inputfile> + 'MutationEnrichment'.
    """
    import collections
    import mappfinder

    columns = ["Mutations", "Cluster", "r", "R", "n", "Sensitivity",
               "Specificity", "z-score", "Fisher exact test", "adjp value"]

    output_dir = export.findParentDir(Inputfile) + 'MutationEnrichment'
    export.createExportFolder(output_dir)

    with open(output_dir + '/Enrichment_Results.txt', "w") as export_enrich, \
            open(output_dir + '/Enrichment_tophits.txt', "w") as export_hit:
        export_enrich.write('\t'.join(columns) + '\n')

        ### [Cluster] = [sample, ...]
        group = collections.defaultdict(list)
        if Expand == "yes":
            header2 = header_file(Inputfile, Expand="yes")
            with open(Inputfile, 'rU') as handle:
                for row_number, line in enumerate(handle):
                    if row_number == 0:
                        continue  ### skip the header row
                    q = line.rstrip('\r\n').split('\t')
                    for column, cell in enumerate(q[1:]):
                        if cell == '1':
                            group[header2[column]].append(q[0])
        else:
            with open(Inputfile, 'rU') as handle:
                for line in handle:
                    fields = line.rstrip('\r\n').split('\t')
                    group[fields[2]].append(fields[0])

        total_Scores = {}
        N = float(len(header))
        for kiy in mutdict:
            if kiy == "MDP":
                print(mutdict[kiy])
            mutated = set(mutdict[kiy])
            R = float(len(mutated))
            for key2 in group:
                members = group[key2]
                r = float(len(mutated.intersection(members)))
                n = float(len(members))
                if r == 0 or R == 1.0:
                    ### No overlap, or a single mutated sample: report as not enriched
                    print(' '.join(str(value) for value in (kiy, key2, r, n, R, N)))
                    z = float(0)
                    null_z = 0.000
                    pval = float(1)
                else:
                    try:
                        z = Zscore(r, n, N, R)
                    except Exception:
                        z = 0.0000
                    ### Calculate a Z-score assuming zero matching entries
                    try:
                        null_z = Zscore(0, n, N, R)
                    except Exception:
                        null_z = 0.000
                    try:
                        pval = mappfinder.FishersExactTest(r, n, R, N)
                    except Exception:
                        pval = 1.0
                zsd = mappfinder.ZScoreData(key2, r, R, z, null_z, n)
                zsd.SetP(pval)
                ### {mutation: {cluster: zsd}} - necessary format for the permutation function
                total_Scores.setdefault(kiy, {})[key2] = zsd

        sorted_results = []
        mutlabels = {}
        for kiy in total_Scores:
            signature_db = total_Scores[kiy]
            ### Updates the adjusted p-value instances
            mappfinder.adjustPermuteStats(signature_db)
            for signature in signature_db:
                zsd = signature_db[signature]
                results = [kiy, signature, zsd.Changed(), zsd.Measured(), zsd.InPathway(),
                           str(float(zsd.PercentChanged()) / 100.0),
                           str(float(zsd.Changed()) / float(zsd.InPathway())),
                           zsd.ZScore(), zsd.PermuteP(), zsd.AdjP()]
                sorted_results.append([signature, float(zsd.PermuteP()), results])
        sorted_results.sort()  ### Sort by cluster, then p-value

        ### None never equals a cluster name, so the first row always starts a new cluster
        previous_signature = None
        awaiting_label = False
        for (sig, p, values) in sorted_results:
            row = '\t'.join(values) + '\n'
            if sig != previous_signature:
                awaiting_label = True
                export_hit.write(row)
            if awaiting_label:
                if (float(values[5]) >= 0.5 and float(values[6]) >= 0.5) or float(values[5]) >= 0.6:
                    mutlabels[values[1]] = values[0]
                    awaiting_label = False
                    export_hit.write(row)
            export_enrich.write(row)
            previous_signature = sig

        if not sorted_results:
            ### Previously referenced an undefined name here and raised NameError
            export_enrich.write('\t'.join(['NONE'] * len(columns)) + '\n')

    return mutlabels
def Enrichment(Inputfile, mutdict, mutfile, metaDataMatrixFormat, header):
    """Score the overlap between each mutation's samples and each sample cluster.

    NOTE(review): this file contains another function named Enrichment; the
    definition that appears later is the one bound at import time - confirm
    which variant callers are meant to reach.

    Args:
        Inputfile: tab-delimited matrix. The first row is skipped; every other
            row counts as one sample, and the sample (column 0) joins the
            cluster named by returnSamplesInMetaData() for every column
            holding '1'.
        mutdict: maps a mutation name to the collection of samples carrying it.
        mutfile: accepted for interface compatibility; not used.
        metaDataMatrixFormat: accepted for interface compatibility; not used.
            NOTE(review): returnSamplesInMetaData() is always called with
            metaDataMatrixFormat=True - confirm that is intended.
        header: accepted for interface compatibility; not used.

    Returns:
        dict mapping a cluster name to 'cluster (mutation)' for the first
        mutation (highest z-score first) whose row has column 5 >= 0.2,
        column 6 >= 0.2, z-score >= 1.95 and at least 2 changed samples.

    Side effects:
        Prints progress information and writes Enrichment_Results.txt and
        Enrichment_tophits.txt into
        <parent dir of Inputfile> + 'MutationEnrichment'.
    """
    import collections
    import mappfinder

    columns = ["Mutations", "Cluster", "r", "R", "n", "Sensitivity",
               "Specificity", "z-score", "Fisher exact test", "adjp value"]
    column_header = '\t'.join(columns) + '\n'

    output_dir = export.findParentDir(Inputfile) + 'MutationEnrichment'
    print(output_dir)
    export.createExportFolder(output_dir)

    ### Enrichment_Results.txt: all enrichment results
    ### Enrichment_tophits.txt: results selected for association with cluster names
    with open(output_dir + '/Enrichment_Results.txt', "w") as export_enrich, \
            open(output_dir + '/Enrichment_tophits.txt', "w") as export_hit:
        export_enrich.write(column_header)
        export_hit.write(column_header)

        header2 = returnSamplesInMetaData(Inputfile, metaDataMatrixFormat=True)
        print(header2)

        ### [Cluster] = [full_sample_ID, ...]
        group = collections.defaultdict(list)
        number_of_samples = 0
        with open(Inputfile, 'rU') as handle:
            for row_number, line in enumerate(handle):
                if row_number == 0:
                    continue  ### skip the header row
                number_of_samples += 1
                q = line.rstrip('\r\n').split('\t')
                for column, cell in enumerate(q[1:]):
                    if cell == '1':
                        group[header2[column]].append(q[0])
        print('Number of patient samples in dataset = ' + str(number_of_samples))

        total_Scores = {}
        N = float(number_of_samples)
        for kiy in mutdict:
            if kiy == "MDP":
                print(mutdict[kiy])
            mutated = set(mutdict[kiy])
            R = float(len(mutated))
            for key2 in group:
                members = group[key2]
                r = float(len(mutated.intersection(members)))
                n = float(len(members))
                if r == 0 or key2 == "1" or R == 1.0:
                    ### No overlap, cluster named "1", or a single mutated sample:
                    ### report as not enriched
                    z = float(0)
                    null_z = 0.000
                    pval = float(1)
                else:
                    try:
                        z = Zscore(r, n, N, R)
                    except Exception:
                        z = 0
                    ### Calculate a Z-score assuming zero matching entries
                    try:
                        null_z = Zscore(0, n, N, R)
                    except Exception:
                        null_z = 0.000
                    try:
                        pval = mappfinder.FishersExactTest(r, n, R, N)
                    except Exception:
                        pval = 1.0
                zsd = mappfinder.ZScoreData(key2, r, R, z, null_z, n)
                zsd.SetP(pval)
                ### {mutation: {cluster: zsd}} - necessary format for the permutation function
                total_Scores.setdefault(kiy, {})[key2] = zsd

        sorted_results = []
        mutlabels = {}
        for kiy in total_Scores:
            signature_db = total_Scores[kiy]
            ### Updates the adjusted p-value instances
            mappfinder.adjustPermuteStats(signature_db)
            for signature in signature_db:
                zsd = signature_db[signature]
                results = [kiy, signature, zsd.Changed(), zsd.Measured(), zsd.InPathway(),
                           str(float(zsd.PercentChanged()) / 100.0),
                           str(float(zsd.Changed()) / float(zsd.InPathway())),
                           zsd.ZScore(), zsd.PermuteP(), zsd.AdjP()]
                sorted_results.append([signature, -1 * float(zsd.ZScore()), results])
        sorted_results.sort()  ### Sort by cluster, then descending z-score

        ### None never equals a cluster name, so the first row always starts a new cluster
        previous_signature = None
        awaiting_label = False
        for (sig, p, values) in sorted_results:
            row = '\t'.join(values) + '\n'
            starts_cluster = sig != previous_signature
            if starts_cluster:
                awaiting_label = True
                export_hit.write(row)
            if awaiting_label:
                ### Update the cluster label to include the top enriched term meeting
                ### the sensitivity and specificity cutoffs
                if (float(values[5]) >= 0.2 and float(values[6]) >= 0.2
                        and float(values[7]) >= 1.95 and float(values[2]) >= 2):
                    clusterID = values[1]
                    topEnrichedTerm = values[0]
                    mutlabels[clusterID] = clusterID + ' (' + topEnrichedTerm + ')'
                    awaiting_label = False
                    ### The cluster's first row is already in the top-hits file;
                    ### do not list it a second time when it is also the labelling row
                    if not starts_cluster:
                        export_hit.write(row)
            export_enrich.write(row)
            previous_signature = sig

        if not sorted_results:
            ### Previously referenced an undefined name here and raised NameError
            export_enrich.write('\t'.join(['NONE'] * len(columns)) + '\n')

    return mutlabels