Пример #1
0
def performMutualEnrichment(unique_inp_events,event_inp_dictionary,unique_ref_events,event_ref_dictionary,background_size=88000):
    """Print concordant/discordant overlap statistics for input versus reference events.

    Every input comparison keyed with direction 'inclusion' is compared against
    every reference comparison keyed with direction 'inclusion'. Events are
    counted as concordant when they occur in the same direction in both
    comparisons and as discordant when the directions are swapped. One
    tab-delimited line is printed per pair: comparison, reference comparison,
    reference direction, concordant z-score, discordant z-score, discordant
    count, n, concordant Fisher p-value, discordant Fisher p-value.

    Args:
        unique_inp_events: Not used; kept for interface compatibility.
        event_inp_dictionary: Maps (comparison, direction) to a collection of
            event IDs, where direction is 'inclusion' or 'exclusion'.
        unique_ref_events: Not used; kept for interface compatibility.
        event_ref_dictionary: Same layout as event_inp_dictionary, for the
            reference comparisons.
        background_size: Value passed as N to Zscore and
            mappfinder.FishersExactTest. Defaults to 88000.

    Returns:
        None. Results are written to standard output.
    """
    N = background_size

    ### Build the reference sets once instead of once per input comparison
    reference_sets = []
    for (reference_comp,ref_direction) in event_ref_dictionary:
        if ref_direction != 'inclusion': continue
        ref_events1 = event_ref_dictionary[(reference_comp,'inclusion')]
        ### A comparison without an opposite-direction entry contributes no events for that direction
        ref_events2 = event_ref_dictionary.get((reference_comp,'exclusion'),[])
        n = len(ref_events1)+len(ref_events2)
        reference_sets.append((reference_comp,set(ref_events1),set(ref_events2),n))

    inclusion_comparisons = [comparison for (comparison,direction) in event_inp_dictionary if direction == 'inclusion']
    if not reference_sets or not inclusion_comparisons:
        return ### Nothing to score

    import mappfinder ### Only required once there is at least one pair to score

    for comparison in inclusion_comparisons:
        comparison_events1 = event_inp_dictionary[(comparison,'inclusion')]
        comparison_events2 = event_inp_dictionary.get((comparison,'exclusion'),[])
        inp_inclusion = set(comparison_events1)
        inp_exclusion = set(comparison_events2)
        R = len(comparison_events1)+len(comparison_events2)

        for (reference_comp,ref_inclusion,ref_exclusion,n) in reference_sets:
            ### Same direction in both comparisons
            r1 = len(inp_inclusion & ref_inclusion)+len(inp_exclusion & ref_exclusion)
            ### Opposite direction in the two comparisons
            r2 = len(inp_inclusion & ref_exclusion)+len(inp_exclusion & ref_inclusion)

            try: z_concordant = Zscore(r1,n,N,R)
            except ZeroDivisionError: z_concordant = 0.0000

            try: z_discordant = Zscore(r2,n,N,R)
            except ZeroDivisionError: z_discordant = 0.0000

            ### Calculate a Fischer's Exact P-value
            pval1 = mappfinder.FishersExactTest(r1,n,R,N)
            pval2 = mappfinder.FishersExactTest(r2,n,R,N)

            fields = [comparison,reference_comp,'inclusion',str(z_concordant),str(z_discordant),str(r2),str(n),str(pval1),str(pval2)]
            print('\t'.join(fields))
Пример #2
0
def performEventEnrichment(output_dir, eventDir, species):
    """Test splicing-factor correlated events for enrichment in each splicing signature.

    Reads every '*.txt' file in eventDir whose name contains 'PSI.' as a
    signature (first column = event ID, first row = header), and every '*.txt'
    file in output_dir whose name contains '_' as a splicing-factor event list
    (first column = event ID). For each splicing factor / signature pair a
    z-score and Fisher's exact p-value are computed, adjusted p-values are
    filled in by mappfinder.adjustPermuteStats, and pairs passing the cutoffs
    (z > 1.96, more than 2 overlapping events, p < 0.05) are written to
    <output_dir>/SFEnrichmentResults/SF-correlated_SignatureScores.txt.
    A splicing factor with no passing signature gets a single 'NONE' row.

    Args:
        output_dir: Directory holding the splicing-factor event files; the
            results folder is created inside it.
        eventDir: Directory holding the signature (PSI) event files.
        species: Species code handed to gene_associations.getGeneToUid when a
            file name contains an 'ENS' identifier.

    Returns:
        None.
    """
    import collections
    import mappfinder
    import UI

    ### Import the splice-ICGS significant splicing events per signature
    event_db = collections.OrderedDict()
    for filename in UI.read_directory(eventDir):
        if '.txt' not in filename or 'PSI.' not in filename:
            continue
        signature_events = []
        event_db[filename[:-4]] = signature_events
        ### For U2AF1-like signatures only rows whose second column is "inclusion" are kept
        inclusion_only = 'U2AF1-like' in filename
        with open(eventDir + '/' + filename, 'rU') as event_file:
            firstLine = True
            for line in event_file:
                t = line.rstrip().split('\t')
                if firstLine:
                    firstLine = False  ### Skip the header row
                    continue
                if inclusion_only and t[1] != "inclusion":
                    continue
                signature_events.append(t[0])

    ### Import the splicing-factor correlated splicing events to identify associated signatures
    splicing_factor_correlated_scores = {}
    gene_to_symbol = None
    N = 80000
    for filename in UI.read_directory(output_dir):
        if '.txt' not in filename or '_' not in filename:
            continue
        basename = filename[:-4]
        if 'ENS' in filename:
            if gene_to_symbol is None:  ### Import only once
                import gene_associations
                gene_to_symbol = gene_associations.getGeneToUid(
                    species, ('hide', 'Ensembl-Symbol'))
            ### Isolate the identifier that starts at 'ENS' and ends at the next '_'
            splicing_factor = ('ENS' + basename.split('ENS')[1]).split('_')[0]
            if splicing_factor in gene_to_symbol:
                splicing_factor = gene_to_symbol[splicing_factor][0]
        else:
            splicing_factor = basename.split('_')[0]

        ### NOTE(review): every line, including the first, is counted as an event here;
        ### confirm these files have no header row.
        with open(output_dir + '/' + filename, 'rU') as sf_file:
            R_ls = [line.rstrip().split('\t')[0] for line in sf_file]
        R = len(R_ls)
        correlated_events = set(R_ls)

        for signature in event_db:
            n_ls = event_db[signature]
            n = len(n_ls)
            r_ls = correlated_events.intersection(n_ls)
            r = len(r_ls)
            ### Calculate a Z-score
            try:
                z = Zscore(r, n, N, R)
            except ZeroDivisionError:
                z = 0.0000
            ### Calculate a Z-score assuming zero matching entries
            try:
                null_z = Zscore(0, n, N, R)
            except ZeroDivisionError:
                null_z = 0.000
            ### Calculate a Fischer's Exact P-value
            pval = mappfinder.FishersExactTest(r, n, R, N)
            ### Store these data in an object
            zsd = mappfinder.ZScoreData(signature, r, n, z, null_z, n)
            zsd.SetP(pval)
            zsd.setAssociatedIDs(r_ls)
            ### {signature: zsd} per splicing factor is the format passed to adjustPermuteStats below
            splicing_factor_correlated_scores.setdefault(
                splicing_factor, {})[signature] = zsd

    results_dir = output_dir + '/SFEnrichmentResults'
    result_file = results_dir + '/SF-correlated_SignatureScores.txt'
    try:
        os.mkdir(results_dir)
    except OSError:
        pass  ### Typically the folder already exists; any other problem surfaces at open() below

    with open(result_file, 'w') as eo:
        eo.write('\t'.join([
            'Splicing Factor', 'Signature', 'Number Changed',
            'Number Measured', 'Z-score', 'FisherExactP', 'AdjustedP'
        ]) + '\n')

        for splicing_factor in splicing_factor_correlated_scores:
            sorted_results = []
            signature_db = splicing_factor_correlated_scores[splicing_factor]
            ### Updates the adjusted p-value instances
            mappfinder.adjustPermuteStats(signature_db)
            for signature in signature_db:
                zsd = signature_db[signature]
                if float(zsd.ZScore()) > 1.96 and float(
                        zsd.Changed()) > 2 and float(zsd.PermuteP()) < 0.05:
                    results = [
                        splicing_factor, signature,
                        zsd.Changed(),
                        zsd.Measured(),
                        zsd.ZScore(),
                        zsd.PermuteP(),
                        zsd.AdjP()
                    ]
                    sorted_results.append([float(zsd.PermuteP()), results])
            sorted_results.sort()  ### Sort by p-value
            for (p, values) in sorted_results:
                eo.write('\t'.join(values) + '\n')
            if not sorted_results:
                eo.write('\t'.join([
                    splicing_factor, 'NONE', 'NONE', 'NONE', 'NONE', 'NONE',
                    'NONE'
                ]) + '\n')
Пример #3
0
def Enrichment(Inputfile,mutdict,mutfile,Expand,header):
    """Score each mutation's sample set for enrichment in each cluster of samples.

    Cluster membership is read from Inputfile. With Expand == "yes" the file
    is treated as a matrix: the first row is skipped, each following row is a
    sample, and a "1" in column i assigns the sample to the cluster named by
    header_file(Inputfile, Expand="yes")[i-1]. Otherwise every row assigns
    the sample in column 1 to the cluster in column 3.

    For every mutation/cluster pair a z-score and Fisher's exact p-value are
    computed and adjusted by mappfinder.adjustPermuteStats. All rows are
    written to <parent dir>MutationEnrichment/Enrichment_Results.txt; the
    first row of each cluster, plus the first row passing the
    sensitivity/specificity cutoffs, are written to Enrichment_tophits.txt.

    Args:
        Inputfile: Path to the cluster assignment file.
        mutdict: Maps a mutation name to the collection of samples carrying it.
        mutfile: Not used by this function; kept for interface compatibility.
        Expand: "yes" to read Inputfile as a 0/1 membership matrix.
        header: Collection whose length is used as the background size N.

    Returns:
        dict mapping cluster name to the mutation name selected for it.
    """
    import collections
    import mappfinder

    dire=export.findParentDir(Inputfile)
    output_dir = dire+'MutationEnrichment'
    export.createExportFolder(output_dir)

    ### Collect the samples belonging to each cluster
    group=collections.defaultdict(list)
    if Expand=="yes":
        header2=header_file(Inputfile,Expand="yes")
        with open(Inputfile,'rU') as infile:
            for (line_number,line) in enumerate(infile):
                if line_number==0: continue ### Skip the header row
                q=line.rstrip('\r\n').split('\t')
                for i in range(1,len(q)):
                    if q[i]=='1':
                        group[header2[i-1]].append(q[0])
    else:
        with open(Inputfile,'rU') as infile:
            for line in infile:
                fields=line.rstrip('\r\n').split('\t')
                group[fields[2]].append(fields[0])

    total_Scores={}
    N=float(len(header))
    for kiy in mutdict:
        if kiy =="MDP":
            print(mutdict[kiy])
        mutated=set(mutdict[kiy])
        R=float(len(mutated))
        for key2 in group:
            members=group[key2]
            r=float(len(mutated.intersection(members))) ### Unique cluster samples carrying the mutation
            n=float(len(members))

            if r==0 or R==1.0:
                print('%s %s %s %s %s %s' % (kiy,key2,r,n,R,N))
                pval=float(1)
                z=float(0)
                null_z = 0.000
            else:
                try: z = Zscore(r,n,N,R)
                except Exception: z = 0.0000
                ### Calculate a Z-score assuming zero matching entries
                try: null_z = Zscore(0,n,N,R)
                except Exception: null_z = 0.000
                ### Best effort: an unscorable pair is reported as non-significant
                try: pval = mappfinder.FishersExactTest(r,n,R,N)
                except Exception: pval=1.0

            zsd = mappfinder.ZScoreData(key2,r,R,z,null_z,n)
            zsd.SetP(pval)
            ### {cluster: zsd} per mutation is the format passed to adjustPermuteStats below
            total_Scores.setdefault(kiy,{})[key2]=zsd

    sorted_results=[]
    mutlabels={}
    for kiy in total_Scores:
        signature_db = total_Scores[kiy]
        ### Updates the adjusted p-value instances
        mappfinder.adjustPermuteStats(signature_db)
        for signature in signature_db:
            zsd = signature_db[signature]
            results = [kiy,signature,zsd.Changed(),zsd.Measured(),zsd.InPathway(),str(float(zsd.PercentChanged())/100.0),str(float(float(zsd.Changed())/float(zsd.InPathway()))), zsd.ZScore(), zsd.PermuteP(), zsd.AdjP()]
            sorted_results.append([signature,float(zsd.PermuteP()),results])
    sorted_results.sort() ### Sort by cluster, then by p-value

    column_names=["Mutations","Cluster","r","R","n","Sensitivity","Specificity","z-score","Fisher exact test","adjp value"]
    with open(output_dir+'/Enrichment_Results.txt',"w") as export_enrich, \
         open(output_dir+'/Enrichment_tophits.txt',"w") as export_hit:
        export_enrich.write('\t'.join(column_names)+'\n')
        prev=None ### Sentinel so the first row always starts a new cluster
        flag=False
        for (sig,p,values) in sorted_results:
            row='\t'.join(values)+'\n'
            if sig!=prev:
                flag=True ### Still looking for a label for this cluster
                export_hit.write(row)
            if flag:
                if (float(values[5])>=0.5 and float(values[6])>=0.5) or float(values[5])>=0.6 :
                    mutlabels[values[1]]=values[0]
                    flag=False
                    ### NOTE(review): when the first row of a cluster passes the cutoffs it is
                    ### written to the top-hits file twice; confirm whether that is intended.
                    export_hit.write(row)
            export_enrich.write(row)
            prev=sig
        if len(sorted_results)==0:
            ### One placeholder per header column
            export_enrich.write('\t'.join(['NONE']*len(column_names))+'\n')
    return mutlabels
Пример #4
0
def Enrichment(Inputfile,mutdict,mutfile,metaDataMatrixFormat,header):
    """Score each mutation's sample set for enrichment in each cluster of samples.

    Inputfile is read as a membership matrix: the first row is skipped, each
    following row is one sample, and a "1" in column i assigns the sample to
    the cluster named by
    returnSamplesInMetaData(Inputfile, metaDataMatrixFormat=True)[i-1].
    The number of sample rows is used as the background size N.

    For every mutation/cluster pair a z-score and Fisher's exact p-value are
    computed and adjusted by mappfinder.adjustPermuteStats. All rows are
    written to <parent dir>MutationEnrichment/Enrichment_Results.txt; the
    first row of each cluster (highest z-score), plus the first row passing
    the sensitivity, specificity, z-score and count cutoffs, are written to
    Enrichment_tophits.txt.

    Args:
        Inputfile: Path to the cluster membership matrix.
        mutdict: Maps a mutation name to the collection of samples carrying it.
        mutfile: Not used by this function; kept for interface compatibility.
        metaDataMatrixFormat: Not used by this function; the matrix layout is
            always assumed. Kept for interface compatibility.
        header: Not used by this function; kept for interface compatibility.

    Returns:
        dict mapping cluster name to 'cluster (mutation)' for the mutation
        selected for that cluster.
    """
    import collections
    import mappfinder

    dire=export.findParentDir(Inputfile)
    output_dir = dire+'MutationEnrichment'
    print(output_dir)
    export.createExportFolder(output_dir)

    ### Collect the samples belonging to each cluster
    group=collections.defaultdict(list)
    number_of_samples = 0
    header2=returnSamplesInMetaData(Inputfile,metaDataMatrixFormat=True)
    print(header2)
    with open(Inputfile,'rU') as infile:
        for (line_number,line) in enumerate(infile):
            if line_number==0: continue ### Skip the header row
            number_of_samples+=1
            q = line.rstrip('\r\n').split('\t')
            for i in range(1,len(q)):
                if q[i]=='1':
                    group[header2[i-1]].append(q[0]) ### [Cluster] = [full_sample_ID]

    print('Number of patient samples in dataset = %s' % number_of_samples)
    total_Scores={}
    N=float(number_of_samples)
    for kiy in mutdict:
        if kiy =="MDP":
            print(mutdict[kiy])
        mutated=set(mutdict[kiy])
        R=float(len(mutated))
        for key2 in group:
            members=group[key2]
            r=float(len(mutated.intersection(members))) ### Unique cluster samples carrying the mutation
            n=float(len(members))
            if r==0 or key2=="1" or R==1.0:
                pval=float(1)
                z=float(0)
                null_z = 0.000
            else:
                try: z = Zscore(r,n,N,R)
                except Exception: z=0
                ### Calculate a Z-score assuming zero matching entries
                try: null_z = Zscore(0,n,N,R)
                except Exception: null_z = 0.000
                ### Best effort: an unscorable pair is reported as non-significant
                try: pval = mappfinder.FishersExactTest(r,n,R,N)
                except Exception: pval=1.0

            zsd = mappfinder.ZScoreData(key2,r,R,z,null_z,n)
            zsd.SetP(pval)
            ### {cluster: zsd} per mutation is the format passed to adjustPermuteStats below
            total_Scores.setdefault(kiy,{})[key2]=zsd

    sorted_results=[]
    mutlabels={}
    for kiy in total_Scores:
        signature_db = total_Scores[kiy]
        ### Updates the adjusted p-value instances
        mappfinder.adjustPermuteStats(signature_db)
        for signature in signature_db:
            zsd = signature_db[signature]
            results = [kiy,signature,zsd.Changed(),zsd.Measured(),zsd.InPathway(),str(float(zsd.PercentChanged())/100.0),str(float(float(zsd.Changed())/float(zsd.InPathway()))), zsd.ZScore(), zsd.PermuteP(), zsd.AdjP()]
            sorted_results.append([signature,-1*float(zsd.ZScore()),results])
    sorted_results.sort() ### Sort by cluster, then by descending z-score

    column_names=["Mutations","Cluster","r","R","n","Sensitivity","Specificity","z-score","Fisher exact test","adjp value"]
    column_header='\t'.join(column_names)+'\n'
    ### Enrichment_Results.txt: all enrichment results
    ### Enrichment_tophits.txt: results selected for association with cluster names
    with open(output_dir+'/Enrichment_Results.txt',"w") as export_enrich, \
         open(output_dir+'/Enrichment_tophits.txt',"w") as export_hit:
        export_enrich.write(column_header)
        export_hit.write(column_header)

        prev=None ### Sentinel so the first row always starts a new cluster
        flag=False
        for (sig,neg_z,values) in sorted_results:
            row='\t'.join(values)+'\n'
            if sig!=prev:
                flag=True ### Still looking for a label for this cluster
                export_hit.write(row)
            if flag:
                ### Update the cluster label to include the top enriched term meeting, sensitivity and specificity cutoffs
                if (float(values[5])>=0.2 and float(values[6])>=0.2 and float(values[7])>=1.95 and float(values[2])>=2):
                    clusterID = values[1]
                    topEnrichedTerm=values[0]
                    mutlabels[clusterID]=clusterID+' ('+topEnrichedTerm+')'
                    flag=False
                    ### NOTE(review): when the first row of a cluster passes the cutoffs it is
                    ### written to the top-hits file twice; confirm whether that is intended.
                    export_hit.write(row)
            export_enrich.write(row)
            prev=sig
        if len(sorted_results)==0:
            ### One placeholder per header column
            export_enrich.write('\t'.join(['NONE']*len(column_names))+'\n')

    return mutlabels