def OnStatExport(self, event):
    """
    Export the statistics of the project's functional annotations.

    The ReportFA exporter is created on first use, writing under
    "<project directory>/Export", and is cached in self.project['exportFA'].
    The outcome (success or failure) is reported through self.popup.

    event -- the GUI event that triggered this handler (not used here)
    """
    if not self.project.has_key('exportFA'):
        outDir = "%s/Export" % (self.project['directory'])
        createDir(outDir)
        self.project['exportFA'] = ReportFA(outDir=outDir,
                                            name=self.project['name'],
                                            organism=self.project['organism'])

    try:
        # NOTE(review): the filter tests self.viewStat as a whole, not per
        # statistic, so either every workflow entry is exported or none is
        # -- confirm this is the intent.
        selected = [statistics for statistics in self.workflow if self.viewStat]
        self.project['exportFA'].saveStatistics(self.project['allFA'], selected)
        self.popup.msg(self.frame,
                       "Congratulations, statistics have been successfully exported to %s" % self.project['exportFA'].outDir,
                       "Operation")
    except Exception:
        # Only ordinary errors are reported to the user; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        self.popup.error(self.frame,
                         "failed to export statistics to %s" % self.project['exportFA'].outDir)
def AffyCCAnnotations(FA, organism):
    """
    Plot the graph induced by every cellular component GO term used in FA.

    The terms annotated to all gene products of the 'cellular_component'
    aspect are pooled and drawn into
    "<projectDir>/Graph/<organism>/CCAnnotations/annotation.png".

    FA       -- functional annotation exposing GPtoGO[aspect][gene product]
    organism -- organism name used in the output path

    NOTE(review): projectDir and G are not parameters; they are resolved from
    the enclosing scope -- confirm they are defined where this is called.
    """
    from AIGO.pyGS2 import plot_InducedGraph

    # Pool the GO terms of every annotated gene product
    ccAnnotations = FA.GPtoGO['cellular_component']
    allTerms = set()
    for geneProduct in ccAnnotations:
        allTerms = allTerms | ccAnnotations[geneProduct]

    outDir = "%s/Graph/%s/CCAnnotations/" % (projectDir, organism)
    createDir(outDir)

    fileName = "%s/annotation.png" % (outDir)
    plot_InducedGraph(list(allTerms),
                      G,
                      fileName=fileName,
                      ttl="Affymetrix wheat Cellular Component")
def OnCompareView(self, event):
    """
    Draw and show the GO graph of the annotation set selected in a compare list.

    With a single functional annotation selected for comparison, the graph
    induced by the annotations of the chosen gene product is plotted; with
    more than one, the first two are compared on the same graph. The figure
    is written under "<project directory>/Annotation", remembered in
    self.project['compareFA']['figName'] and displayed in self.ViewFrame.

    event -- GUI event; the name of the widget that fired it is used as the
             GO aspect
    """
    if len(self.project['compareFA']['allFA']) == 0:
        self.popup.error(self.frame, "must first select at least one Functional Annotation")
        return

    # The widget name doubles as the GO aspect and selects the list control
    aspect = event.GetEventObject().GetName()
    listCtrl = getattr(self.frame.notebook, 'list_ctrl_Compare_%s' % aspect)

    selected = listCtrl.GetFirstSelected()
    if selected == -1:
        self.popup.error(self.frame, "you must first select the annotation set you want to see")
        return

    createPlotFA(self.project)

    outDir = "%s/Annotation" % (self.project['directory'])
    createDir(outDir)

    gp = listCtrl.GetItem(selected, 0).GetText()
    comparedFA = self.project['compareFA']['allFA']
    FA1 = self.project.getFA(comparedFA[0])

    # Both graphs are drawn without a title (ttl="")
    if len(comparedFA) == 1:
        figName = "%s/%s_%s_%s.png" % (outDir, gp, FA1.name, self.project['name'])
        self.project['GO'].plot_InducedGraph(FA1.GPtoGO[aspect][gp],
                                             figName=figName,
                                             ttl="")
    else:
        FA2 = self.project.getFA(comparedFA[1])
        figName = "%s/%s_%s_%s_%s.png" % (outDir, gp, FA1.name, FA2.name, self.project['name'])
        self.project['GO'].compare_InducedGraph(FA1.GPtoGO[aspect][gp],
                                                FA2.GPtoGO[aspect][gp],
                                                figName=figName,
                                                ttl="")

    self.project['compareFA']['figName'] = figName

    # Show the figure, scaled to the current size of the viewer bitmap
    viewer = self.ViewFrame
    size = viewer.bitmap.GetSize().Get()
    viewer.bitmap.SetBitmap(PNGtoBitmap(figName, resize=size))
    viewer.bitmap.CenterOnParent(wx.BOTH)
    viewer.Show(True)
def GSAnnotations(projectDir, organism, G, Affy):
    """
    Study the evolution of the Affymetrix annotations of 4 GS probe sets in rice.

    For every probe set, the GO annotations found in each release are printed
    per aspect. Each distinct set of GO terms is then plotted once, labelled
    with the first release in which it was seen, under
    "<projectDir>/Graph/<organism>/GSAnnotations/<probe set>".

    projectDir -- root directory of the project
    organism   -- organism name used in the output path
    G          -- GO graph object providing plot_InducedGraph
    Affy       -- mapping of release number to a functional annotation
                  exposing GPtoGO[aspect][gene product]
    """
    GSProbes = [
        'Os.7909.1.S1_at', 'Os.12728.1.S1_at', 'Os.7879.1.S1_at',
        'Os.48875.1.S1_at'
    ]
    separator = "============================================================================="

    for gsp in GSProbes:
        print(separator)
        print(" Annotation for probe %s " % gsp)
        print(separator)

        # Distinct annotation set -> first release in which it appears
        firstRelease = dict()

        # Releases are visited in ascending order: dict iteration order is
        # arbitrary, and "first release" must mean the earliest one.
        for r in sorted(Affy):
            print("------------Release %d---------------" % r)

            annotations = Affy[r].GPtoGO
            terms = list()
            for aspect in annotations:
                if gsp in annotations[aspect]:
                    # Two spaces after the colon reproduce the output of the
                    # former two-argument print statement.
                    print("\t%s :  %s" % (aspect, annotations[aspect][gsp]))
                    terms.extend(annotations[aspect][gsp])
                else:
                    print("\tNo annotations in %s" % aspect)

            if len(terms) > 0:
                t = tuple(sort(terms))
                if t not in firstRelease:
                    firstRelease[t] = r

        if len(firstRelease) > 0:
            outDir = "%s/Graph/%s/GSAnnotations/%s" % (projectDir, organism, gsp)
            createDir(outDir)
            for t in firstRelease:
                figName = "%s/release%d.png" % (outDir, firstRelease[t])
                G.plot_InducedGraph(t,
                                    figName=figName,
                                    ttl="Affymetrix release %d" % firstRelease[t])
def testSCOP(projectDir):
    """
    Analyse, plot and export the statistics of the SCOP functional annotation.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; results go to its Graph and Export sub-directories
    """
    projectName = "SCOP"
    organism = "scop"

    #Read the reference set that defines the gene products
    refSet = RefSet(organism,
                    "%s/ReferenceSet/%s.txt" % (projectDir, organism),
                    refType="Text")

    #Read GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir), force=False)

    #Read the functional annotation; the pipeline is named after the project
    FA = FuncAnnot(projectName, refSet, G, organism=organism)
    FA.read("%s/Annotation/%s.txt" % (projectDir, organism), fileType="SCOP")
    allFA = [FA]

    #Analyse Functional annotations
    batchList = ["unconnected", "removeUnconnected", "coverage", "richness",
                 "numberAnnot", "coherence", "redundancy", "compactness",
                 "specificity", "informationContent"]
    batchExecute(batchList, AnalyseFA(), allFA)

    #Plot statistics of Functional annotations
    graphDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(graphDir)
    plotFA = PlotFA(xlabel="", outDir=graphDir, name=projectName, organism=organism)
    batchExecute(batchList, plotFA, allFA)

    #-----------------------------------------------
    #Export statistics to Excel
    exportDir = "%s/Export/%s" % (projectDir, organism)
    createDir(exportDir)

    exportList = ["unconnected", "coverage", "numberAnnot", "richness",
                  "coherence", "compactness", "specificity",
                  "informationContent", "redundancy"]
    reportFA = ReportFA(outDir=exportDir, name=projectName, organism=organism)
    reportFA.printStatistics(allFA, exportList)
    reportFA.saveStatistics(allFA, exportList)
def worseSim_COPSAandB2G(projectDir, organism, FA1, FA2):
    """
    Plot the annotation sets on which FA1 and FA2 agree the least.

    For every GO aspect (except "All_aspects_of_GO"), the gene products
    annotated by both functional annotations are ranked by the average of the
    two values returned by GOSet_PWSimilarity, and the (at most) ten lowest
    ranked ones are drawn with compare_InducedGraph into
    "<projectDir>/Graph/<organism>/WorseFuncSim".

    projectDir -- root directory of the project
    organism   -- organism name used in the output path
    FA1, FA2   -- functional annotations exposing name, GPtoGO and (FA1) G
    """
    from AIGO.Similarity import GOSet_PWSimilarity

    nbWorse = 10

    outDir = "%s/Graph/%s/WorseFuncSim" % (projectDir, organism)
    createDir(outDir)

    for aspect in allAspect:
        if aspect == "All_aspects_of_GO":
            continue

        # Materialised once so that ranks can be mapped back to gene products
        commonGene = list(set(FA1.GPtoGO[aspect].keys()).intersection(
            FA2.GPtoGO[aspect].keys()))

        # Average of the two values of the second element returned by
        # GOSet_PWSimilarity -- presumably the two directional similarities.
        allD = list()
        for g in commonGene:
            l = GOSet_PWSimilarity(FA1.G, FA1.GPtoGO[aspect][g],
                                   FA2.GPtoGO[aspect][g])[1]
            allD.append((array(l[0]) + array(l[1])) / 2.)

        idx = argsort(allD)

        # Slicing copes with aspects sharing fewer than nbWorse gene products
        for rank in idx[:nbWorse]:
            gp = commonGene[rank]

            figName = "%s/%s_annotation_%s_from_%s_%s.png" % (
                outDir, aspect, gp, FA1.name, FA2.name)
            ttl = "%s annotations of %s from %s (green) and %s (red) : Functional similarity = %.2f" % (
                aspect.replace("_", " "), gp, FA1.name, FA2.name, allD[rank])

            FA1.G.compare_InducedGraph(FA1.GPtoGO[aspect][gp],
                                       FA2.GPtoGO[aspect][gp],
                                       figName=figName,
                                       ttl=ttl)
def compareRiceAffymetrixReleases(projectDir):
    """
    Compare the properties of the Affymetrix annotation releases 20 to 31
    (12 releases) for a Rice array.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; figures go to "<projectDir>/Graph/rice"
    """
    projectName = "Affymetrix"
    organism = "rice"

    #Read rice microarray target sequence to define the set of gene products
    refSet = RefSet(organism,
                    "%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    #Read GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir), force=False)

    #Read every release of Affymetrix Functional annotations (20 .. 31)
    release = arange(20, 32)
    Affy = dict()
    for r in release:
        annotation = FuncAnnot(str(r), refSet, G, organism=organism)
        annotation.read("%s/Annotation/Affy_%s.na%d.annot.csv" % (projectDir, organism, r),
                        fileType="AFFY")
        Affy[r] = annotation

    #Analyse Functional annotations
    batchList = ["obsolete", "unconnected", "removeUnconnected", "coverage", "richness"]
    batchExecute(batchList, AnalyseFA(), [Affy[r] for r in release])

    #Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Affymetrix Release number",
                    outDir=outDir,
                    name=projectName,
                    organism=organism,
                    ext="pdf")
    batchExecute(batchList,
                 plotFA, [Affy[r] for r in release],
                 doGrid=True,
                 lloc="upper right")

    #Compare release 20 and 31
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, [Affy[20], Affy[31]])

    #Plot statistics of the comparison
    batchExecute(["venn", "funcSymSim"],
                 plotFA,
                 compareFA, [Affy[20], Affy[31]],
                 doGrid=True,
                 tit="")

    #Find the worse semantic similarity between 20 and 31
    # NOTE(review): this call passes four arguments -- confirm it matches the
    # signature of the worseFunctionalSimilarity in scope here.
    worseFunctionalSimilarity(projectDir, organism, Affy[20], Affy[31])

    #Study the evolution of Glutamine Synthetase (GS) annotations
    GSAnnotations(projectDir, organism, G, Affy)
def compareEvidence(projectDir):
    """
    Compare electronically inferred (IEA) and manually curated annotations
    (IC, TAS, ISS, NAS, merged as AHC) to experimental annotations (EXP2).

    The annotations of all organisms are pooled per evidence code, analysed,
    plotted, compared and exported; a precision versus recall figure against
    EXP2 is finally saved as "<projectDir>/Graph/allSpecies/PrecisionVSRecall.png".

    projectDir -- root directory holding the EvidenceCode and OBO inputs
    """
    projectName = "EvidenceCode"
    organism = "allSpecies"
    fileType = "GAF"

    allOrg = [
        "Arabidopsis_thaliana", "Drosophila_melanogaster",
        "Mycobacterium_tuberculosis_ATCC_25618", "Schizosaccharomyces_pombe",
        "Bos_taurus", "Escherichia_coli_ATCC_27325",
        "Mycobacterium_tuberculosis_Oshkosh", "Caenorhabditis_elegans",
        "Escherichia_coli_MG1655", "Oryza_sativa", "Synechocystis_sp",
        "Candida_albicans_SC5314", "Gallus_gallus",
        "Pseudomonas_fluorescens_Pf-5", "Danio_rerio", "Homo_sapiens",
        "Rattus_norvegicus"
    ]

    #Define the set of gene products from the experimental annotations
    refSet = RefSet(organism)
    for refOrg in allOrg:
        refSet.add("%s/EvidenceCode/%s/two_experimental_evidence.goa" % (projectDir, refOrg),
                   refType="GAF")

    #Read GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir), force=False)

    #Evidence code -> annotations pooled over all organisms
    allFA = dict()

    def poolEvidence(pipeName):
        # Pool the <pipeName>.goa file of every organism that provides one
        pooled = FuncAnnot(pipeName, refSet, G, organism=organism)
        for refOrg in allOrg:
            fileName = "%s/EvidenceCode/%s/%s.goa" % (projectDir, refOrg, pipeName)
            if os.path.exists(fileName):
                perOrganism = FuncAnnot(pipeName, refSet, G, organism=refOrg)
                perOrganism.read(fileName, fileType=fileType)
                pooled.add(perOrganism)
        allFA[pipeName] = pooled

    def chosen(codes):
        # A fresh list of the pooled annotations for the given evidence codes
        return [allFA[evidence] for evidence in codes]

    #-----------------------------------------------
    #Read Functional annotations obtained by experiments
    EXP2 = FuncAnnot("EXP2", refSet, G, organism=organism)
    for refOrg in allOrg:
        experimental = FuncAnnot("EXP2", refSet, G, organism=refOrg)
        experimental.read("%s/EvidenceCode/%s/two_experimental_evidence.goa" % (projectDir, refOrg),
                          fileType=fileType)
        EXP2.add(experimental)
    allFA["EXP2"] = EXP2

    #-----------------------------------------------
    #Read Functional annotations obtained by human curation
    for curated in ["IC", "TAS", "ISS", "NAS"]:
        poolEvidence(curated)

    #Merge FAs Assigned by Human Curator (TAS is left out of the merge)
    AHC = FuncAnnot("AHC", refSet, G, organism=organism)
    for evidence in ["IC", "ISS", "NAS"]:
        AHC.add(allFA[evidence])
    allFA["AHC"] = AHC

    #-----------------------------------------------
    #Read Functional annotations obtained without human curation
    poolEvidence("IEA")

    #-----------------------------------------------
    listFA = ["EXP2", "AHC", "IEA"]

    #Analyse Functional annotations
    analyseFA = AnalyseFA()
    batchList = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "coherence", "redundancy", "removeRedundancy",
        "compactness", "specificity", "informationContent"
    ]
    batchExecute(batchList, analyseFA, chosen(listFA))

    #Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Evidence Codes",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(batchList, plotFA, chosen(listFA), doGrid=True)
    batchExecute(["coherenceHisto2D", "numberAnnotHisto2D"],
                 plotFA, chosen(listFA),
                 doGrid=True)

    #Compare Functional annotations
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, chosen(listFA))
    batchExecute(["recall", "precision"], compareFA, chosen(listFA))

    #Plot statistics of the comparison between Functional annotations
    batchExecute(["venn", "funcSymSim"], plotFA, compareFA, chosen(listFA))
    batchExecute(["recall", "precision"], plotFA, compareFA, chosen(listFA))

    #-----------------------------------------------
    #Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)
    exportList = [
        "unconnected", "coverage", "richness", "numberAnnot", "coherence",
        "compactness", "specificity", "informationContent", "redundancy"
    ]
    reportFA = ReportFA(outDir=outDir, name=projectName, organism=organism)
    reportFA.printStatistics(chosen(listFA), exportList)
    reportFA.saveStatistics(chosen(listFA), exportList)

    #-----------------------------------------------
    # Individual contributions of evidence codes
    goAspects = ["cellular_component", "molecular_function", "biological_process"]
    contribution = dict()
    for ec in ["IC", "TAS", "ISS", "NAS", "IEA"]:
        pairs = set()
        for aspect in goAspects:
            for gp in allFA[ec].GPtoGO[aspect]:
                for go in allFA[ec].GPtoGO[aspect][gp]:
                    pairs.add((gp, go))
        contribution[ec] = pairs

    # Percentages are relative to the curated evidence codes only
    curatedCodes = ["IC", "TAS", "ISS", "NAS"]
    total_Annotation = sum([len(contribution[ec]) for ec in curatedCodes])
    for ec in curatedCodes:
        print("%.02f %% of the annotations are supported by %s" % (
            100. * len(contribution[ec]) / total_Annotation, ec))

    batchExecute(["recall", "precision"], compareFA,
                 chosen(["EXP2", "ISS", "TAS", "NAS", "IC", "AHC", "IEA"]))

    #-----------------------------------------------
    # Precision versus recall of every evidence code against the reference
    reference = "EXP2"
    plotEvidence = ["ISS", "TAS", "NAS", "IC", "AHC", "IEA"]

    evidenceMarker = dict(zip(plotEvidence, ['s', 'd', 'D', '*', 'p', 'h']))
    evidenceSize = dict(zip(plotEvidence, [8, 8, 8, 8, 15, 15]))
    aspectColor = dict(zip(allAspect, ["blue", "green", "red", "cyan"]))

    figure(figsize=(8, 8))
    for evidence in plotEvidence:
        for aspect in allAspect:
            if aspect == "All_aspects_of_GO":
                continue

            allX = compareFA['recall'][aspect][(evidence, reference)].values()
            allY = compareFA['precision'][aspect][(evidence, reference)].values()

            # One point per (evidence, aspect): mean with standard error bars
            meanX = mean(allX)
            errX = std(allX) / sqrt(len(allX))
            meanY = mean(allY)
            errY = std(allY) / sqrt(len(allY))

            errorbar(meanX,
                     meanY,
                     xerr=errX,
                     yerr=errY,
                     alpha=0.9,
                     hold=True,
                     mfc=aspectColor[aspect],
                     ecolor=aspectColor[aspect],
                     marker=evidenceMarker[evidence],
                     ms=evidenceSize[evidence])

    xlabel("Verspoor Hierarchical Recall")
    ylabel("Verspoor Hierarchical Precision")

    # Legend: one coloured entry per aspect, then one marker per evidence code
    allMarker = ['o', 'o', 'o', 's', 'd', 'D', '*', 'p', 'h']
    allColor = [
        "green", "red", "cyan", "white", "white", "white", "white", "white",
        "white"
    ]
    allLabel = [
        aspect.replace("_", " ") for aspect in allAspect
        if not aspect == "All_aspects_of_GO"
    ]
    allLabel.extend(plotEvidence)

    handles = list()
    for m, c, l in zip(allMarker, allColor, allLabel):
        handles.append(Line2D(arange(5), arange(5), ls='-', marker=m, color=c, label=l))

    leg = legend(handles, allLabel, loc="upper left", numpoints=1)
    leg.legendPatch.set_alpha(0.5)
    grid()

    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    figName = "%s/PrecisionVSRecall.png" % outDir
    savefig(figName)
def compare_COPSAandB2G(projectDir):
    """
    Compare the COPSA and B2G annotation pipelines on a wheat array.

    Besides the two pipelines, four derived annotations are built: each
    pipeline restricted to the gene products also annotated by the other
    (COPSAandB2G, B2GandCOPSA) and each pipeline restricted to the gene
    products annotated by itself only (COPSAonly, B2Gonly). All of them are
    analysed and plotted, and the four derived ones are passed to
    compareCoexpression.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs
    """
    organism = "wheat"
    projectName = "MATT"

    #Read wheat microarray target sequence to define the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    #Read GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir), force=False)

    #Read Functional annotations
    allPipeName = ["COPSA", "B2G"]
    allFileName = [
        "%s/Annotation/COPSA_%s.tab" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism)
    ]
    allFileType = ["GP2GO", "B2G"]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName, allFileType):
        annotation = FuncAnnot(pipeName, refSet, G, organism=organism)
        annotation.read(fileName, fileType=fileType)
        pipeline[pipeName] = annotation

    def derive(name, sourceName, otherName, dropShared):
        # Copy the source pipeline, then per aspect remove either the gene
        # products shared with the other pipeline (dropShared) or the ones
        # the other pipeline does not annotate.
        derived = FuncAnnot(name, refSet, G, organism=organism)
        derived.add(pipeline[sourceName])
        for aspect in G.aspect:
            if dropShared:
                unwanted = pipeline[otherName].GPtoGO[aspect].keys()
            else:
                unwanted = set(derived.GPtoGO[aspect].keys()).difference(
                    pipeline[otherName].GPtoGO[aspect].keys())
            derived.removeGP(unwanted, myAspects=[aspect])
        pipeline[derived.name] = derived
        allPipeName.append(derived.name)

    #COPSA annotations but only for GPs that are also annotated by B2G
    derive("COPSAandB2G", "COPSA", "B2G", False)

    #B2G annotations but only for GPs that are also annotated by COPSA
    derive("B2GandCOPSA", "B2G", "COPSA", False)

    #COPSA annotations only
    derive("COPSAonly", "COPSA", "B2G", True)

    #B2G annotations only
    derive("B2Gonly", "B2G", "COPSA", True)

    #Analyse Functional annotations
    # Only "removeUnconnected" is run; the complete batch ("obsolete",
    # "unconnected", "removeUnconnected", "coverage", "richness",
    # "numberAnnot", "coherence", "redundancy", "compactness", "specificity",
    # "informationContent") is overridden.
    batchList = ["removeUnconnected"]
    batchExecute(batchList, AnalyseFA(),
                 [pipeline[pipeName] for pipeName in allPipeName])

    #Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True)

    derivedNames = ["COPSAandB2G", "B2GandCOPSA", "COPSAonly", "B2Gonly"]
    compareCoexpression([pipeline[name] for name in derivedNames])
def compareWheatPipelines(projectDir):
    """
    Compare the properties of six annotation pipelines for a wheat array
    (Blast, Pfam, Merge, COPSA, AFFY and B2G).

    All pipelines are analysed, plotted and exported; COPSA, AFFY and B2G are
    additionally compared with each other.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; results go to its Graph and Export sub-directories
    """
    organism = "wheat"
    projectName = "MATT"

    #Read wheat microarray target sequence to define the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    #Read GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir), force=False)

    #Read Functional annotations
    allPipeName = ["Blast", "Pfam", "Merge", "COPSA", "AFFY", "B2G"]
    allFileType = ["GP2GO", "GP2GO", "GP2GO", "GP2GO", "AFFY", "B2G"]
    allFileName = [
        "%s/Annotation/blast2goPaths_fin_aracyc_%s_unionBest.tab" % (projectDir, organism),
        "%s/Annotation/pfam2goPaths2_%s_unionBest.tab" % (projectDir, organism),
        "%s/Annotation/pfam2goPaths2_%s_unionBest___blast2goPaths_fin_aracyc_%s_unionBest_merged.tab" % (projectDir, organism, organism),
        "%s/Annotation/COPSA_%s.tab" % (projectDir, organism),
        "%s/Annotation/Affy_%s.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism)
    ]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName, allFileType):
        annotation = FuncAnnot(pipeName, refSet, G, organism=organism)
        annotation.read(fileName, fileType=fileType)
        pipeline[pipeName] = annotation

    def chosen(names):
        # A fresh list of the annotations of the given pipelines
        return [pipeline[pipeName] for pipeName in names]

    #Analyse Functional annotations
    batchList = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "coherence", "redundancy", "compactness", "specificity",
        "informationContent"
    ]
    batchExecute(batchList, AnalyseFA(), chosen(allPipeName))

    #Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(batchList, plotFA, chosen(allPipeName), doGrid=True)
    batchExecute(["coherenceHisto2D", "numberAnnotHisto2D"],
                 plotFA, chosen(allPipeName),
                 doGrid=True)

    #Compare Functional annotations
    compared = ["COPSA", "AFFY", "B2G"]
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, chosen(compared))

    #Plot statistics of the comparison between Functional annotations
    batchExecute(["venn", "funcSymSim"], plotFA, compareFA, chosen(compared))

    #-----------------------------------------------
    #Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)

    exportList = [
        "unconnected", "coverage", "numberAnnot", "richness", "coherence",
        "compactness", "specificity", "informationContent", "redundancy"
    ]
    reportFA = ReportFA(outDir=outDir, name=projectName, organism=organism)
    reportFA.printStatistics(chosen(allPipeName), exportList)
    reportFA.saveStatistics(chosen(allPipeName), exportList)
def compareCoexpression(allFA):
    """
    Relate the coexpression of probe set pairs to the GS2 value of their
    GO annotations.

    For every functional annotation and GO aspect (except
    "All_aspects_of_GO"), a 2D surface of coexpression versus G.GS2 is saved
    for the pairs annotated on both sides. Summary statistics of G.GS2 are
    then printed for the pairs whose coexpression exceeds 0.95.

    allFA -- functional annotations exposing name and GPtoGO

    NOTE(review): projectDir, organism, G and allAspect are not parameters;
    they are resolved from the enclosing scope -- confirm they are defined
    where this is called.
    """
    outDir = "%s/Graph/%s/Coexpression" % (projectDir, organism)
    createDir(outDir)

    # MAT[pA][pB] holds the coexpression read from the filtered file
    MAT = readCOEX_Filter("%s/correlations.tab_Filter_1000_0.001" % outDir)

    for FA in allFA:
        for aspect in allAspect:
            if aspect == "All_aspects_of_GO":
                continue

            annotated = FA.GPtoGO[aspect]

            # Pairs for which both probe sets are annotated in this aspect
            pairs = [(pA, pB) for pA in MAT for pB in MAT[pA]
                     if pA in annotated and pB in annotated]

            coexpression = [MAT[pA][pB] for pA, pB in pairs]
            similarity = [
                G.GS2([G.GOtoInt(annotated[pA]), G.GOtoInt(annotated[pB])])[0]
                for pA, pB in pairs
            ]

            ax = surface2D(coexpression,
                           similarity,
                           interp="bicubic",
                           cblabel="Number of probeset",
                           bins=50)
            ax.set_xlabel("Coexpression")
            ax.set_ylabel("Semantic Distance")
            ax.xaxis.grid()
            ax.yaxis.grid()
            title("GO %s in %s" % (aspect.replace("_", " "), FA.name))

            figName = "%s/%s_COEXvsSIM_%s.png" % (outDir, FA.name, aspect)
            savefig(figName)

    from scipy import stats

    # Restrict the summary to strongly coexpressed pairs
    allKeys = [(pA, pB) for pA in MAT for pB in MAT[pA] if MAT[pA][pB] > 0.95]

    for FA in allFA:
        print("=========================================================")
        print("FA: %s" % FA.name)
        print(" GO Aspect \t Mean (Std) \t 50% [5%,95%] \t N")

        for aspect in allAspect:
            if aspect == "All_aspects_of_GO":
                continue

            annotated = FA.GPtoGO[aspect]
            Sim = [
                G.GS2([G.GOtoInt(annotated[pA]), G.GOtoInt(annotated[pB])])[0]
                for pA, pB in allKeys
                if pA in annotated and pB in annotated
            ]

            print("%s \t %.2f (%.2f) \t %.2f [%.2f,%.2f]\t %d" % (
                aspect, mean(Sim), std(Sim),
                stats.scoreatpercentile(Sim, 50),
                stats.scoreatpercentile(Sim, 5),
                stats.scoreatpercentile(Sim, 95),
                len(Sim)))
def worseFunctionalSimilarity(projectDir):
    """
    Identify the ten most different annotation sets between Affymetrix and
    Blast2GO for a Bovine array.

    For every GO aspect, the gene products annotated by both pipelines are
    ranked by the average of the two values returned by GOSet_PWSimilarity,
    and the (at most) N lowest ranked ones are drawn with
    compare_InducedGraph into "<projectDir>/Graph/bovine/WorseFuncSim".

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs
    """
    from AIGO.Similarity import GOSet_PWSimilarity

    projectName = "bovinePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function identifies the ten most different annotation sets between Affymetrix and Blast2GO for a Bovine array"
    )
    logger.info(banner)
    logger.info("name of the project : %s " % projectName)

    #Read bovine microarray probe set to define the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    #Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    #Read the two Functional annotations to compare
    fileName = "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)
    FA1 = FuncAnnot("AFFY", refSet, G, organism=organism)
    FA1.read(fileName, fileType="AFFY")

    fileName = "%s/Annotation/B2G_%s.annot" % (projectDir, organism)
    FA2 = FuncAnnot("B2G", refSet, G, organism=organism)
    FA2.read(fileName, fileType="B2G")

    #Analyse Functional annotations
    analyseFA = AnalyseFA()
    batchExecute(["removeUnconnected"], analyseFA, [FA1, FA2])

    outDir = "%s/Graph/%s/WorseFuncSim" % (projectDir, organism)
    createDir(outDir)

    N = 10
    logger.info("=================================================")
    logger.info("Plotting the %d most dissimilar annotation sets" % N)
    logger.info("directory : %s" % outDir)

    for aspect in G.aspect:
        # Materialised once so that ranks can be mapped back to gene products
        commonGene = list(set(FA1.GPtoGO[aspect].keys()).intersection(
            FA2.GPtoGO[aspect].keys()))

        logger.info("%s : processing %d annotation sets " %
                    (aspect, len(commonGene)))

        # Average of the two values of the second element returned by
        # GOSet_PWSimilarity -- presumably the two directional similarities.
        allD = list()
        for g in commonGene:
            l = GOSet_PWSimilarity(G, FA1.GPtoGO[aspect][g],
                                   FA2.GPtoGO[aspect][g])[1]
            allD.append((array(l[0]) + array(l[1])) / 2.)

        idx = argsort(allD)

        # Slicing copes with aspects sharing fewer than N gene products
        for rank in idx[:N]:
            gp = commonGene[rank]

            figName = "%s/%s_annotation_%s_from_%s_%s.png" % (
                outDir, aspect, gp, FA1.name, FA2.name)
            ttl = "%s annotations of %s from %s (green) and %s (red) : Functional similarity = %.2f" % (
                aspect.replace("_", " "), gp, FA1.name, FA2.name, allD[rank])

            FA1.G.compare_InducedGraph(FA1.GPtoGO[aspect][gp],
                                       FA2.GPtoGO[aspect][gp],
                                       figName=figName,
                                       ttl=ttl)

    logger.info(banner)
    logger.info("")
def compareBovineAndRandom(projectDir):
    """
    Compare the properties of 3 functional annotations for a Bovine array
    with a resampled version of the Affymetrix functional annotations.

    The Affymetrix file is read twice: once as "AFFY" and once as "resample",
    the latter being passed through the "sampleAnnotation" randomisation
    before all four annotations are analysed and plotted.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; figures go to "<projectDir>/Graph/bovine"
    """
    projectName = "BovineAndRandom"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function compare the properties of 3 functional annotations for a Bovine array + a randomize version of Affymetrix functional annotations."
    )
    logger.info(banner)
    logger.info("name of the project : %s " % projectName)

    #Read bovine microarray probe set to define the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    #Read GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir), force=False)

    #Read Functional annotations
    allPipeName = ["AFFY", "B2G", "AID", "resample"]
    allFileType = ["AFFY", "B2G", "AID", "AFFY"]
    allFileName = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        "%s//Annotation/AID_%s.txt" % (projectDir, organism),
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)
    ]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName, allFileType):
        annotation = FuncAnnot(pipeName, refSet, G, organism=organism)
        annotation.read(fileName, fileType=fileType)
        pipeline[pipeName] = annotation

    randomizeFA = RandomizeFA()
    analyseFA = AnalyseFA()

    #-----------------------------------------------
    # Resample the second copy of the Affymetrix annotations only
    batchExecute(["sampleAnnotation"], randomizeFA,
                 [pipeline[pipeName] for pipeName in ["resample"]])

    #Analyse Functional annotations
    batchList = ["coherence", "redundancy", "numberAnnot"]
    batchExecute(batchList, analyseFA,
                 [pipeline[pipeName] for pipeName in allPipeName])

    #Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name="Resample",
                    organism=organism,
                    ext="png")
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True)
    batchExecute(["coherenceHisto2D", "numberAnnotHisto2D"],
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True,
                 tit="")

    logger.info(banner)
    logger.info("")
def compareRandomizePipelines(projectDir):
    """
    Compare the properties of 3 randomized functional annotations for a
    Bovine array.

    The AFFY, B2G and AID annotations are first passed through
    "shuffleAnnotation", analysed and exported as "Randomize shuffle"; the
    same objects are then passed through "sampleAnnotation", analysed and
    exported as "Randomize sample".

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; reports go to "<projectDir>/Export/bovine"
    """
    projectName = "randomizePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function compare the properties of 3 randomized functional annotations for a Bovine array."
    )
    logger.info(banner)
    logger.info("name of the project : %s " % projectName)

    #Read bovine microarray target sequence to define the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    #Read GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir), force=False)

    #Read Functional annotations; the file types are named like the pipelines
    allPipeName = ["AFFY", "B2G", "AID"]
    allFileType = allPipeName
    allFileName = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        "%s/Annotation/AID_%s.txt" % (projectDir, organism)
    ]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName, allFileType):
        annotation = FuncAnnot(pipeName, refSet, G, organism=organism)
        annotation.read(fileName, fileType=fileType)
        pipeline[pipeName] = annotation

    def everyPipeline():
        # A fresh list of all the annotations, in pipeline order
        return [pipeline[pipeName] for pipeName in allPipeName]

    randomizeFA = RandomizeFA()

    #-----------------------------------------------
    # Shuffle functional annotation
    batchExecute(["shuffleAnnotation"], randomizeFA, everyPipeline())

    #Analyse Functional annotations
    analyseFA = AnalyseFA()
    batchExecute(["coherence", "redundancy"], analyseFA, everyPipeline())

    #Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)

    exportList = ["coherence", "redundancy"]
    report = ReportFA(name="Randomize shuffle", outDir=outDir, organism=organism)
    report.printStatistics(everyPipeline(), exportList)
    report.saveStatistics(everyPipeline(), exportList)

    #-----------------------------------------------
    # Resample functional annotation
    batchExecute(["sampleAnnotation"], randomizeFA, everyPipeline())

    #Analyse Functional annotations ("coherence" and "compactness" are left out)
    batchList = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "redundancy", "specificity", "informationContent"
    ]
    batchExecute(batchList, analyseFA, everyPipeline())

    #Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)

    exportList = [
        "coverage", "numberAnnot", "richness", "specificity",
        "informationContent", "redundancy"
    ]
    report = ReportFA(name="Randomize sample", outDir=outDir, organism=organism)
    report.printStatistics(everyPipeline(), exportList)
    report.saveStatistics(everyPipeline(), exportList)

    logger.info(banner)
    logger.info("")
def compareBovinePipelines(projectDir):
    """
    Compare the properties of 3 functional annotations (AFFY, B2G and AID)
    for a Bovine array.

    The annotations are read, analysed, plotted under
    "<projectDir>/Graph/bovine", compared with each other, and their
    statistics are finally printed and saved by a ReportFA created with
    outDir "<projectDir>/Export/bovine".

    projectDir : root directory containing the ReferenceSet, OBO and
                 Annotation sub-directories read below.
    """
    projectName = "bovinePipeline"
    organism = "bovine"

    logger.info(
        "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"
    )
    logger.info(
        "This function compare the properties of 3 functional annotations for a Bovine array."
    )
    logger.info(
        "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"
    )
    logger.info("name of the project : %s " % projectName)

    # Reference set: the bovine microarray probe set defines the gene products
    refSetFile = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=refSetFile, refType="Fasta")

    # GO ontology
    oboFile = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(oboFile, force=False)

    # Functional annotations: one file per pipeline, and the pipeline name
    # is also used as the file type when reading
    pipeNames = ["AFFY", "B2G", "AID"]
    annotFiles = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        "%s/Annotation/AID_%s.txt" % (projectDir, organism),
    ]

    pipeline = dict()
    for name, annotFile in zip(pipeNames, annotFiles):
        annot = FuncAnnot(name, refSet, G, organism=organism)
        annot.read(annotFile, fileType=name)
        pipeline[name] = annot

    def orderedFA():
        # A new list on every call, always in AFFY, B2G, AID order
        return [pipeline[name] for name in pipeNames]

    #-----------------------------------------------
    # Analyse the functional annotations.
    # The full list also had "coherence" and "compactness"; they are
    # currently left out.
    analyseFA = AnalyseFA()
    analyses = [
        "obsolete", "unconnected", "removeUnconnected", "coverage",
        "richness", "numberAnnot", "redundancy", "specificity",
        "informationContent"
    ]
    batchExecute(analyses, analyseFA, orderedFA())

    # How big are the largest annotation sets ?
    analyseFA.largestSet(orderedFA())
    logger.info("The largest sets of annotations are :")
    for annot in orderedFA():
        logger.info("\t%d for %s" % (annot['largestSet']['All_aspects_of_GO'], annot.name))

    # Plot the statistics computed above (the same list drives the plots)
    graphDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(graphDir)
    plotFA = PlotFA(xlabel="Annotation pipelines", outDir=graphDir,
                    name=projectName, organism=organism, ext="png")
    batchExecute(analyses, plotFA, orderedFA(), doGrid=True)

    # 2D histograms; "coherenceHisto2D" is currently left out
    batchExecute(["numberAnnotHisto2D"], plotFA, orderedFA(), doGrid=True, tit="")

    #-----------------------------------------------
    # Compare the functional annotations
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, orderedFA())

    # Plot the statistics of the comparison
    batchExecute(["venn", "funcSymSim"], plotFA, compareFA, orderedFA(), tit="")

    #-----------------------------------------------
    # Export statistics to Excel
    exportDir = "%s/Export/%s" % (projectDir, organism)
    createDir(exportDir)

    # "coherence" and "compactness" are left out here as well
    exportStats = [
        "unconnected", "coverage", "richness", "numberAnnot",
        "specificity", "informationContent", "redundancy"
    ]
    reportFA = ReportFA(outDir=exportDir, name=projectName, organism=organism)
    reportFA.printStatistics(orderedFA(), exportStats)
    reportFA.saveStatistics(orderedFA(), exportStats)

    logger.info(
        "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"
    )
    logger.info("")
def createPlotFA(project):
    """
    Make sure that project holds a PlotFA object under the key 'plotFA'.

    When the key is missing, the directory "<project['directory']>/Graph"
    is created with createDir and a new PlotFA, built from the project's
    'name' and 'organism' entries, is stored in project['plotFA'].
    When the key is already present the project is left untouched.

    project : mapping-like object providing the 'directory', 'name' and
              'organism' entries (assumed to support the "in" operator
              like a dict -- confirm for custom project classes).
    """
    # Use the "in" operator instead of has_key(): dict.has_key() no longer
    # exists in Python 3, while "in" works on both Python 2 and 3.
    if 'plotFA' in project:
        return

    outDir = "%s/Graph" % (project['directory'])
    createDir(outDir)
    project['plotFA'] = PlotFA(xlabel="Functional Annotation", outDir=outDir,
                               name=project['name'],
                               organism=project['organism'], grid=True)