示例#1
0
 def OnLoadGO(self, event):
     """
     Load the GO ontology selected in the notebook and attach it to the project.

     The file name is read from the ``box_GO`` control and the ``force`` flag
     from ``checkbox_GO``; both are forwarded to readGOoboXML. The result is
     passed to ``self.project.addGO``, a "GO" notification is emitted and the
     status display is refreshed.

     event -- the GUI event that triggered this handler (not used here).
     """
     notebook = self.frame.notebook
     # NOTE: the radio_btn_GO selection (reference type) is not consulted.
     ontology = readGOoboXML(fileName=notebook.box_GO.GetValue(),
                             force=notebook.checkbox_GO.GetValue())
     self.project.addGO(ontology)
     self.event.notify("GO")
     self.updateStatus()
示例#2
0
def compareRiceAffymetrixReleases(projectDir):
    """
    Compare the Affymetrix annotation releases na20 to na31 of a Rice array.

    Each release file is loaded into its own FuncAnnot; the whole series is
    analysed and plotted, after which releases 20 and 31 are compared with
    each other.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; plots are written under its Graph/rice directory.
    """

    projectName = "Affymetrix"
    organism = "rice"
    firstRelease, lastRelease = 20, 31

    # The rice microarray target sequences define the set of gene products
    refSet = RefSet(organism,
                    "%s/ReferenceSet/%s.fasta" % (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir),
                     force=False)

    # One functional annotation per release number, keyed by that number
    release = arange(firstRelease, lastRelease + 1)
    Affy = dict()
    for r in release:
        annotFile = "%s/Annotation/Affy_%s.na%d.annot.csv" % (projectDir,
                                                              organism, r)
        annot = FuncAnnot(str(r), refSet, G, organism=organism)
        annot.read(annotFile, fileType="AFFY")
        Affy[r] = annot

    # Statistics on every release
    analyseFA = AnalyseFA()
    batchList = [
        "obsolete",
        "unconnected",
        "removeUnconnected",
        "coverage",
        "richness",
    ]
    batchExecute(batchList, analyseFA, [Affy[r] for r in release])

    # Plots of the same statistics
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Affymetrix Release number",
                    outDir=outDir,
                    name=projectName,
                    organism=organism,
                    ext="pdf")
    batchExecute(batchList,
                 plotFA, [Affy[r] for r in release],
                 doGrid=True,
                 lloc="upper right")

    # Head-to-head comparison of the oldest and the newest release
    oldest, newest = Affy[firstRelease], Affy[lastRelease]
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, [oldest, newest])

    # Plots of that comparison
    batchExecute(["venn", "funcSymSim"],
                 plotFA,
                 compareFA, [oldest, newest],
                 doGrid=True,
                 tit="")

    # Annotation sets with the worst semantic similarity between the two
    worseFunctionalSimilarity(projectDir, organism, oldest, newest)

    # Evolution of the Glutamine Synthetase (GS) annotations across releases
    GSAnnotations(projectDir, organism, G, Affy)
示例#3
0
def compareSimilarity(projectDir):
    """
    Compare three GO similarity metrics (GS2, CzekanowskiDice, Resnik).

    Two bovine functional annotations (AFFY and B2G) are read, an
    information-content table is derived for each of them, and the
    correlation between the metrics is logged twice: once for the coherence
    of biological process annotation sets within one pipeline, once for the
    molecular function annotation sets shared by the two pipelines.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs.
    """

    projectName = "simPipeline"
    organism = "bovine"
    separator = "================================================="

    # The bovine microarray probe set defines the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" %
                    (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir),
                     force=False)

    # Functional annotations: the pipeline name doubles as the file type
    allPipeName = ["AFFY", "B2G"]
    allFileType = allPipeName
    allFileName = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
    ]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName,
                                            allFileType):
        annot = FuncAnnot(pipeName, refSet, G, organism=organism)
        annot.read(fileName, fileType=fileType)
        pipeline[pipeName] = annot

    #-----------------------------------------------

    # Drop the unconnected annotations before measuring anything
    analyseFA = AnalyseFA()
    batchExecute(["removeUnconnected"], analyseFA,
                 [pipeline[pipeName] for pipeName in allPipeName])

    # Information content of the GO terms, per pipeline and per aspect
    logger.info(separator)
    logger.info("Computing Information Content")
    allIC = dict()
    for pipeName in allPipeName:
        FA = pipeline[pipeName]
        logger.info("\t%s" % FA.name)
        icOfPipe = dict()
        allIC[pipeName] = icOfPipe

        # Every annotated term adds its number of gene products to each term
        # returned by G.ancestors
        for a in FA.G.aspect:
            count = dict()
            icOfPipe[a] = count
            goToGP = FA.GOtoGP[a]
            for go in goToGP:
                n = len(goToGP[go])
                for ans in FA.G.ancestors(FA.G.get_intid(go)):
                    count[ans] = count.get(ans, 0) + n

        # Counts become -log(count / largest count of the aspect)
        for a in FA.G.aspect:
            count = icOfPipe[a]
            if not count:
                continue
            m = max(count.values())
            for go in count:
                count[go] = -1. * log(1. * count[go] / m)

    # Coherence of biological process annotation sets under the three metrics
    # NOTE(review): the log message names AFFY but the pipeline selected
    # below is B2G -- confirm which one is intended.
    logger.info(separator)
    logger.info(
        "Computing functional coherence of biological process annotation sets in AFFY given by three different similarity metrics"
    )
    aspect = "biological_process"
    pipeName = "B2G"
    FA = pipeline[pipeName]
    gpToGO = FA.GPtoGO[aspect]

    # Only gene products annotated with more than one term are scored
    logger.info("\tGS2")
    allGS2 = list()
    for gp in gpToGO:
        if len(gpToGO[gp]) > 1:
            allGS2.append(mean(GOSet_Similarity(G, gpToGO[gp], metric="GS2")))

    logger.info("\tCzekanowskiDice")
    allCD = list()
    for gp in gpToGO:
        if len(gpToGO[gp]) > 1:
            allCD.append(
                mean(
                    GOSet_Similarity(G, gpToGO[gp],
                                     metric="CzekanowskiDice")))

    logger.info("\tResnik")
    allResnik = list()
    for gp in gpToGO:
        if len(gpToGO[gp]) > 1:
            allResnik.append(
                mean(
                    GOSet_Similarity(G,
                                     gpToGO[gp],
                                     metric="Resnik",
                                     IC=allIC[FA.name])))

    logger.info("\tCorrelation between GS2 and CzekanowskiDice : %.2f" %
                corrcoef(allGS2, allCD)[0][1])
    logger.info("\tCorrelation between CzekanowskiDice and Resnik: %.2f" %
                corrcoef(allCD, allResnik)[0][1])

    # Molecular function annotation sets of AFFY versus B2G
    logger.info(separator)
    logger.info(
        "Comparing molecular function annotation sets in AFFY and B2G using three different similarity metrics"
    )
    aspect = "molecular_function"
    affyGO = pipeline["AFFY"].GPtoGO[aspect]
    b2gGO = pipeline["B2G"].GPtoGO[aspect]
    commonGene = set(affyGO.keys()).intersection(b2gGO.keys())
    logger.info("\tProcessing %d genes" % len(commonGene))

    allGS2, allCD, allResnik = [], [], []
    for gp in commonGene:
        GO1, GO2 = affyGO[gp], b2gGO[gp]

        # Element [0] of the pairwise result is the value being correlated
        allGS2.append(GOSet_PWSimilarity(G, GO1, GO2, metric="GS2")[0])
        allCD.append(
            GOSet_PWSimilarity(G, GO1, GO2, metric="CzekanowskiDice")[0])
        # NOTE(review): FA still refers to the B2G pipeline selected above,
        # so its IC table is the one used here -- confirm this is intended.
        allResnik.append(
            GOSet_PWSimilarity(G, GO1, GO2, metric="Resnik",
                               IC=allIC[FA.name])[0])

    logger.info("\tCorrelation between GS2 and CzekanowskiDice : %.2f" %
                corrcoef(allGS2, allCD)[0][1])
    logger.info("\tCorrelation between CzekanowskiDice and Resnik: %.2f" %
                corrcoef(allCD, allResnik)[0][1])
示例#4
0
def compareEvidence(projectDir):
    """
    Compare electronically inferred and manually curated annotations to experimental annotations.

    GAF files grouped by evidence code are read for every species of
    ``allOrg`` and merged into one FuncAnnot per code. The experimental
    ("EXP2"), human-curated ("AHC") and electronic ("IEA") sets are then
    analysed, plotted, compared and exported, and a precision-versus-recall
    figure using "EXP2" as reference is saved as PrecisionVSRecall.png.

    projectDir -- root directory holding the EvidenceCode and OBO inputs;
                  results go to its Graph/allSpecies and Export/allSpecies
                  sub-directories.
    """

    projectName = "EvidenceCode"
    organism = "allSpecies"
    fileType = "GAF"

    refSet = RefSet(organism)

    allOrg = [
        "Arabidopsis_thaliana", "Drosophila_melanogaster",
        "Mycobacterium_tuberculosis_ATCC_25618", "Schizosaccharomyces_pombe",
        "Bos_taurus", "Escherichia_coli_ATCC_27325",
        "Mycobacterium_tuberculosis_Oshkosh", "Caenorhabditis_elegans",
        "Escherichia_coli_MG1655", "Oryza_sativa", "Synechocystis_sp",
        "Candida_albicans_SC5314", "Gallus_gallus",
        "Pseudomonas_fluorescens_Pf-5", "Danio_rerio", "Homo_sapiens",
        "Rattus_norvegicus"
    ]

    # The gene products are those listed in the experimental files
    for refOrg in allOrg:
        fileName = "%s/EvidenceCode/%s/two_experimental_evidence.goa" % (
            projectDir, refOrg)
        refSet.add(fileName, refType="GAF")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    def _readEvidence(pipeName):
        """Merge the <pipeName>.goa file of every species that provides one."""
        EV = FuncAnnot(pipeName, refSet, G, organism=organism)
        for refOrg in allOrg:
            fileName = "%s/EvidenceCode/%s/%s.goa" % (projectDir, refOrg,
                                                      pipeName)
            # Not every species has a file for every evidence code
            if not os.path.exists(fileName):
                continue
            FA = FuncAnnot(pipeName, refSet, G, organism=refOrg)
            FA.read(fileName, fileType=fileType)
            EV.add(FA)
        return EV

    allFA = dict()

    #-----------------------------------------------
    # Functional annotations obtained by experiments (read without an
    # existence check, like the reference set above)
    pipeName = "EXP2"
    EXP2 = FuncAnnot(pipeName, refSet, G, organism=organism)
    for refOrg in allOrg:
        fileName = "%s/EvidenceCode/%s/two_experimental_evidence.goa" % (
            projectDir, refOrg)
        FA = FuncAnnot(pipeName, refSet, G, organism=refOrg)
        FA.read(fileName, fileType=fileType)
        EXP2.add(FA)
    allFA[pipeName] = EXP2

    #-----------------------------------------------
    # Functional annotations obtained by human curation
    for pipeName in ["IC", "TAS", "ISS", "NAS"]:
        allFA[pipeName] = _readEvidence(pipeName)

    # Merge the annotations Assigned by Human Curator (TAS is left out)
    FA = FuncAnnot("AHC", refSet, G, organism=organism)
    for evidence in ["IC", "ISS", "NAS"]:
        FA.add(allFA[evidence])
    allFA["AHC"] = FA

    #-----------------------------------------------
    # Functional annotations obtained without human curation
    for pipeName in ["IEA"]:
        allFA[pipeName] = _readEvidence(pipeName)

    #-----------------------------------------------
    listFA = ["EXP2", "AHC", "IEA"]

    # Analyse Functional annotations
    analyseFA = AnalyseFA()
    batchList = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "coherence", "redundancy", "removeRedundancy",
        "compactness", "specificity", "informationContent"
    ]
    batchExecute(batchList, analyseFA,
                 [allFA[evidence] for evidence in listFA])

    # Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Evidence Codes",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(batchList,
                 plotFA, [allFA[evidence] for evidence in listFA],
                 doGrid=True)

    batchList = ["coherenceHisto2D", "numberAnnotHisto2D"]
    batchExecute(batchList,
                 plotFA, [allFA[evidence] for evidence in listFA],
                 doGrid=True)

    # Compare Functional annotations
    compareFA = CompareFA()
    batchList = ["venn", "funcSim"]
    batchExecute(batchList, compareFA,
                 [allFA[evidence] for evidence in listFA])
    batchList = ["recall", "precision"]
    batchExecute(batchList, compareFA,
                 [allFA[evidence] for evidence in listFA])

    # Plot statistics of the comparison between Functional annotations
    batchList = ["venn", "funcSymSim"]
    batchExecute(batchList, plotFA, compareFA,
                 [allFA[evidence] for evidence in listFA])
    batchList = ["recall", "precision"]
    batchExecute(batchList, plotFA, compareFA,
                 [allFA[evidence] for evidence in listFA])

    #-----------------------------------------------
    # Export statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)

    exportList = [
        "unconnected", "coverage", "richness", "numberAnnot", "coherence",
        "compactness", "specificity", "informationContent", "redundancy"
    ]
    reportFA = ReportFA(outDir=outDir, name=projectName, organism=organism)
    reportFA.printStatistics([allFA[evidence] for evidence in listFA],
                             exportList)
    reportFA.saveStatistics([allFA[evidence] for evidence in listFA],
                            exportList)

    #-----------------------------------------------
    # Individual contributions of evidence codes: the distinct
    # (gene product, GO term) pairs supported by each code
    contribution = dict()
    for ec in ["IC", "TAS", "ISS", "NAS", "IEA"]:
        contribution[ec] = set(
            (gp, go) for aspect in
            ["cellular_component", "molecular_function", "biological_process"]
            for gp in allFA[ec].GPtoGO[aspect]
            for go in allFA[ec].GPtoGO[aspect][gp])

    curatedCodes = ["IC", "TAS", "ISS", "NAS"]
    total_Annotation = sum(len(contribution[ec]) for ec in curatedCodes)

    # Nothing to report (and nothing to divide by) without curated annotations
    if total_Annotation > 0:
        for ec in curatedCodes:
            # Single-argument print(...) behaves the same on Python 2 and 3
            print("%.02f %% of the annotations are supported by %s" %
                  (100. * len(contribution[ec]) / total_Annotation, ec))

    batchList = ["recall", "precision"]
    batchExecute(batchList, compareFA, [
        allFA[evidence]
        for evidence in ["EXP2", "ISS", "TAS", "NAS", "IC", "AHC", "IEA"]
    ])

    #-----------------------------------------------
    # Precision versus recall of every evidence code against the reference
    reference = "EXP2"
    plotEvidence = ["ISS", "TAS", "NAS", "IC", "AHC", "IEA"]

    evidenceMarker = dict(zip(plotEvidence, ['s', 'd', 'D', '*', 'p', 'h']))
    evidenceSize = dict(zip(plotEvidence, [8, 8, 8, 8, 15, 15]))
    aspectColor = dict(zip(allAspect, ["blue", "green", "red", "cyan"]))

    fig = figure(figsize=(8, 8))
    for evidence in plotEvidence:

        for aspect in allAspect:
            if aspect == "All_aspects_of_GO":
                continue

            # list(...) keeps mean/std working when values() is a view
            allX = list(
                compareFA['recall'][aspect][(evidence, reference)].values())
            allY = list(
                compareFA['precision'][aspect][(evidence,
                                                reference)].values())

            # One point per (evidence, aspect): mean with standard error bars
            meanX = mean(allX)
            errX = std(allX) / sqrt(len(allX))
            meanY = mean(allY)
            errY = std(allY) / sqrt(len(allY))

            errorbar(meanX,
                     meanY,
                     xerr=errX,
                     yerr=errY,
                     alpha=0.9,
                     hold=True,
                     mfc=aspectColor[aspect],
                     ecolor=aspectColor[aspect],
                     marker=evidenceMarker[evidence],
                     ms=evidenceSize[evidence])

    xlabel("Verspoor Hierarchical Recall")
    ylabel("Verspoor Hierarchical Precision")

    # Legend: one coloured entry per aspect, then one white marker per
    # evidence code. The colours are positional, so this presumably relies on
    # "All_aspects_of_GO" being the first entry of allAspect -- TODO confirm.
    allMarker = ['o', 'o', 'o', 's', 'd', 'D', '*', 'p', 'h']
    allColor = [
        "green", "red", "cyan", "white", "white", "white", "white", "white",
        "white"
    ]
    allLabel = [
        aspect.replace("_", " ") for aspect in allAspect
        if not aspect == "All_aspects_of_GO"
    ]
    allLabel.extend(plotEvidence)

    foo = [
        Line2D(arange(5), arange(5), ls='-', marker=m, color=c, label=l)
        for m, c, l in zip(allMarker, allColor, allLabel)
    ]
    leg = legend(foo, allLabel, loc="upper left", numpoints=1)
    leg.legendPatch.set_alpha(0.5)

    grid()

    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    figName = "%s/PrecisionVSRecall.png" % outDir
    savefig(figName)
示例#5
0
def compare_COPSAandB2G(projectDir):
    """
    Split the wheat COPSA and B2G annotations into shared and exclusive parts.

    Besides the two pipelines themselves, four derived FuncAnnot objects are
    built: "COPSAandB2G" and "B2GandCOPSA" keep, for each pipeline, only the
    gene products that the other one also annotates; "COPSAonly" and
    "B2Gonly" keep only the gene products the other one does not annotate.
    All six go through "removeUnconnected" and its plot, and the four derived
    ones are passed to compareCoexpression.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; plots are written under its Graph/wheat directory.
    """

    organism = "wheat"

    # The wheat reference sequences define the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    projectName = "MATT"

    # Read Functional annotations
    allFileName = [
        "%s/Annotation/COPSA_%s.tab" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
    ]
    allPipeName = ["COPSA", "B2G"]
    allFileType = ["GP2GO", "B2G"]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName,
                                            allFileType):
        FA = FuncAnnot(pipeName, refSet, G, organism=organism)
        FA.read(fileName, fileType=fileType)
        pipeline[pipeName] = FA

    def _restrict(name, source, other, shared):
        """
        Register a copy of pipeline[source] called name, filtered against
        pipeline[other]: when shared is true the gene products missing from
        other are removed, otherwise those present in other are removed.
        """
        FA = FuncAnnot(name, refSet, G, organism=organism)
        FA.add(pipeline[source])
        # The filtering is done independently for every aspect
        for aspect in G.aspect:
            otherGP = pipeline[other].GPtoGO[aspect].keys()
            if shared:
                toRemove = set(FA.GPtoGO[aspect].keys()).difference(otherGP)
            else:
                toRemove = otherGP
            FA.removeGP(toRemove, myAspects=[aspect])
        pipeline[FA.name] = FA
        allPipeName.append(FA.name)

    # COPSA annotations, but only for GPs that are also annotated by B2G
    _restrict("COPSAandB2G", "COPSA", "B2G", True)

    # B2G annotations, but only for GPs that are also annotated by COPSA
    _restrict("B2GandCOPSA", "B2G", "COPSA", True)

    # COPSA annotations for GPs that B2G does not annotate
    _restrict("COPSAonly", "COPSA", "B2G", False)

    # B2G annotations for GPs that COPSA does not annotate
    _restrict("B2Gonly", "B2G", "COPSA", False)

    # Analyse Functional annotations (only the unconnected terms are removed)
    analyseFA = AnalyseFA()
    batchList = ["removeUnconnected"]
    batchExecute(batchList, analyseFA,
                 [pipeline[pipeName] for pipeName in allPipeName])

    # Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True)

    compareCoexpression([
        pipeline[name]
        for name in ["COPSAandB2G", "B2GandCOPSA", "COPSAonly", "B2Gonly"]
    ])
示例#6
0
def compareWheatPipelines(projectDir):
    """
    Analyse, plot, compare and export six wheat functional annotations.

    The pipelines Blast, Pfam, Merge, COPSA, AFFY and B2G are read from the
    Annotation directory. All of them are analysed, plotted and exported;
    the pairwise comparison is restricted to COPSA, AFFY and B2G.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; results go to its Graph/wheat and Export/wheat
                  sub-directories.
    """

    organism = "wheat"
    projectName = "MATT"

    # The wheat reference sequences define the set of gene products
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" %
                    (projectDir, organism),
                    refType="Fasta")

    # GO ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % (projectDir),
                     force=False)

    # Functional annotations: name, file and file type are kept in step
    allPipeName = ["Blast", "Pfam", "Merge", "COPSA", "AFFY", "B2G"]
    allFileType = ["GP2GO", "GP2GO", "GP2GO", "GP2GO", "AFFY", "B2G"]
    allFileName = [
        "%s/Annotation/blast2goPaths_fin_aracyc_%s_unionBest.tab" %
        (projectDir, organism),
        "%s/Annotation/pfam2goPaths2_%s_unionBest.tab" %
        (projectDir, organism),
        "%s/Annotation/pfam2goPaths2_%s_unionBest___blast2goPaths_fin_aracyc_%s_unionBest_merged.tab"
        % (projectDir, organism, organism),
        "%s/Annotation/COPSA_%s.tab" % (projectDir, organism),
        "%s/Annotation/Affy_%s.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
    ]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName,
                                            allFileType):
        annot = FuncAnnot(pipeName, refSet, G, organism=organism)
        annot.read(fileName, fileType=fileType)
        pipeline[pipeName] = annot

    # Statistics on every pipeline
    analyseFA = AnalyseFA()
    batchList = [
        "obsolete",
        "unconnected",
        "removeUnconnected",
        "coverage",
        "richness",
        "numberAnnot",
        "coherence",
        "redundancy",
        "compactness",
        "specificity",
        "informationContent",
    ]
    batchExecute(batchList, analyseFA,
                 [pipeline[pipeName] for pipeName in allPipeName])

    # Plots of the same statistics, then the two 2D histograms
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name=projectName,
                    organism=organism)
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True)
    batchExecute(["coherenceHisto2D", "numberAnnotHisto2D"],
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True)

    # Comparison limited to three of the pipelines
    comparedName = ["COPSA", "AFFY", "B2G"]
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA,
                 [pipeline[pipeName] for pipeName in comparedName])

    # Plots of that comparison
    batchExecute(["venn", "funcSymSim"], plotFA, compareFA,
                 [pipeline[pipeName] for pipeName in comparedName])

    #-----------------------------------------------
    # Export of the statistics to Excel
    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)

    exportList = [
        "unconnected",
        "coverage",
        "numberAnnot",
        "richness",
        "coherence",
        "compactness",
        "specificity",
        "informationContent",
        "redundancy",
    ]
    reportFA = ReportFA(outDir=outDir, name=projectName, organism=organism)
    reportFA.printStatistics([pipeline[pipeName] for pipeName in allPipeName],
                             exportList)
    reportFA.saveStatistics([pipeline[pipeName] for pipeName in allPipeName],
                            exportList)
示例#7
0
def GOFrequencyBovinePipelines(projectDir):
    """
    Plot the frequency of GO terms in three bovine functional annotations.

    For every GO aspect and every pipeline (AFFY, B2G, AID), the number of
    gene products annotated to each GO term is log-scaled, normalised so that
    the most frequent term reaches 256, rounded to an integer and handed to
    G.plot_FrequencyGraph, which is given one PNG file name per
    (pipeline, aspect) pair.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; figure paths point into its Graph/bovine directory.
    """

    projectName = "bovinePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function plot the frequency of GO terms from 3 functional annotations  for a Bovine array"
    )
    logger.info(banner)

    logger.info("name of the project : %s " % projectName)

    # Read bovine microarray probe set to define the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    # Read Functional annotations: the pipeline name doubles as the file type
    allFileName = [
        "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism),
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        "%s/Annotation/AID_%s.txt" % (projectDir, organism),
    ]
    allPipeName = ["AFFY", "B2G", "AID"]
    allFileType = allPipeName

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName,
                                            allFileType):
        FA = FuncAnnot(pipeName, refSet, G, organism=organism)
        FA.read(fileName, fileType=fileType)
        pipeline[pipeName] = FA

    #----------------------------------------------
    # Plot frequency of GO terms in a radial graph
    # NOTE(review): outDir is not created here -- presumably it must already
    # exist or plot_FrequencyGraph takes care of it; confirm.
    outDir = "%s/Graph/%s" % (projectDir, organism)
    logger.info("=================================================")
    logger.info("Plotting frequency of GO terms")
    logger.info("directory : %s" % outDir)
    for aspect in G.aspect:
        logger.info("%s : " % aspect)

        # The node list does not depend on the pipeline: fetch it once
        nodes = list(G.get_NodesfromAspect(aspect))

        # The value returned for one pipeline is passed back as ``graphviz``
        # for the next one
        A = None
        for pipeName in allPipeName:
            FA = pipeline[pipeName]
            logFreq = array([
                log(1 + len(FA.GOtoGP[aspect].get(go, []))) for go in nodes
            ])

            # Scale to 0..256; an empty or all-zero vector is left at zero
            # instead of dividing by zero
            peak = max(logFreq) if len(logFreq) else 0
            if peak > 0:
                logFreq = logFreq / peak * 256.
            level = [int(round(n)) for n in logFreq]
            freq = dict(zip(nodes, level))

            figName = "%s/Frequency_%s_%s.png" % (outDir, FA.name, aspect)
            A = G.plot_FrequencyGraph(aspect,
                                      freq,
                                      figName=figName,
                                      ttl="",
                                      graphviz=A)

    logger.info(banner)
    logger.info("")
示例#8
0
def worseFunctionalSimilarity(projectDir):
    """
    Plot the most different annotation sets between Affymetrix and Blast2GO for a Bovine array.

    For every GO aspect, each gene product annotated by both pipelines gets
    a score equal to the average of the two values returned alongside the
    similarity by GOSet_PWSimilarity. The gene products with the lowest
    scores (at most ten) are drawn with compare_InducedGraph.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; figures go to its Graph/bovine/WorseFuncSim
                  directory.
    """

    from AIGO.Similarity import GOSet_PWSimilarity

    projectName = "bovinePipeline"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function identifies the ten most different annotation sets between Affymetrix and Blast2GO for a Bovine array"
    )
    logger.info(banner)

    logger.info("name of the project : %s " % projectName)

    # Read bovine microarray probe set to define the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    fileName = "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)
    FA1 = FuncAnnot("AFFY", refSet, G, organism=organism)
    FA1.read(fileName, fileType="AFFY")

    fileName = "%s/Annotation/B2G_%s.annot" % (projectDir, organism)
    FA2 = FuncAnnot("B2G", refSet, G, organism=organism)
    FA2.read(fileName, fileType="B2G")

    # Analyse Functional annotations
    analyseFA = AnalyseFA()
    batchExecute(["removeUnconnected"], analyseFA, [FA1, FA2])

    outDir = "%s/Graph/%s/WorseFuncSim" % (projectDir, organism)
    createDir(outDir)

    N = 10
    logger.info("=================================================")
    logger.info("Plotting the %d most dissimilar annotation sets" % N)
    logger.info("directory : %s" % outDir)
    for aspect in G.aspect:

        # Freeze the set into a list once so that positions in the score
        # list map back to gene products
        commonGene = list(
            set(FA1.GPtoGO[aspect].keys()).intersection(
                FA2.GPtoGO[aspect].keys()))

        logger.info("%s : processing %d annotation sets " %
                    (aspect, len(commonGene)))

        # Score of a gene product: mean of the two values returned in the
        # second element (presumably one per direction -- TODO confirm)
        allD = list()
        for g in commonGene:
            sim, l = GOSet_PWSimilarity(G, FA1.GPtoGO[aspect][g],
                                        FA2.GPtoGO[aspect][g])
            allD.append((array(l[0]) + array(l[1])) / 2.)

        # Ascending order: the least similar annotation sets come first
        idx = argsort(allD)

        # Fewer than N gene products may be shared for this aspect
        for i in range(min(N, len(commonGene))):
            gp = commonGene[idx[i]]

            figName = "%s/%s_annotation_%s_from_%s_%s.png" % (
                outDir, aspect, gp, FA1.name, FA2.name)

            ttl = "%s annotations of %s from %s (green) and %s (red) : Functional similarity = %.2f" % (
                aspect.replace("_", " "), gp, FA1.name, FA2.name, allD[idx[i]])
            FA1.G.compare_InducedGraph(FA1.GPtoGO[aspect][gp],
                                       FA2.GPtoGO[aspect][gp],
                                       figName=figName,
                                       ttl=ttl)

    logger.info(banner)
    logger.info("")
示例#9
0
def compareBovineAndRandom(projectDir):
    """
    Compare three bovine functional annotations with a resampled Affymetrix one.

    AFFY, B2G and AID are read as they are. A fourth annotation, "resample",
    is read from the same file as AFFY and then passed through the
    "sampleAnnotation" step of RandomizeFA. Coherence, redundancy and number
    of annotations are computed for the four of them and plotted as PNG
    files.

    projectDir -- root directory holding the ReferenceSet, OBO and Annotation
                  inputs; plots are written under its Graph/bovine directory.
    """

    projectName = "BovineAndRandom"
    organism = "bovine"
    banner = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    logger.info(banner)
    logger.info(
        "This function compare the properties of 3 functional annotations for a Bovine array + a randomize version of Affymetrix functional annotations."
    )
    logger.info(banner)

    logger.info("name of the project : %s " % projectName)

    # Read bovine microarray probe set to define the set of gene products
    fileName = "%s/ReferenceSet/%s.fasta" % (projectDir, organism)
    refSet = RefSet(organism=organism, fileName=fileName, refType="Fasta")

    # Read GO ontology
    fileName = "%s/OBO/go_daily-termdb.obo-xml" % (projectDir)
    G = readGOoboXML(fileName, force=False)

    # Read Functional annotations; "resample" starts as a second copy of the
    # Affymetrix file
    affyFile = "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)
    allFileName = [
        affyFile,
        "%s/Annotation/B2G_%s.annot" % (projectDir, organism),
        "%s/Annotation/AID_%s.txt" % (projectDir, organism),
        affyFile,
    ]
    allPipeName = ["AFFY", "B2G", "AID", "resample"]
    allFileType = ["AFFY", "B2G", "AID", "AFFY"]

    pipeline = dict()
    for pipeName, fileName, fileType in zip(allPipeName, allFileName,
                                            allFileType):
        FA = FuncAnnot(pipeName, refSet, G, organism=organism)
        FA.read(fileName, fileType=fileType)
        pipeline[pipeName] = FA

    # Randomize FA
    randomizeFA = RandomizeFA()
    analyseFA = AnalyseFA()

    #-----------------------------------------------
    # Resample the annotations of the "resample" pipeline only
    batchList = ["sampleAnnotation"]
    batchExecute(batchList, randomizeFA,
                 [pipeline[pipeName] for pipeName in ["resample"]])

    batchList = ["coherence", "redundancy", "numberAnnot"]
    batchExecute(batchList, analyseFA,
                 [pipeline[pipeName] for pipeName in allPipeName])

    # Plot statistics of Functional annotations
    outDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(outDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=outDir,
                    name="Resample",
                    organism=organism,
                    ext="png")
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True)

    batchList = ["coherenceHisto2D", "numberAnnotHisto2D"]
    batchExecute(batchList,
                 plotFA, [pipeline[pipeName] for pipeName in allPipeName],
                 doGrid=True,
                 tit="")

    logger.info(banner)
    logger.info("")
示例#10
0
def compareRandomizePipelines(projectDir):
    """
    Randomize the three Bovine functional annotations (AFFY, B2G, AID) and
    report their statistics.

    The annotations are read once, then processed in two consecutive passes
    over the same objects:
      1. "shuffleAnnotation", analysed and exported as "Randomize shuffle";
      2. "sampleAnnotation", analysed and exported as "Randomize sample".

    projectDir : root directory containing the ReferenceSet, OBO and
                 Annotation inputs. Reports go to <projectDir>/Export/bovine.
    """

    projectName = "randomizePipeline"
    organism = "bovine"
    separator = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    #Banner announcing the run
    for message in (
            separator,
            "This function compare the properties of 3 randomized functional annotations for a Bovine array.",
            separator):
        logger.info(message)

    logger.info("name of the project : %s " % projectName)

    #Reference set of gene products, read from the organism's fasta file
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" % (projectDir,
                                                           organism),
                    refType="Fasta")

    #Gene Ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % projectDir,
                     force=False)

    #One annotation file per pipeline; the pipeline name is also passed as
    #the fileType when reading
    sources = [
        ("AFFY",
         "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)),
        ("B2G", "%s/Annotation/B2G_%s.annot" % (projectDir, organism)),
        ("AID", "%s/Annotation/AID_%s.txt" % (projectDir, organism)),
    ]

    allFA = []
    for pipeName, annotFile in sources:
        annot = FuncAnnot(pipeName, refSet, G, organism=organism)
        annot.read(annotFile, fileType=pipeName)
        allFA.append(annot)

    randomizeFA = RandomizeFA()

    #-----------------------------------------------
    # Pass 1 : shuffle, analyse, export
    #(every call below receives its own copy of the annotation list)
    batchExecute(["shuffleAnnotation"], randomizeFA, list(allFA))

    analyseFA = AnalyseFA()
    batchExecute(["coherence", "redundancy"], analyseFA, list(allFA))

    outDir = "%s/Export/%s" % (projectDir, organism)
    createDir(outDir)
    shuffleExport = ["coherence", "redundancy"]
    shuffleReport = ReportFA(name="Randomize shuffle",
                             outDir=outDir,
                             organism=organism)
    shuffleReport.printStatistics(list(allFA), shuffleExport)
    shuffleReport.saveStatistics(list(allFA), shuffleExport)

    #-----------------------------------------------
    # Pass 2 : resample, analyse, export
    batchExecute(["sampleAnnotation"], randomizeFA, list(allFA))

    #"coherence" and "compactness" are deliberately not part of this pass
    sampleStatistics = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "redundancy", "specificity", "informationContent"
    ]
    batchExecute(sampleStatistics, analyseFA, list(allFA))

    #Same export directory as in the first pass
    createDir(outDir)
    sampleExport = [
        "coverage", "numberAnnot", "richness", "specificity",
        "informationContent", "redundancy"
    ]
    sampleReport = ReportFA(name="Randomize sample",
                            outDir=outDir,
                            organism=organism)
    sampleReport.printStatistics(list(allFA), sampleExport)
    sampleReport.saveStatistics(list(allFA), sampleExport)

    logger.info(separator)
    logger.info("")
示例#11
0
def compareBovinePipelines(projectDir):
    """
    Analyse, plot, compare and export the three functional annotations
    (AFFY, B2G, AID) available for the Bovine array.

    projectDir : root directory containing the ReferenceSet, OBO and
                 Annotation inputs. Graphs go to <projectDir>/Graph/bovine
                 and reports to <projectDir>/Export/bovine.
    """

    projectName = "bovinePipeline"
    organism = "bovine"
    separator = "◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦◦"

    #Banner announcing the run
    for message in (
            separator,
            "This function compare the properties of 3 functional annotations for a Bovine array.",
            separator):
        logger.info(message)

    logger.info("name of the project : %s " % projectName)

    #Reference set of gene products, read from the organism's fasta file
    refSet = RefSet(organism=organism,
                    fileName="%s/ReferenceSet/%s.fasta" % (projectDir,
                                                           organism),
                    refType="Fasta")

    #Gene Ontology
    G = readGOoboXML("%s/OBO/go_daily-termdb.obo-xml" % projectDir,
                     force=False)

    #One annotation file per pipeline; the pipeline name is also passed as
    #the fileType when reading
    sources = [
        ("AFFY",
         "%s/Annotation/Affy_%s.na31.annot.csv" % (projectDir, organism)),
        ("B2G", "%s/Annotation/B2G_%s.annot" % (projectDir, organism)),
        ("AID", "%s/Annotation/AID_%s.txt" % (projectDir, organism)),
    ]

    allFA = []
    for pipeName, annotFile in sources:
        annot = FuncAnnot(pipeName, refSet, G, organism=organism)
        annot.read(annotFile, fileType=pipeName)
        allFA.append(annot)

    #-----------------------------------------------
    #Analyse
    #(every call below receives its own copy of the annotation list;
    # "coherence" and "compactness" are deliberately not computed)
    analyseFA = AnalyseFA()
    statistics = [
        "obsolete", "unconnected", "removeUnconnected", "coverage", "richness",
        "numberAnnot", "redundancy", "specificity", "informationContent"
    ]
    batchExecute(statistics, analyseFA, list(allFA))

    #Size of the largest annotation set of each pipeline
    analyseFA.largestSet(list(allFA))
    logger.info("The largest sets of annotations are :")
    for annot in allFA:
        largest = annot['largestSet']['All_aspects_of_GO']
        logger.info("\t%d for %s" % (largest, annot.name))

    #Plot the statistics that were just computed
    graphDir = "%s/Graph/%s" % (projectDir, organism)
    createDir(graphDir)
    plotFA = PlotFA(xlabel="Annotation pipelines",
                    outDir=graphDir,
                    name=projectName,
                    organism=organism,
                    ext="png")
    batchExecute(statistics, plotFA, list(allFA), doGrid=True)

    #2D histogram ("coherenceHisto2D" is left out)
    batchExecute(["numberAnnotHisto2D"],
                 plotFA,
                 list(allFA),
                 doGrid=True,
                 tit="")

    #-----------------------------------------------
    #Compare the annotations, then plot the comparison
    compareFA = CompareFA()
    batchExecute(["venn", "funcSim"], compareFA, list(allFA))

    batchExecute(["venn", "funcSymSim"],
                 plotFA,
                 compareFA,
                 list(allFA),
                 tit="")

    #-----------------------------------------------
    #Print and save the statistics
    exportDir = "%s/Export/%s" % (projectDir, organism)
    createDir(exportDir)

    exportList = [
        "unconnected", "coverage", "richness", "numberAnnot", "specificity",
        "informationContent", "redundancy"
    ]
    reportFA = ReportFA(outDir=exportDir, name=projectName, organism=organism)
    reportFA.printStatistics(list(allFA), exportList)
    reportFA.saveStatistics(list(allFA), exportList)

    logger.info(separator)
    logger.info("")
示例#12
0
文件: testAIGO.py 项目: wkpalan/aigo
#!/usr/bin/env python
from AIGO import logger

from AIGO.ReferenceSet import RefSet
from AIGO.FunctionalAnnotation import FuncAnnot
from AIGO.go.OBO import readGOoboXML

from AIGO.Analyse import AnalyseFA
from AIGO.Report import ReportFA

from AIGO.utils.Execute import batchExecute

# Minimal AIGO run for a single "platypus" functional annotation.
# All input files are given as relative paths.

# Reference set of gene products, declared as a "Text" reference file
refSet = RefSet(organism="platypus",
                fileName="platypus.refSet",
                refType="Text")
# Gene Ontology, read from an OBO-XML file
G = readGOoboXML("go_daily-termdb.obo-xml")
# Functional annotation tied to the reference set and ontology above,
# filled from a file read with the "GAF" file type
FA = FuncAnnot("platypusProject", refSet, G, organism="platypus")
FA.read("platypus.gaf", "GAF")

analyseFA = AnalyseFA()

# Run largestSet on the annotation, then log the value found under
# FA['largestSet']['All_aspects_of_GO']
analyseFA.largestSet([FA])
logger.info("Largest sets of annotations:")
logger.info("\t%d for %s" % (FA['largestSet']['All_aspects_of_GO'], FA.name))

# Names handed to batchExecute together with the analyser and the annotation
# NOTE(review): presumably each name is an AnalyseFA method that batchExecute
# calls in turn - confirm against AIGO.utils.Execute
batchList = [
    "coverage", "richness", "numberAnnot", "redundancy", "specificity",
    "informationContent", "hPrecision"
]
batchExecute(batchList, analyseFA, [FA])