Example #1
    def completeTest(self, treelikerArgs, processes=1):
        self.generateExamplesUnified()
        bestClassifiers = []
        terms = self.termsByDepth()  # This sorting is needed later in bnet learning
        treeliker = TreeLikerWrapper(self, *treelikerArgs)

        def processTerm(term):
            return term, treeliker.runTermTest(term)

        nets = defaultdict(dict)
        allresults = tuple(parallel_map_dill(processes, processTerm, terms))
        combis = set()
        for term, learned in allresults:
            for clfName, i in learned:
                combis.add((term, clfName))
                #for clf, X_train, y_train, X_test, y_test, X_validation, y_validation, g_train, g_test, g_validation in folds:
                if clfName in nets[i]:
                    net = nets[i][clfName]
                else:
                    net = BayesNet(i, clfName, self)
                    nets[i][clfName] = net

                net.generateCPD(term)  #, clf, X_train, y_train, X_test, y_test, X_validation, y_validation, g_train, g_test, g_validation)

        for i, byClf in sorted(nets.items()):
            for clfName, net in byClf.items():
                net.bake()
                net.predict()

        debug("Generating plots.")
        #for term, learned in allresults:
        #    for clfName, folds in learned.items():
        plt.figure(figsize=(6, 12))
        for term, clfName in combis:
            plt.clf()
            termN = self[term]['name']
            cvdir = getTermPath(termN)
            #folds2 = [(nets[i][clfName].nodeAsClf(term),)+f[1:] for i,f in enumerate(folds)]

            s1 = plt.subplot(211, adjustable='box', aspect=1)
            s1.axis('equal')
            #s1.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            plotRoc(termN, clfName, termN)
            s2 = plt.subplot(212, adjustable='box', aspect=1)
            s2.axis('equal')
            #s2.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            plotRoc(termN,
                    clfName,
                    "Bayes correction",
                    clfs=(nets[i][clfName].nodeAsClf(term)
                          for i in range(NUM_FOLDS)))
            #plotRoc("Bayes correction", folds2)
            print(str(cvdir / (clfName.replace(" ", "_") + '_roc.png')))
            plt.savefig(str(cvdir / (clfName.replace(" ", "_") + '_roc.png')))
            plt.savefig(str(cvdir / (clfName.replace(" ", "_") + '_roc.ps')))
        debug("Finished complete test.")
Example #2
    def completeTest(self, treelikerArgs, processes = 1):
        self.generateExamplesUnified()
        bestClassifiers = []
        terms = self.termsByDepth() # This sorting is needed later in bnet learning
        treeliker = TreeLikerWrapper(self, *treelikerArgs)
        def processTerm(term):
            return term, treeliker.runTermTest(term)
        
        nets = defaultdict(dict)
        allresults = tuple(parallel_map_dill(processes, processTerm, terms))
        combis = set()
        for term, learned in allresults:
            for clfName, i in learned:
                combis.add((term,clfName))
                #for clf, X_train, y_train, X_test, y_test, X_validation, y_validation, g_train, g_test, g_validation in folds:
                if clfName in nets[i]:
                    net = nets[i][clfName]
                else:
                    net = BayesNet(i, clfName, self)
                    nets[i][clfName] = net

                net.generateCPD(term)#, clf, X_train, y_train, X_test, y_test, X_validation, y_validation, g_train, g_test, g_validation) 

        for i, byClf in sorted(nets.items()):
            for clfName, net in byClf.items():
                net.bake()
                net.predict()

        debug("Generating plots.")
        #for term, learned in allresults:
        #    for clfName, folds in learned.items():
        plt.figure(figsize = (6,12))
        for term,clfName in combis:
            plt.clf()
            termN = self[term]['name']
            cvdir = getTermPath(termN)
            #folds2 = [(nets[i][clfName].nodeAsClf(term),)+f[1:] for i,f in enumerate(folds)]
            
            s1 = plt.subplot(211, adjustable='box', aspect=1)
            s1.axis('equal')
            #s1.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            plotRoc(termN, clfName, termN)
            s2 = plt.subplot(212, adjustable='box', aspect=1)
            s2.axis('equal')
            #s2.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            plotRoc(termN, clfName, "Bayes correction",
                    clfs = (nets[i][clfName].nodeAsClf(term) for i in range(NUM_FOLDS) ))
            #plotRoc("Bayes correction", folds2)
            print(str(cvdir/(clfName.replace(" ","_")+'_roc.png')))
            plt.savefig(str(cvdir/(clfName.replace(" ","_")+'_roc.png')))
            plt.savefig(str(cvdir/(clfName.replace(" ","_")+'_roc.ps')))
        debug("Finished complete test.")
Example #3
    def runTermTest(self, term):
        term = self.ontology[term]['name']
        debug("Preparing for TreeLiker on term %s." % term)

        resultPath = getTermPath(term)
        batchPath = resultPath / 'batch.treeliker'

        datasetPath = resultPath / 'dataset.txt'

        batchFile = "set(algorithm, relf_grounding_counting)\n" \
                    "set(verbosity, %d)\n" \
                    "set(output_type, train_test)\n" \
                    "set(examples, '%s')\n" \
                    "set(template, [%s])\n" \
                    "set(use_sampling, true)\n" \
                    "set(num_samples, %d)\n" \
                    "set(sample_size, %d)\n" \
                    "set(covered_class, '%s')\n\n" % (
                        dp.utils.verbosity,
                        datasetPath.name,
                        self.template,
                        self.samples,
                        self.sample_size,
                        term)

        with datasetPath.open() as ds:
            dataSetLen = len([*ds])  # Counts lines

        for i, (train, test) in enumerate(
                cross_validation.KFold(dataSetLen, NUM_FOLDS)):
            path = resultPath / str(i)
            if not path.is_dir():
                path.mkdir()

            batchFile += "set(output, '%s')\n" \
                         "set(train_set, [%s])\n" \
                         "set(test_set, [%s])\n" \
                         "work(yes)\n" % (
                             path.name,
                             ",".join(map(str,train)),
                             ",".join(map(str,test)))

        with batchPath.open('w') as bf:
            bf.write(batchFile)

        self._runTreeLiker(resultPath, batchPath)

        return learningTest(resultPath)
Example #4
    def runTermTest(self, term):
        term = self.ontology[term]['name']
        debug("Preparing for TreeLiker on term %s." % term)

        resultPath = getTermPath(term)
        batchPath = resultPath / 'batch.treeliker'

        datasetPath = resultPath / 'dataset.txt'

        batchFile = "set(algorithm, relf_grounding_counting)\n" \
                    "set(verbosity, %d)\n" \
                    "set(output_type, train_test)\n" \
                    "set(examples, '%s')\n" \
                    "set(template, [%s])\n" \
                    "set(use_sampling, true)\n" \
                    "set(num_samples, %d)\n" \
                    "set(sample_size, %d)\n" \
                    "set(covered_class, '%s')\n\n" % (
                        dp.utils.verbosity,
                        datasetPath.name,
                        self.template,
                        self.samples,
                        self.sample_size,
                        term)

        with datasetPath.open() as ds:
            dataSetLen = len([*ds]) # Counts lines

        for i, (train, test) in enumerate(cross_validation.KFold(dataSetLen, NUM_FOLDS)):
            path = resultPath / str(i)
            if not path.is_dir():
                path.mkdir()
                
            batchFile += "set(output, '%s')\n" \
                         "set(train_set, [%s])\n" \
                         "set(test_set, [%s])\n" \
                         "work(yes)\n" % (
                             path.name,
                             ",".join(map(str,train)),
                             ",".join(map(str,test)))

        with batchPath.open('w') as bf:
            bf.write(batchFile)

        self._runTreeLiker(resultPath, batchPath)

        return learningTest(resultPath)
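
For orientation, the `batch.treeliker` file assembled above has the following shape. The angle-bracketed values are filled in at runtime from `dp.utils.verbosity`, the wrapper's template and sampling settings, and the K-fold split; the final four-line block is repeated once per fold:

    set(algorithm, relf_grounding_counting)
    set(verbosity, <dp.utils.verbosity>)
    set(output_type, train_test)
    set(examples, 'dataset.txt')
    set(template, [<self.template>])
    set(use_sampling, true)
    set(num_samples, <self.samples>)
    set(sample_size, <self.sample_size>)
    set(covered_class, '<term name>')

    set(output, '0')
    set(train_set, [<comma-separated training row indices for fold 0>])
    set(test_set, [<comma-separated test row indices for fold 0>])
    work(yes)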
Example #5
 def generateExamplesUnified(self):
     #return
     debug("Generating unified datasets.")
     terms = self.termsByDepth(False)
     #rootname = self.ontology[self.root]['name']
     with ExitStack() as stack: # Closes all files when exited
         files = [(term, stack.enter_context((getTermPath(term) / 'dataset.txt').open('w')))
                 for term
                 in (self[t]['name'] for t in self.ontology.keys())
                 ]#if term != rootname]
         #for i, geneName in enumerate(self.genes):
         for geneName in self.genes:
             #debug("%d. Writing gene %s." % (i, geneName))
             gene = self.geneFactory.getGene(geneName)
             repg = ", ".join(gene.logicalRepresentation())
             for term, output in files:
                 if geneName not in self.associations[term]:
                     term = '~'+term
                 e = '"%s" %s' % (term, repg)
                 print(e, file=output)
Example #6
 def generateExamplesUnified(self):
     #return
     debug("Generating unified datasets.")
     terms = self.termsByDepth(False)
     #rootname = self.ontology[self.root]['name']
     with ExitStack() as stack:  # Closes all files when exited
         files = [
             (term,
              stack.enter_context(
                  (getTermPath(term) / 'dataset.txt').open('w')))
             for term in (self[t]['name'] for t in self.ontology.keys())
         ]  #if term != rootname]
         #for i, geneName in enumerate(self.genes):
         for geneName in self.genes:
             #debug("%d. Writing gene %s." % (i, geneName))
             gene = self.geneFactory.getGene(geneName)
             repg = ", ".join(gene.logicalRepresentation())
             for term, output in files:
                 if geneName not in self.associations[term]:
                     term = '~' + term
                 e = '"%s" %s' % (term, repg)
                 print(e, file=output)
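
Each line written to a term's `dataset.txt` thus consists of the quoted term name (prefixed with `~` when the gene is not associated with that term) followed by the comma-joined logical representation of the gene. For example, with placeholder predicate names, since the output of `gene.logicalRepresentation()` is not shown in these examples:

    "term_name" pred1(g, x), pred2(x, y)
    "~term_name" pred1(g, x), pred2(x, y)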