def __call__(self, data, weight=0):
    """Induce a structured classifier from `data`.

    Dispatches on `self.type` ("complexity", "error", or "auto"):
    "auto" inspects the incompatibility matrix and picks "error" for
    fuzzy (noisy) data, "complexity" otherwise.
    """
    import orngWrap
    # BUG FIX: the original bound this value to the name `type`,
    # shadowing the builtin, so the `type(self) == int` check below
    # crashed (a str is not callable).  The intended test is on
    # `self.boundsize`, checked here with isinstance.
    induction_type = getattr(self, "type", "auto")
    if hasattr(self, "boundsize"):
        if isinstance(self.boundsize, int):
            subgen = orange.SubsetsGenerator_constSize(B=self.boundsize)
        else:
            # boundsize is a (min, max) pair.
            subgen = orange.SubsetsGenerator_minMaxSize(
                min=self.boundsize[0], max=self.boundsize[1])
    else:
        # Default: consider attribute pairs only.
        subgen = orange.SubsetsGenerator_constSize(B=2)
    # NOTE(review): `subgen` is never used below in the visible code.

    if induction_type == "auto":
        # Fuzzy incompatibility matrix -> noisy data -> minimize error;
        # otherwise minimize complexity.
        im = orange.IMBySorting(data, [])
        if im.fuzzy():
            induction_type = "error"
        else:
            induction_type = "complexity"

    inducer = StructureInducer(
        removeDuplicates=1,
        redundancyRemover=AttributeRedundanciesRemover(),
        learnerForUnknown=orange.MajorityLearner())

    if induction_type == "complexity":
        inducer.featureInducer = FeatureByMinComplexity()
        return inducer(data, weight)
    elif induction_type == "error":
        # Candidate m values for tuning, unless the caller fixed one
        # via `self.m`.
        ms = getattr(
            self, "m",
            orange.frange(0.1) + orange.frange(1.2, 3.0, 0.2) +
            orange.frange(4.0, 10.0, 1.0))
        # It's the same object for the redundancy remover and the real
        # inducer, so we can tune just one parameter.
        inducer.redundancyRemover.inducer = inducer.featureInducer = \
            FeatureByMinError()
        return orngWrap.Tune1Parameter(
            parameter="featureInducer.m",
            values=ms,
            object=inducer,
            returnWhat=orngWrap.Tune1Parameter.returnClassifier)(data, weight)

    # Reached only for an unrecognized explicit `self.type`;
    # featureInducer is unset here, so this print looks like leftover
    # debugging code -- preserved from the original.
    print(inducer.featureInducer.m, inducer.redundancyRemover.inducer.m)
    return inducer(data, weight)
def learningCurveWithTestData(learners, learnset, testset, times=10,
                              proportions=orange.frange(0.1),
                              strat=orange.MakeRandomIndices.StratifiedIfPossible,
                              pps=[], **argkw):
    """Build a learning curve evaluated on a fixed test set.

    For every proportion in `proportions`, draws `times` random training
    subsamples from `learnset`, trains each learner on them and tests on
    `testset`.  Returns a list with one ExperimentResults per proportion.
    """
    verbosity = argkw.get("verbose", 0)
    learnset, learnweight = demangleExamples(learnset)
    testweight = demangleExamples(testset)[1]
    # First non-false of the three seed/generator keyword args wins.
    rgen = (argkw.get("indicesrandseed", 0)
            or argkw.get("randseed", 0)
            or argkw.get("randomGenerator", 0))
    pick = orange.MakeRandomIndices2(stratified=strat, randomGenerator=rgen)
    curves = []
    for proportion in proportions:
        printVerbose("Proportion: %5.3f" % proportion, verbosity)
        klass = testset.domain.classVar
        res = ExperimentResults(times, [l.name for l in learners],
                                klass.values.native(),
                                testweight != 0, klass.baseValue)
        res.results = []
        for rep in range(times):
            printVerbose(" repetition %d" % rep, verbosity)
            subsample = learnset.selectref(pick(learnset, proportion), 0)
            learnAndTestOnTestData(learners, (subsample, learnweight),
                                   testset, res, rep)
        curves.append(res)
    return curves
def learningCurveN(learners, examples, folds=10,
                   strat=orange.MakeRandomIndices.StratifiedIfPossible,
                   proportions=orange.frange(0.1), pps=[], **argkw):
    """Construct a learning curve for `learners` via cross-validation.

    Builds the fold and subsampling index generators (stratified when
    `strat` is set) and delegates the actual work to learningCurve().
    """
    # NOTE(review): the -1 defaults are truthy, so the explicit-seed
    # branch is taken unless a caller passes a seed of 0 -- confirm
    # this is the intended behavior (same pattern as learningCurve).
    seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
    if seed:
        rgen = orange.RandomGenerator(seed)
    else:
        rgen = argkw.get("randomGenerator", orange.RandomGenerator())
    if strat:
        fold_maker, subset_maker = (orange.MakeRandomIndicesCV,
                                    orange.MakeRandomIndices2)
    else:
        fold_maker, subset_maker = (orange.RandomIndicesCV,
                                    orange.RandomIndices2)
    cv = fold_maker(folds=folds, stratified=strat, randomGenerator=rgen)
    pick = subset_maker(stratified=strat, randomGenerator=rgen)
    return learningCurve(learners, examples, cv, pick, proportions, pps,
                         **argkw)
def __call__(self, data, weight=0):
    """Induce a classifier from `data`, dispatching on `self.type`
    ("complexity", "error", or "auto" to decide from the data)."""
    import orngWrap
    # NOTE(review): binding this to the name `type` shadows the builtin,
    # so the `type(self)` call below raises TypeError (a str is not
    # callable).  The check was presumably meant to be on
    # `self.boundsize` -- confirm against upstream before fixing.
    type=getattr(self, "type", "auto")
    if hasattr(self, "boundsize"):
        if type(self)==int:
            subgen=orange.SubsetsGenerator_constSize(B = self.boundsize)
        else:
            # boundsize is taken as a (min, max) pair here.
            subgen=orange.SubsetsGenerator_minMaxSize(min = self.boundsize[0], max = self.boundsize[1])
    else:
        # Default: subsets of exactly two attributes.
        subgen=orange.SubsetsGenerator_constSize(B = 2)
    # NOTE(review): `subgen` is never used below in the visible code.
    if type=="auto":
        # Fuzzy incompatibility matrix -> noisy data -> minimum-error
        # induction; otherwise minimum-complexity.
        im=orange.IMBySorting(data, [])
        if im.fuzzy():
            type="error"
        else:
            type="complexity"
    inducer=StructureInducer(removeDuplicates = 1, redundancyRemover = AttributeRedundanciesRemover(), learnerForUnknown = orange.MajorityLearner() )
    if type=="complexity":
        inducer.featureInducer = FeatureByMinComplexity()
        return inducer(data, weight)
    elif type=="error":
        # Candidate m values to tune over, unless fixed via `self.m`.
        ms=getattr(self, "m", orange.frange(0.1)+orange.frange(1.2, 3.0, 0.2)+orange.frange(4.0, 10.0, 1.0))
        inducer.redundancyRemover.inducer=inducer.featureInducer = FeatureByMinError() # it's the same object for redundancy remover and the real inducer, so we can tune just one
        return orngWrap.Tune1Parameter( parameter = "featureInducer.m", values = ms, object = inducer, returnWhat = orngWrap.Tune1Parameter.returnClassifier )(data, weight)
    # Reached only for an unrecognized explicit `self.type`;
    # featureInducer is unset on this path, so the print would fail --
    # presumably leftover debugging code.
    print inducer.featureInducer.m, inducer.redundancyRemover.inducer.m
    return inducer(data, weight)
def learningCurve(learners, examples, cv=None, pick=None, proportions=orange.frange(0.1), pps=[], **argkw):
    """Compute a learning curve by cross-validation.

    For each proportion, trains each learner on that fraction of every
    training fold and tests on the held-out fold.  Returns a list with
    one ExperimentResults per proportion.  Recognized keyword args:
    verbose, cache, callback, indicesrandseed/randseed/randomGenerator.
    """
    verb = argkw.get("verbose", 0)
    cache = argkw.get("cache", 0)
    callback = argkw.get("callback", 0)
    # Only learnset ("L") preprocessors are allowed: test examples must
    # stay untouched.
    for pp in pps:
        if pp[0] != "L":
            raise SystemError("cannot preprocess testing examples")
    # Build default fold/subsample index generators when not supplied.
    if not cv or not pick:
        # NOTE(review): default -1 is truthy, so this branch always
        # seeds explicitly unless a caller passes a seed of 0.
        seed = argkw.get("indicesrandseed", -1) or argkw.get("randseed", -1)
        if seed:
            randomGenerator = orange.RandomGenerator(seed)
        else:
            randomGenerator = argkw.get("randomGenerator", orange.RandomGenerator())
        if not cv:
            cv = orange.MakeRandomIndicesCV(
                folds=10, stratified=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=randomGenerator)
        if not pick:
            pick = orange.MakeRandomIndices2(
                stratified=orange.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=randomGenerator)
    examples, weight = demangleExamples(examples)
    folds = cv(examples)
    # Checksum and preprocessor signature feed the cache file name.
    ccsum = hex(examples.checksum())[2:]
    ppsp = encodePP(pps)
    nLrn = len(learners)
    allResults = []
    for p in proportions:
        printVerbose("Proportion: %5.3f" % p, verb)
        # Caching is only sound with reproducible (non-negative) seeds;
        # the "%s" placeholder is presumably filled per learner by the
        # load/save helpers -- TODO confirm.
        if (cv.randseed < 0) or (pick.randseed < 0):
            cache = 0
        else:
            fnstr = "{learningCurve}_%s_%s_%s_%s%s-%s" % (
                "%s", p, cv.randseed, pick.randseed, ppsp, ccsum)
            if "*" in fnstr:
                cache = 0
        # and/or idiom: int for discrete classes, float otherwise.
        conv = examples.domain.classVar.varType == orange.VarTypes.Discrete and int or float
        testResults = ExperimentResults(
            cv.folds, [l.name for l in learners],
            examples.domain.classVar.values.native(), weight != 0,
            examples.domain.classVar.baseValue)
        # Pre-populate one TestedExample per example, keyed by its fold.
        testResults.results = [
            TestedExample(folds[i], conv(examples[i].getclass()), nLrn,
                          examples[i].getweight(weight))
            for i in range(len(examples))]
        if cache and testResults.loadFromFiles(learners, fnstr):
            printVerbose(" loaded from cache", verb)
        else:
            for fold in range(cv.folds):
                printVerbose(" fold %d" % fold, verb)
                # learning: take the training folds, then subsample the
                # requested proportion from them.
                learnset = examples.selectref(folds, fold, negate=1)
                learnset = learnset.selectref(pick(learnset, p0=p), 0)
                if not len(learnset):
                    continue
                for pp in pps:
                    learnset = pp[1](learnset)
                classifiers = [None] * nLrn
                for i in range(nLrn):
                    if not cache or not testResults.loaded[i]:
                        classifiers[i] = learners[i](learnset, weight)
                # testing: score every example belonging to this fold.
                for i in range(len(examples)):
                    if (folds[i] == fold):
                        # This is to prevent cheating:
                        ex = orange.Example(examples[i])
                        ex.setclass("?")
                        for cl in range(nLrn):
                            if not cache or not testResults.loaded[cl]:
                                cls, pro = classifiers[cl](ex, orange.GetBoth)
                                testResults.results[i].setResult(cl, cls, pro)
                if callback:
                    callback()
            if cache:
                testResults.saveToFiles(learners, fnstr)
        allResults.append(testResults)
    return allResults
# Demo (Python 2 script): evaluate `learners` on `data` with several
# orngTest sampling schemes; `printResults` is defined elsewhere in
# the file.
res = orngTest.proportionTest(learners, data, 0.7, 100, storeClassifiers=1)
print "#iter %i, #classifiers %i" % (len(res.classifiers), len(res.classifiers[0]))
print
##print "\nLearning with 100% class noise"
##classnoise = orange.Preprocessor_addClassNoise(proportion=1.0)
##res = orngTest.proportionTest(learners, data, 0.7, 100, pps = [("L", classnoise)])
##printResults(res)
print "\nGood old 10-fold cross validation"
res = orngTest.crossValidation(learners, data)
printResults(res)
print "\nLearning curve"
prop = orange.frange(0.2, 1.0, 0.2)
res = orngTest.learningCurveN(learners, data, folds=5, proportions=prop)
for i in range(len(prop)):
    print "%5.3f:" % prop[i],
    printResults(res[i])
print "\nLearning curve with pre-separated data"
indices = orange.MakeRandomIndices2(data, p0=0.7)
train = data.select(indices, 0)
test = data.select(indices, 1)
res = orngTest.learningCurveWithTestData(learners, train, test, times=5, proportions=prop)
# NOTE(review): the body of this loop is truncated in this chunk; see
# the complete variant of the script for the missing print/printResults.
for i in range(len(prop)):
res = orngTest.proportionTest(learners, data, 0.7, 100, storeClassifiers = 1) print "#iter %i, #classifiers %i" % (len(res.classifiers), len(res.classifiers[0]) if len(res.classifiers) > 0 else -1) print ##print "\nLearning with 100% class noise" ##classnoise = orange.Preprocessor_addClassNoise(proportion=1.0) ##res = orngTest.proportionTest(learners, data, 0.7, 100, pps = [("L", classnoise)]) ##printResults(res) print "\nGood old 10-fold cross validation" res = orngTest.crossValidation(learners, data) printResults(res) print "\nLearning curve" prop = orange.frange(0.2, 1.0, 0.2) res = orngTest.learningCurveN(learners, data, folds = 5, proportions = prop) for i in range(len(prop)): print "%5.3f:" % prop[i], printResults(res[i]) print "\nLearning curve with pre-separated data" indices = orange.MakeRandomIndices2(data, p0 = 0.7) train = data.select(indices, 0) test = data.select(indices, 1) res = orngTest.learningCurveWithTestData(learners, train, test, times = 5, proportions = prop) for i in range(len(prop)): print "%5.3f:" % prop[i], printResults(res[i])