def main(): print "loading" annotations = annotation_reader.from_file("%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME) table = annotations.as_orange_table() cv_indices = orange.MakeRandomIndices2(table, p0=0.5) print "indices", set(cv_indices) print "splitting" training, testing = annotation_reader.split(annotations, cv_indices) print "features" engine = PairwiseEngine(training) training_table = engine.training_table testing_table = engine.makeTable(testing) print len(training_table), "training" print len(testing_table), "testing" learners = [orange.MajorityLearner(), orngEnsemble.RandomForestLearner(), ] results = orngTest.learnAndTestOnTestData(learners, training_table, testing_table) for accuracy, cm in zip(orngStat.CA(results), orngStat.confusionMatrices(results)): print orangeUtils.confusion_matrix_to_string(table.domain, cm) print "accuracy: %.2f%%" % (accuracy*100)
def test_mammal(self):
    """Majority classifier on zoo: every prediction is 'mammal' and the
    probability output equals the normalized class distribution."""
    data = orange.ExampleTable("zoo")
    majority = orange.MajorityLearner(data)
    prior = orange.get_class_distribution(data)
    prior.normalize()
    for example in data:
        self.assertEqual(majority(example), "mammal")
        self.assertEqual(
            majority(example, orange.Classifier.GetProbabilities), prior)
def __init__(self, data):
    """Validate the training data and set up shared subgroup-discovery state.

    data: an orange.ExampleTable with a discrete class variable.
    Raises TypeError when the table type or the target variable is unsuitable.
    """
    # isinstance (rather than an exact type() comparison) also accepts
    # ExampleTable subclasses, which behave identically for our purposes.
    if not isinstance(data, orange.ExampleTable):
        raise TypeError('Data is not an orange.ExampleTable')
    # Guard classVar is None explicitly: a class-less table used to crash
    # with AttributeError on `.varType` instead of raising the intended error.
    if data.domain.classVar is None or \
            data.domain.classVar.varType != orange.VarTypes.Discrete:
        raise TypeError('Data should have a discrete target variable.')
    self.data = data
    self.majorityClassifier = orange.MajorityLearner(self.data)
    self.rulesClass = []  # list of SDRules instances
    self.algorithm = "Subgroup discovery algorithm"
def __call__(self, data, weight=0):
    """Induce a feature-construction classifier on `data`.

    Chooses between complexity-based and error-based feature induction
    according to self.type ("complexity", "error", or "auto", which decides
    from whether the data is fuzzy/noisy).
    """
    import orngWrap
    # Do not shadow the builtin `type`: the original stored the induction
    # mode in a local named `type`, which broke the type check below.
    induction_type = getattr(self, "type", "auto")
    if hasattr(self, "boundsize"):
        # BUG FIX: this used to read `if type(self) == int:` with `type`
        # rebound to a string, which always raised TypeError.  The intent
        # is to check whether boundsize is a single int or a (min, max) pair.
        if isinstance(self.boundsize, int):
            subgen = orange.SubsetsGenerator_constSize(B=self.boundsize)
        else:
            subgen = orange.SubsetsGenerator_minMaxSize(
                min=self.boundsize[0], max=self.boundsize[1])
    else:
        subgen = orange.SubsetsGenerator_constSize(B=2)

    if induction_type == "auto":
        # Fuzzy (noisy) data cannot be handled by minimal-complexity
        # induction, so fall back to minimal-error induction.
        im = orange.IMBySorting(data, [])
        if im.fuzzy():
            induction_type = "error"
        else:
            induction_type = "complexity"

    inducer = StructureInducer(
        removeDuplicates=1,
        redundancyRemover=AttributeRedundanciesRemover(),
        learnerForUnknown=orange.MajorityLearner())

    if induction_type == "complexity":
        inducer.featureInducer = FeatureByMinComplexity()
        return inducer(data, weight)
    elif induction_type == "error":
        ms = getattr(
            self, "m",
            orange.frange(0.1) + orange.frange(1.2, 3.0, 0.2) +
            orange.frange(4.0, 10.0, 1.0))
        # It's the same FeatureByMinError object for the redundancy remover
        # and the real inducer, so we can tune just one parameter.
        inducer.redundancyRemover.inducer = inducer.featureInducer = FeatureByMinError()
        return orngWrap.Tune1Parameter(
            parameter="featureInducer.m",
            values=ms,
            object=inducer,
            returnWhat=orngWrap.Tune1Parameter.returnClassifier)(data, weight)

    # Reached only for an unrecognized self.type value; kept for parity
    # with the original debugging output.
    print(inducer.featureInducer.m, inducer.redundancyRemover.inducer.m)
    return inducer(data, weight)
def __init__(self, map=[], examples=[]):
    """Attach reference examples, member examples and majority classifiers
    to SOM map nodes.

    map: list of SOM nodes, each carrying a codebook .vector.
    examples: orange.ExampleTable whose rows are assigned to their
        best-matching node.

    NOTE: the mutable-default-argument pitfall is neutralized below by
    never mutating the defaults and rebinding them to fresh lists; the
    signature keeps `[]` defaults for backward compatibility.
    """
    self.map = map if map is not None else []
    self.examples = examples if examples is not None else []
    for node in self.map:
        # Rebuild the node's codebook vector as an Example over a class-less
        # copy of the domain; continuous variables take the raw value,
        # discrete ones the truncated index.
        node.referenceExample = orange.Example(
            orange.Domain(self.examples.domain.attributes, False),
            [(var(value) if var.varType == orange.VarTypes.Continuous
              else var(int(value)))
             for var, value in zip(self.examples.domain.attributes, node.vector)])
        node.examples = orange.ExampleTable(self.examples.domain)
    for ex in self.examples:
        node = self.getBestMatchingNode(ex)
        node.examples.append(ex)
    if self.examples and self.examples.domain.classVar:
        for node in self.map:
            node.classifier = orange.MajorityLearner(node.examples)
def main(): print "loading" annotations = annotation_reader.from_file( "%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME) annotator2 = annotation_reader.from_file( "%s/data/directions/breadbox/nouns_dlaude.partial.txt" % TKLIB_HOME) #histogram(annotations) print "table" table = annotations.as_orange_table() cv_indices = orange.MakeRandomIndices2(table, p0=0.5) print "indices", set(cv_indices) print "splitting" training, testing = annotation_reader.split(annotations, cv_indices) print "features" engine = WordnetParentsEngine(training) training_table = engine.makeTable(training) testing_table = engine.makeTable(testing) #training_table, testing_table = wordnet_parents(training, testing) #training_table, testing_table = wordnet_glosses(training, testing) #training_table, testing_table = flickr_parents(training, testing) print len(training_table), "training examples" print len(testing_table), "testing examples" #training_table = annotation_reader.to_big_small(training_table) #testing_table = annotation_reader.to_big_small(testing_table) #information_gain = orange.MeasureAttribute_info() #for x in training_table.domain.attributes: # print "x", information_gain(x, training_table) learners = [ orange.MajorityLearner(), orngEnsemble.RandomForestLearner(), WordnetKnnClassifier, agreement.WizardOfOzLearner(annotator2.as_orange_table()) ] results = orngTest.learnAndTestOnTestData(learners, training_table, testing_table) for accuracy, cm in zip(orngStat.CA(results), orngStat.confusionMatrices(results)): print orangeUtils.confusion_matrix_to_string(table.domain, cm) print "accuracy: %.2f%%" % (accuracy * 100)
def __init__(self, map=[], data=[]):
    """Attach reference instances, member instances and majority classifiers
    to SOM map nodes.

    map: list of SOM nodes, each carrying a codebook .vector.
    data: orange.ExampleTable whose rows are assigned to their
        best-matching node.

    NOTE: the mutable-default-argument pitfall is neutralized below by
    never mutating the defaults and rebinding them to fresh lists; the
    signature keeps `[]` defaults for backward compatibility.
    """
    self.map = map if map is not None else []
    self.data = data if data is not None else []
    for node in self.map:
        # Codebook vector as an Example over a class-less copy of the domain.
        node.reference_instance = orange.Example(
            orange.Domain(self.data.domain.attributes, False),
            [(var(value) if var.varType == orange.VarTypes.Continuous
              else var(int(value)))
             for var, value in zip(self.data.domain.attributes, node.vector)])
        node.instances = orange.ExampleTable(self.data.domain)
    for inst in self.data:
        node = self.get_best_matching_node(inst)
        node.instances.append(inst)
    if self.data and self.data.domain.class_var:
        for node in self.map:
            # Fall back to the full data set for nodes that attracted no
            # instances, so every node still gets a classifier.
            node.classifier = orange.MajorityLearner(
                node.instances if node.instances else self.data)
        self.class_var = self.data.domain.class_var
    else:
        self.class_var = None
def test_equal(self):
    """MajorityLearner on iris (three equally frequent classes): predictions
    are deterministic per example, the tied majority yields three distinct
    answers across the data set, probabilities are uniform (1/3), and the
    classifier survives pickling."""
    d = orange.ExampleTable("iris")
    cc = orange.MajorityLearner(d)
    # Repeated calls on the same example must agree.
    for e in d[0:150:20]:
        anss = set()
        for i in range(5):
            anss.add(cc(e))
        self.assertEqual(len(anss), 1)
    # With the majority tied three ways, predictions vary across examples:
    # three distinct answers over the whole table.
    anss = set()
    for e in d:
        anss.add(cc(e))
    self.assertEqual(len(anss), 3)
    for e in d[0:150:20]:
        # BUG FIX: `1/3` is integer division (== 0) under Python 2, which
        # made this assertion compare probabilities against zero.  `1.0/3`
        # is correct under both Python 2 and 3.
        self.assertTrue(all(x == 1.0 / 3
                            for x in cc(e, orange.Classifier.GetProbabilities)))
    # The classifier must survive a pickle round trip unchanged.
    import pickle
    s = pickle.dumps(cc)
    cc2 = pickle.loads(s)
    for e in d:
        self.assertEqual(cc(e), cc2(e))
def testTrecentoSimpler(trainFile='d:/desktop/trecento2.tab',
                        testFile='d:/desktop/trecento1.tab'):
    """Compare majority-class vs. kNN error rates on the trecento data.

    The data-file paths were hard-coded; they are now parameters whose
    defaults are the original values, so existing callers are unaffected.
    Prints a (majorityErrorRate, knnErrorRate) tuple.
    """
    import orange, orngTree  # @UnusedImport @UnresolvedImport
    trainData = orange.ExampleTable(trainFile)
    testData = orange.ExampleTable(testFile)
    majClassifier = orange.MajorityLearner(trainData)
    knnClassifier = orange.kNNLearner(trainData)
    majWrong = 0
    knnWrong = 0
    for testRow in testData:
        majGuess = majClassifier(testRow)
        knnGuess = knnClassifier(testRow)
        realAnswer = testRow.getclass()
        if majGuess != realAnswer:
            majWrong += 1
        if knnGuess != realAnswer:
            knnWrong += 1
    # float() so the division is true division under Python 2.
    total = float(len(testData))
    print (majWrong/total, knnWrong/total)
def xtestChinaEuropeSimpler():
    """Compare majority-class and kNN error rates on the ISMIR 2011 folk data."""
    import orange, orngTree  # @UnusedImport @UnresolvedImport
    trainData = orange.ExampleTable('ismir2011_fb_folkTrain.tab')
    testData = orange.ExampleTable('ismir2011_fb_folkTest.tab')
    majClassifier = orange.MajorityLearner(trainData)
    knnClassifier = orange.kNNLearner(trainData)
    majMisses = 0
    knnMisses = 0
    for row in testData:
        truth = row.getclass()
        if majClassifier(row) != truth:
            majMisses += 1
        if knnClassifier(row) != truth:
            knnMisses += 1
    # float() so the division is true division under Python 2.
    total = float(len(testData))
    print (majMisses/total, knnMisses/total)
# Classes: orngTest.crossValidation, orngTree.TreeLearner, orange.kNNLearner, orngRegression.LinearRegressionLearner # Referenced: regression.htm import orange import orngRegression import orngTree import orngStat, orngTest data = orange.ExampleTable("housing") # definition of learners (regressors) lr = orngRegression.LinearRegressionLearner(name="lr") rt = orngTree.TreeLearner(measure="retis", mForPruning=2, minExamples=20, name="rt") maj = orange.MajorityLearner(name="maj") knn = orange.kNNLearner(k=10, name="knn") learners = [maj, lr, rt, knn] # evaluation and reporting of scores results = orngTest.crossValidation(learners, data, folds=10) scores = [("MSE", orngStat.MSE), ("RMSE", orngStat.RMSE), ("MAE", orngStat.MAE), ("RSE", orngStat.RSE), ("RRSE", orngStat.RRSE), ("RAE", orngStat.RAE), ("R2", orngStat.R2)] print "Learner " + "".join(["%-7s" % s[0] for s in scores]) for i in range(len(learners)): print "%-8s " % learners[i].name + "".join( ["%6.3f " % s[1](results)[i] for s in scores])
# Description: Builds regression models from data and outputs predictions for first five instances # Category: modelling # Uses: housing # Classes: MakeRandomIndices2, MajorityLearner, orngTree.TreeLearner, orange.kNNLearner # Referenced: regression.htm import orange, orngTree, orngTest, orngStat data = orange.ExampleTable("housing.tab") selection = orange.MakeRandomIndices2(data, 0.5) train_data = data.select(selection, 0) test_data = data.select(selection, 1) maj = orange.MajorityLearner(train_data) maj.name = "default" rt = orngTree.TreeLearner(train_data, measure="retis", mForPruning=2, minExamples=20) rt.name = "reg. tree" k = 5 knn = orange.kNNLearner(train_data, k=k) knn.name = "k-NN (k=%i)" % k regressors = [maj, rt, knn] print "\n%10s " % "original", for r in regressors: print "%10s " % r.name, print for i in range(10):
# Demonstrates orngTest.proportionTest with and without an explicit
# random generator on the voting data set.
import orange, orngTest, orngStat
import random

data = orange.ExampleTable("voting")

bayes = orange.BayesLearner(name="bayes")
tree = orange.TreeLearner(name="tree")
majority = orange.MajorityLearner(name="default")
learners = [bayes, tree, majority]
names = [x.name for x in learners]

def printResults(res):
    # Print one "name: CA +- 1.96*SE" column per learner on a single line.
    CAs = orngStat.CA(res, reportSE=1)
    for i in range(len(names)):
        print "%s: %5.3f+-%5.3f " % (names[i], CAs[i][0], 1.96 * CAs[i][1]),
    print

print "\nproportionsTest that will always give the same results"
for i in range(3):
    res = orngTest.proportionTest(learners, data, 0.7)
    printResults(res)

print "\nproportionsTest that will give different results, but the same each time the script is run"
# Reusing one generator across calls varies the split between iterations
# while keeping the whole sequence reproducible run to run.
myRandom = orange.RandomGenerator()
for i in range(3):
    # NOTE(review): the snippet appears truncated here — the original
    # presumably called printResults(res) inside this loop as well.
    res = orngTest.proportionTest(learners, data, 0.7, randomGenerator=myRandom)
def majority_learner(input_dict):
    """Workflow entry point: build a named Orange majority-class learner.

    `input_dict` is part of the widget calling convention and is not used.
    Returns a dict with the learner under the 'majorout' key.
    """
    import orange
    learner = orange.MajorityLearner(name="Majority Classifier (Orange)")
    return {'majorout': learner}
p = max(maxp) # max class probability classifier_index = maxp.index(p) c = pmatrix[classifier_index].modus() if resultType == orange.GetValue: return c elif resultType == orange.getClassDistribution: return pmatrix[classifier_index] else: return (c, pmatrix[classifier_index]) tree = orngTree.TreeLearner(mForPruning=5.0) tree.name = 'class. tree' bayes = orange.BayesLearner() bayes.name = 'naive bayes' winner = WinnerLearner(learners=[tree, bayes]) winner.name = 'winner' majority = orange.MajorityLearner() majority.name = 'default' learners = [majority, tree, bayes, winner] data = orange.ExampleTable("promoters") results = orngTest.crossValidation(learners, data) print "Classification Accuracy:" for i in range(len(learners)): print ("%15s: %5.3f") % (learners[i].name, orngStat.CA(results)[i])
if __name__=="__main__": a = QApplication(sys.argv) ow = OWPredictions() ow.show() import orngTree dataset = orange.ExampleTable('../../doc/datasets/iris.tab') # dataset = orange.ExampleTable('../../doc/datasets/auto-mpg.tab') ind = orange.MakeRandomIndices2(p0=0.5)(dataset) data = dataset.select(ind, 0) test = dataset.select(ind, 1) testnoclass = orange.ExampleTable(orange.Domain(test.domain.attributes, False), test) tree = orngTree.TreeLearner(data) tree.name = "tree" maj = orange.MajorityLearner(data) maj.name = "maj" knn = orange.kNNLearner(data, k = 10) knn.name = "knn" # ow.setData(test) # # ow.setPredictor(maj, 1) if 1: # data set only ow.setData(test) if 0: # two predictors, test data with class ow.setPredictor(maj, 1) ow.setPredictor(tree, 2)
# Show the imputed version of a row with missing values, then a window of
# original vs. imputed rows.  (Fragment: `imputer` and `data` are set up
# earlier in the original script.)
print imputer(data[19])
print

impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print

print "\n*** BAYES and AVERAGE IMPUTATION ***\n"

# Model-based imputation: a Bayes model fills discrete attributes, a
# majority/average model fills continuous ones.
imputer = orange.ImputerConstructor_model()
imputer.learnerContinuous = orange.MajorityLearner()
imputer.learnerDiscrete = orange.BayesLearner()
imputer = imputer(data)
print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print

impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print
# Description: Demostrates the use of classification scores # Category: evaluation # Uses: voting.tab # Referenced: orngStat.htm import orange, orngTest, orngTree learners = [orange.BayesLearner(name = "bayes"), orngTree.TreeLearner(name="tree"), orange.MajorityLearner(name="majrty")] voting = orange.ExampleTable("voting") res = orngTest.crossValidation(learners, voting) vehicle = orange.ExampleTable("vehicle") resVeh = orngTest.crossValidation(learners, vehicle) import orngStat CAs = orngStat.CA(res) APs = orngStat.AP(res) Briers = orngStat.BrierScore(res) ISs = orngStat.IS(res) print print "method\tCA\tAP\tBrier\tIS" for l in range(len(learners)): print "%s\t%5.3f\t%5.3f\t%5.3f\t%6.3f" % (learners[l].name, CAs[l], APs[l], Briers[l], ISs[l]) CAs = orngStat.CA(res, reportSE=True)
def __call__(self, data, weight=None):
    """Fit a (possibly weighted, possibly stepwise) linear regression on `data`
    and return a LinearRegression classifier carrying the model statistics.

    data: orange.ExampleTable with a continuous class.
    weight: optional meta-attribute id holding per-example weights.
    """
    # Optional restriction to a preselected attribute subset.
    if not self.use_attributes is None:
        new_domain = orange.Domain(self.use_attributes, data.domain.classVar)
        new_domain.addmetas(data.domain.getmetas())
        data = orange.ExampleTable(new_domain, data)

    # Stepwise attribute selection before continuization, if requested.
    if self.stepwise and self.stepwise_before:
        use_attributes = stepwise(data, add_sig=self.add_sig,
                                  remove_sig=self.remove_sig)
        new_domain = orange.Domain(use_attributes, data.domain.classVar)
        new_domain.addmetas(data.domain.getmetas())
        data = orange.ExampleTable(new_domain, data)

    # Continuization (replaces discrete with continuous attributes).
    continuizer = orange.DomainContinuizer()
    continuizer.multinomialTreatment = continuizer.FrequentIsBase
    continuizer.zeroBased = True
    domain0 = continuizer(data)
    data = data.translate(domain0)

    # Stepwise attribute selection after continuization, if requested.
    if self.stepwise and not self.stepwise_before:
        use_attributes = stepwise(data, weight, add_sig=self.add_sig,
                                  remove_sig=self.remove_sig)
        new_domain = orange.Domain(use_attributes, data.domain.classVar)
        new_domain.addmetas(data.domain.getmetas())
        data = orange.ExampleTable(new_domain, data)

    # Missing values handling (impute missing with model-based imputer).
    imputer = orange.ImputerConstructor_model()
    imputer.learnerContinuous = orange.MajorityLearner()
    imputer.learnerDiscrete = orange.MajorityLearner()
    imputer = imputer(data)
    data = imputer(data)

    # Conversion to numpy: attribute matrix A, class vector y, weights w.
    A, y, w = data.toNumpy()  # weights ??
    if A is None:
        n = len(data)
        m = 0
    else:
        n, m = numpy.shape(A)

    # Design matrix X; with an intercept term a column of ones is prepended.
    if self.beta0 == True:
        if A is None:
            X = numpy.ones([len(data), 1])
        else:
            X = numpy.insert(A, 0, 1, axis=1)  # adds a column of ones
    else:
        X = A

    # Set example weights (diagonal weight matrix; identity when unweighted).
    W = numpy.identity(len(data))
    if weight:
        for di, d in enumerate(data):
            W[di, di] = float(d[weight])

    # Weighted least squares via the pseudo inverse: adds some robustness,
    # since a normal inverse could fail due to singularity of X.T*W*X.
    D = dot(
        dot(numpy.linalg.pinv(dot(dot(X.T, W), X)), X.T), W
    )
    beta = dot(D, y)
    yEstimated = dot(X, beta)  # fitted values

    # Some descriptive statistics.
    muY, sigmaY = numpy.mean(y), numpy.std(y)
    muX, covX = numpy.mean(X, axis=0), numpy.cov(X, rowvar=0)

    # Model statistics: total, regression and error sums of squares.
    SST, SSR = numpy.sum((y - muY)**2), numpy.sum((yEstimated - muY)**2)
    SSE, RSquare = SST - SSR, SSR / SST
    R = numpy.sqrt(RSquare)  # coefficient of determination
    RAdjusted = 1 - (1 - RSquare) * (n - 1) / (n - m - 1)
    # NOTE(review): the textbook F statistic is (SSR/m) / (SSE/(n-m-1));
    # the parenthesization below computes SST - SSR/(n-m-1) instead —
    # looks like a bug, left untouched here pending confirmation.
    F = (SSR / m) / (SST - SSR / (n - m - 1))  # F statistic
    df = m - 1

    sigmaSquare = SSE / (n - m - 1)
    # Standard error of estimated coefficients.
    errCoeff = sqrt(sigmaSquare * inv(dot(X.T, X)).diagonal())

    # t statistic and two-sided significance per coefficient.
    t = beta / errCoeff
    df = n - 2
    significance = []
    for tt in t:
        try:
            significance.append(
                statc.betai(df * 0.5, 0.5, df / (df + tt * tt)))
        except:
            # Numerical failure in the incomplete beta: report "not significant".
            significance.append(1.0)

    # Standardized coefficients.
    if m > 0:
        stdCoeff = (sqrt(covX.diagonal()) / sigmaY) * beta
    else:
        stdCoeff = (sqrt(covX) / sigmaY) * beta

    model = {
        'descriptives': {
            'meanX': muX, 'covX': covX, 'meanY': muY, 'sigmaY': sigmaY
        },
        'model': {
            'estCoeff': beta, 'stdErrorEstimation': errCoeff
        },
        'model summary': {
            'TotalVar': SST, 'ExplVar': SSE, 'ResVar': SSR, 'R': R,
            'RAdjusted': RAdjusted, 'F': F, 't': t, 'sig': significance
        }
    }
    return LinearRegression(statistics=model, domain=data.domain,
                            name=self.name, beta0=self.beta0, imputer=imputer)
def __init__(self, model=None, **kwargs):
    """Store the wrapped model, defaulting to a majority-class learner.

    Extra keyword arguments are accepted and ignored, as before.
    """
    if model is None:
        self.model = orange.MajorityLearner()
    else:
        self.model = model
class TestImputeByLearner(testing.PreprocessorTestCase):
    """Run the shared PreprocessorTestCase suite against
    Preprocessor_imputeByLearner configured with a majority/average learner."""
    PREPROCESSOR = Preprocessor_imputeByLearner(
        learner=orange.MajorityLearner())
correct = [0.0]*len(classifiers) for ex in test_data: for i in range(len(classifiers)): if classifiers[i](ex) == ex.getclass(): correct[i] += 1 for i in range(len(correct)): correct[i] = 100.0*correct[i] / len(test_data) return correct # set up the classifiers trainD = orange.ExampleTable(trainData) devD = orange.ExampleTable(devData) testD = orange.ExampleTable(testData) majority = orange.MajorityLearner(trainD) majority.name = 'majority ' print majority.name ##tree = orngTree.TreeLearner(trainD, measure='gainRatio', binarization=0, minSubset=5, minExamples=5, sameMajorityPruning=1, mForPruning=5); ##tree.name = "tree - gainRatio " ##f = file(trainData+'o.txt.tree', 'w') ##f.write(orngTree.dumpTree(tree, leafStr='%V (%^.2m% = %.0M out of %.0N)')) ##f.close() ##print tree.name ##treeC45 = orange.C45Learner(trainD, minObjs=5) ##treeC45.name = "tree - C45 " ##f = file(trainData+'o.txt.C45tree', 'w') ##dumpC45Tree(treeC45,f) ##f.close()
# Train a tree and a majority baseline on one table and count their hits
# on a separate evaluation table.
import sys
sys.path.append('../')
import database
import warnings
warnings.simplefilter("ignore")
import math
from queryconstructor import QueryConstructor
from plotconstructor import *
import orange

warnings.filterwarnings("ignore", "", orange.AttributeWarning)

learner_data = orange.ExampleTable("learner_data")
print "starting learning"
treeLearner = orange.TreeLearner(learner_data)
majorityLearner = orange.MajorityLearner(learner_data)
print "done with learning"

eval_data = orange.ExampleTable("eval_data")
# Running tallies: `total` = tree hits, `total_lame` = majority-baseline hits.
# NOTE(review): `total_true` and `count` are initialized but never used in
# the visible code — the snippet appears truncated before the final report.
total = 0.0
total_lame = 0.0
total_true = 0.0
count = 0.0
print "starting evaluation"
for i in eval_data:
    actual = i.getclass()
    if (treeLearner(i) == actual):
        total += 1
    if (majorityLearner(i) == actual):
        total_lame += 1