def add(disc1, disc2, sumd):
    """Return the element-wise sum of two discrete distributions together
    with the updated running total of the mass that was added."""
    combined = orange.DiscDistribution(disc1)
    total = sumd
    for idx in range(len(combined)):
        combined[idx] += disc2[idx]
        total += disc2[idx]
    return combined, total
def __call__(self, example, result_type=orange.GetValue, retRules=False):
    """Classify *example* by summing the class distributions of every rule
    that covers it, falling back to the prior when no rule fires.

    Returns a value, a distribution, or both depending on *result_type*;
    with retRules=True the list of covering rules is appended to the result.
    """
    def add(disc1, disc2, sumd):
        # Element-wise sum of two discrete distributions plus the running
        # total of the added mass.
        disc = orange.DiscDistribution(disc1)
        sumdisc = sumd
        for i, d in enumerate(disc):
            disc[i] += disc2[i]
            sumdisc += disc2[i]
        return disc, sumdisc

    # create empty distribution
    retDist = orange.DiscDistribution(self.examples.domain.classVar)
    covRules = orange.RuleList()
    # iterate through rules - accumulate distributions of covering rules
    sumdisc = 0.
    for r in self.rules:
        if r(example) and r.classDistribution:
            retDist, sumdisc = add(retDist, r.classDistribution, sumdisc)
            covRules.append(r)
    if not sumdisc:
        # BUG FIX: copy the prior instead of aliasing it -- the in-place
        # normalisation loop below would otherwise mutate self.prior itself.
        retDist = orange.DiscDistribution(self.prior)
        sumdisc = self.prior.abs

    # Normalise the accumulated distribution.
    for c in self.examples.domain.classVar:
        retDist[c] /= sumdisc

    if retRules:
        if result_type == orange.GetValue:
            return (retDist.modus(), covRules)
        if result_type == orange.GetProbabilities:
            return (retDist, covRules)
        return (retDist.modus(), retDist, covRules)
    if result_type == orange.GetValue:
        return retDist.modus()
    if result_type == orange.GetProbabilities:
        return retDist
    return (retDist.modus(), retDist)
def test_discrete(self):
    """RandomClassifier construction variants and predictions on a discrete class."""
    data = orange.ExampleTable("iris")
    class_dist = orange.Distribution(data.domain.class_var, data)

    # Built from the class variable alone.
    rc = orange.RandomClassifier(data.domain.class_var)
    self.assertEqual(rc.probabilities.variable, rc.class_var)

    # Built from a distribution; the classifier adopts it by reference.
    rc2 = orange.RandomClassifier(None, class_dist)
    self.assertEqual(rc2.class_var, data.domain.class_var)
    self.assertEqual(rc2.probabilities.variable, rc2.class_var)
    self.assertEqual(id(rc2.probabilities), id(class_dist))
    self.assertTrue(all(x == 50 for x in rc2.probabilities))

    for clf in [rc, rc2]:
        # The same example always receives the same answer ...
        for ex in data[0:150:20]:
            answers = set(clf(ex) for _ in range(5))
            self.assertEqual(len(answers), 1)
        # ... but over the whole table all three classes show up.
        answers = set(clf(ex) for ex in data)
        self.assertEqual(len(answers), 3)

    for ex in data[0:150:20]:
        self.assertTrue(all(x == 50 for x in rc2(ex, orange.Classifier.GetProbabilities)))

    # Invalid constructions must raise.
    self.assertRaises(TypeError, orange.RandomClassifier, class_dist)
    self.assertRaises(ValueError, orange.RandomClassifier, None, orange.DiscDistribution())
    self.assertRaises(ValueError, orange.RandomClassifier, data.domain[1], orange.Distribution(data.domain[0]))
def _generateProbabilities(self, prediction):
    """Build an artificial class distribution: all zeros, with the
    predicted class set to 1.  An unknown prediction ("?") yields the
    all-zero distribution."""
    one_hot = orange.DiscDistribution(self.classVar)
    if prediction != "?":
        one_hot[prediction] = 1
    return one_hot
def __call__(self, example, resultType=orange.GetValue):
    """Predict by averaging the class distributions of all rules (across
    all per-class rule sets) that cover *example*; fall back to the
    majority classifier when no rule fires."""
    # 1. Sum the distributions of every covering rule.
    covering = 0.0
    class_values = self.data.domain.classVar.values
    totals = [0] * len(class_values)
    for rsc in self.rulesClass:
        for rule in rsc.rules.rules:
            if rule.covers(example):
                covering += 1
                rule_dist = rule(example, orange.GetProbabilities)
                for idx in range(len(totals)):
                    totals[idx] += rule_dist[idx]

    if covering:
        # 2. Average the summed distributions and take the modal class
        #    (first index on ties, as with a strict > comparison).
        averaged = [t / covering for t in totals]
        best = max(range(len(averaged)), key=lambda idx: averaged[idx])
        dist = orange.DiscDistribution(averaged)
        value = orange.Value(self.data.domain.classVar, class_values[best])
    else:
        # No rule fired -- delegate to the majority classifier.
        value, dist = self.majorityClassifier(example, orange.GetBoth)

    # 3. Return in the requested shape.
    if resultType == orange.GetValue:
        return value
    elif resultType == orange.GetBoth:
        return (value, dist)
    else:
        return dist
def t2est_discrete(self):
    # NOTE(review): the "t2est_" prefix keeps this test out of unittest
    # discovery -- rename to "test_" to re-enable it.
    """Exercise orange.ConstantClassifier construction variants on a
    discrete class and check its value/distribution predictions."""
    d = orange.ExampleTable("iris")
    dist = orange.Distribution(d.domain.class_var, d)
    # Constructor: class variable only.
    cc = orange.ConstantClassifier(d.domain.class_var)
    self.assertEqual(cc.class_var, d.domain.class_var)
    self.assertEqual(cc.default_distribution.variable, cc.class_var)
    # Constructor: distribution only; adopted by reference, not copied.
    cc2 = orange.ConstantClassifier(dist)
    self.assertEqual(cc2.class_var, d.domain.class_var)
    self.assertEqual(cc2.default_distribution.variable, cc2.class_var)
    self.assertEqual(id(cc2.default_distribution), id(dist))
    self.assertTrue(all(x==50 for x in cc2.default_distribution))
    # Constructor: variable + no fixed value + distribution.
    cc3 = orange.ConstantClassifier(d.domain.class_var, None, dist)
    self.assertEqual(cc3.class_var, d.domain.class_var)
    self.assertEqual(cc3.default_distribution.variable, cc3.class_var)
    self.assertEqual(id(cc3.default_distribution), id(dist))
    self.assertTrue(all(x==50 for x in cc3.default_distribution))
    # Constructor: variable + fixed default value + distribution.
    cc4 = orange.ConstantClassifier(d.domain.class_var, "Iris-setosa", dist)
    self.assertEqual(cc4.class_var, d.domain.class_var)
    # NOTE(review): "cc3" below looks like a copy/paste slip -- probably
    # meant cc4 (the assertion still holds since both share the variable).
    self.assertEqual(cc4.default_distribution.variable, cc3.class_var)
    self.assertEqual(id(cc4.default_distribution), id(dist))
    self.assertTrue(all(x==50 for x in cc4.default_distribution))
    for cl in [cc, cc2, cc3]:
        # Without a fixed value, a given example must still be answered
        # deterministically across repeated calls ...
        for e in d[0:150:20]:
            anss = set()
            for i in range(5):
                anss.add(cl(e))
            self.assertEqual(len(anss), 1)
        # ... while over the whole table all three classes appear.
        anss = set()
        for e in d:
            anss.add(cl(e))
        self.assertEqual(len(anss), 3)
    # With a fixed value the answer is always that value.
    for e in d[0:150:20]:
        anss = set()
        for i in range(5):
            self.assertEqual(cc4(e), "Iris-setosa")
    for cl in [cc2, cc3, cc4]:
        for e in d[0:150:20]:
            self.assertTrue(all(x==50 for x in cl(e, orange.Classifier.GetProbabilities)))
    # Invalid constructions must raise.
    self.assertRaises(TypeError, orange.ConstantClassifier, d.domain.class_var, dist)
    self.assertRaises(ValueError, orange.ConstantClassifier, None, "?", orange.DiscDistribution())
    self.assertRaises(ValueError, orange.ConstantClassifier, d.domain[1], "?", orange.Distribution(d.domain[0]))
    # The default distribution is writable, including from a plain list.
    cc4.default_distribution = [50, 50, 50]
    self.assertEqual(list(cc4.default_distribution), [50, 50, 50])
    cc5 = orange.ConstantClassifier(d.domain.class_var, "Iris-setosa", [50, 50, 50])
    self.assertEqual(list(cc5.default_distribution), [50, 50, 50])
def _getProbabilities(self, ProbOf1):
    """Get the orange like output probabilities for the current predicted example.

    Only valid for binary classifiers (an openCV limitation): *ProbOf1* is
    the fraction of tree votes for the class that openCV encodes as 1.
    """
    # Find the classValue string that is represented by the scalar 1 in opencvRF.
    class1 = dataUtilities.CvMat2orangeResponse(1, self.classVar).value
    dist = orange.DiscDistribution(self.classVar)
    idx1 = self.classVar.values.index(class1)
    # FIX: the original used `not idx1` as the complementary index, relying
    # on bool->int coercion and silently mapping any index other than 0 to
    # index 0.  `1 - idx1` states the binary assumption explicitly and is
    # identical for the valid indices 0 and 1.
    dist[idx1] = ProbOf1
    dist[1 - idx1] = 1 - ProbOf1
    return dist
def __call__(self, ex, what=orange.Classifier.GetValue):
    """Classify by flattening the two attribute values into a single
    combined class index (var1 * |var2 values| + var2)."""
    combined = ex[self.var1] * self.noValues2 + ex[self.var2]
    if what == orange.Classifier.GetValue:
        return orange.Value(self.classVar, combined)
    dist = orange.DiscDistribution(self.classVar)
    dist[combined] = 1.0
    if what == orange.Classifier.GetProbabilities:
        return dist
    return (orange.Value(self.classVar, combined), dist)
def test_construction(self):
    """DiscDistribution construction paths: rejected input, by attribute
    name, from a frequency list, and from a discrete variable."""
    data = orange.ExampleTable("zoo")
    # A sequence of (value, frequency) pairs is not an accepted input.
    self.assertRaises(TypeError, orange.DiscDistribution,
                      zip(data.domain["type"].values, self.freqs))
    by_name = orange.Distribution("type", data)
    from_freqs = orange.DiscDistribution(self.freqs)
    self.assertEqual(by_name, from_freqs)
    # A discrete variable yields a DiscDistribution instance.
    empty = orange.Distribution(data.domain.classVar)
    self.assertTrue(isinstance(empty, orange.DiscDistribution))
def __call__(self, example, what=orange.Classifier.GetValue):
    """Wrap the underlying classifier's score as an orange prediction.

    The wrapped ``orange_classify`` returns a score that is rounded to
    pick the class index, i.e. it acts as the class-1 probability --
    presumably a binary classifier (TODO confirm against the wrapped model).
    """
    probability = self.classifier.orange_classify(example)
    answer = orange.Value(self.classVar, int(round(probability)))
    probabilities = orange.DiscDistribution(self.classVar)
    # BUG FIX: the score belongs to class 1 (it is rounded to choose the
    # class index), so it must be stored on class 1 with its complement on
    # class 0.  The original stored the raw score under the *predicted*
    # class, yielding an inconsistent distribution whenever the score
    # rounded down (e.g. 0.3 assigned to class 0).
    probabilities[1] = probability
    probabilities[0] = 1 - probability
    if what == orange.Classifier.GetValue:
        return answer
    elif what == orange.Classifier.GetProbabilities:
        return probabilities
    else:
        return answer, probabilities
def __call__(self, node, example): while node.branchSelector: branch = node.branchSelector(example) if branch.isSpecial() or int(branch)>=len(node.branches): votes = orange.DiscDistribution([random.randint(0, 100) for i in node.branches]) votes.normalize() print "Weights:", votes return node, votes nextNode = node.branches[int(branch)] if not nextNode: break node = nextNode return node
def __call__(self, ex, what=orange.Classifier.GetValue):
    """Delegate to self.classify and package the answer as a value,
    a one-hot distribution, or both, per the *what* flag."""
    label = self.classify(ex)
    predicted = orange.Value(ex.domain.classVar, str(label))
    dist = orange.DiscDistribution(ex.domain.classVar)
    dist[label] = 1.0
    if what == orange.Classifier.GetValue:
        return predicted
    if what == orange.Classifier.GetProbabilities:
        return dist
    if what == orange.Classifier.GetBoth:
        return predicted, dist
    raise ValueError("Bad what argument: %s" % repr(what))
def __getProbabilities(self, fannOutVector):
    """Get the orange like output probabilities for the current predicted example"""
    dist = orange.DiscDistribution(self.classVar)
    # NOTE(review): raises ZeroDivisionError if the outputs sum to zero --
    # assumes the FANN output vector never sums to 0; confirm upstream.
    vectorSum = sum(fannOutVector)
    # fix the probabilities so that values are between 0 and 1
    OutVector = [p / vectorSum for p in fannOutVector]
    # Total mass of the negative (out-of-range) normalized outputs; it is
    # subtracted from any entry above 1 so the result stays near unit sum.
    subtract = abs(sum([x for x in OutVector if x < 0]))
    for idx, p in enumerate(OutVector):
        if p > 1:
            # Over-unit entry: compensate for the clamped negative mass.
            dist[self.classVar.values[idx]] = p - subtract
        elif p <= 0:
            # Negative or zero output: clamp to zero probability.
            dist[self.classVar.values[idx]] = 0
        else:
            dist[self.classVar.values[idx]] = p
    return dist
def t2est_continuous(self):
    # NOTE(review): the "t2est_" prefix keeps this test out of unittest
    # discovery -- rename to "test_" to re-enable it.
    """Exercise orange.ConstantClassifier construction variants on a
    continuous class variable."""
    d = orange.ExampleTable("iris")
    # Rebuild the data so the last attribute becomes a continuous class.
    dom2 = orange.Domain(d.domain.attributes)
    d = orange.ExampleTable(dom2, d)
    self.assertEqual(d.domain.class_var.var_type, orange.Variable.Continuous)
    dist = orange.Distribution(d.domain.class_var, d)
    # Constructor: class variable only.
    cc = orange.ConstantClassifier(d.domain.class_var)
    self.assertEqual(cc.class_var, d.domain.class_var)
    self.assertEqual(cc.default_distribution.variable, cc.class_var)
    # Constructor: distribution only; adopted by reference, not copied.
    cc2 = orange.ConstantClassifier(dist)
    self.assertEqual(cc2.class_var, d.domain.class_var)
    self.assertEqual(cc2.default_distribution.variable, cc2.class_var)
    self.assertEqual(id(cc2.default_distribution), id(dist))
    # Constructor: variable + no fixed value + distribution.
    cc3 = orange.ConstantClassifier(d.domain.class_var, None, dist)
    self.assertEqual(cc3.class_var, d.domain.class_var)
    self.assertEqual(cc3.default_distribution.variable, cc3.class_var)
    self.assertEqual(id(cc3.default_distribution), id(dist))
    # Constructor: variable + fixed default value + distribution.
    cc4 = orange.ConstantClassifier(d.domain.class_var, 5, dist)
    self.assertEqual(cc4.class_var, d.domain.class_var)
    # NOTE(review): "cc3" below looks like a copy/paste slip -- probably
    # meant cc4 (the assertion still holds since both share the variable).
    self.assertEqual(cc4.default_distribution.variable, cc3.class_var)
    self.assertEqual(id(cc4.default_distribution), id(dist))
    # Without a fixed value, predictions equal the distribution average.
    for cl in [cc2, cc3]:
        for e in d:
            self.assertEqual(cl(e), dist.average())
    # With a fixed value, predictions are always that value.
    for e in d:
        self.assertEqual(cc4(e), 5)
    # Invalid constructions must raise.
    self.assertRaises(TypeError, orange.ConstantClassifier, d.domain.class_var, dist)
    self.assertRaises(ValueError, orange.ConstantClassifier, None, "?", orange.DiscDistribution())
    self.assertRaises(ValueError, orange.ConstantClassifier, d.domain[1], "?", orange.Distribution(d.domain[0]))
    # The default distribution is writable from a plain list.
    cc4.default_distribution = [50, 50, 50]
    self.assertEqual(list(cc4.default_distribution), [50, 50, 50])
def getProbabilities(self, prediction):
    """Return a one-hot class distribution with all mass on *prediction*."""
    one_hot = orange.DiscDistribution(self.domain.classVar)
    one_hot[prediction] = 1
    return one_hot
print "Quartiles: %5.3f - %5.3f - %5.3f" % ( dage.percentile(25), dage.percentile(50), dage.percentile(75)) print for x in range(170, 190): print "dens(%4.1f)=%5.3f," % (x / 10.0, dage.density(x / 10.0)), print "*** WORKCLASS ***" dwcl = dist["workclass"] print "Native representation:", dwcl.native() print "Keys:", dwcl.keys() print "Values:", dwcl.values() print "Items: ", dwcl.items() print disc = orange.DiscDistribution([0.5, 0.3, 0.2]) for i in range(20): print disc.random(), print v = orange.EnumVariable(values=["red", "green", "blue"]) disc.variable = v for i in range(20): print disc.random(), print print cont = orange.ContDistribution({0.1: 12, 0.3: 3, 0.7: 3}) print "Manually constructed continuous distibution: ", cont print
def _singlePredict(self, origExamples = None, resultType = orange.GetValue, returnDFV = False):
    """
    orange.GetBoth -          <type 'tuple'>                     ->    (<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>)
    orange.GetValue -         <type 'orange.Value'>              ->    <orange.Value 'Act'='3.44158792'>
    orange.GetProbabilities - <type 'orange.DiscDistribution'>   ->    <0.000, 0.000>
    returnDFV - Flag indicating to return the Decision Function Value. If set to True, it will encapsulate the original result asked by the keyword resultType and the DFV into a tuple:
            ((<orange.Value 'Act'='3.44158792'>, <3.442: 1.000>), 2.34443)
            (<orange.Value 'Act'='3.44158792'>, 2.34443)
            (<0.000, 0.000>, 2.34443)
            If it is not a binary classifier, DFV will be equal to None
            DFV will be a value from greater or equal to 0
    """
    res = None
    #dataUtilities.rmAllMeta(examples)
    # Strip meta attributes (on a copy) -- the imputer cannot cope with them.
    if len(origExamples.domain.getmetas()) == 0:
        examples = origExamples
    else:
        examples = dataUtilities.getCopyWithoutMeta(origExamples)
    #Check if the examples are compatible with the classifier (attributes order and varType compatibility)
    if self.imputer:
        dataUtilities.verbose = self.verbose
        # Lazily configure the example-fixer against the imputer's domain.
        if not self.ExFix.ready:
            self.ExFix.set_domain(self.imputer.defaults.domain)
            self.ExFix.set_examplesFixedLog(self.examplesFixedLog)
        inExamples = self.ExFix.fixExample(examples)
        if not inExamples:
            if self.verbose > 0: print "Warning no example. Returning None prediction"
            return None
        #Imput the examples if there are missing values
        examplesImp = self.imputer(inExamples)
        # There is a problem with using the imputer when examples contain meta attributes.
        # Unable to remove meta attributes from the examples. OK to rm meta from ExampleTables, but not from Example objects.
        if not examplesImp:
            if self.verbose > 0: print "Unable to predict with the SVM model."
            if self.verbose > 0: print "Perhaps you need to remove meta attributes from your examples."
            return None
    else:
        if self.verbose > 0: print "Warning: No Imputer in SVM Classifier"
        examplesImp = examples
    if self.classifier.get_support_vector_count() ==0:
        if self.verbose > 0: print "WARNING: Support Vectors count is 0 (zero)"
    DFV = None
    if examplesImp:
        if self.scalizer:
            # Scale the example before converting it to an openCV matrix,
            # then map the scaled prediction back to the original range.
            exToPredict = dataUtilities.Example2CvMat(self.scalizer.scaleEx(examplesImp,True), self.varNames)
            res = self.classifier.predict(exToPredict)
            res = self.scalizer.convertClass(res)
            # Binary classification: the second predict call (with True)
            # presumably returns the raw decision-function value -- confirm
            # against the openCV CvSVM::predict signature.
            if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                DFV = self.classifier.predict(exToPredict, True)
            else:
                #On Regression models assume the DVF as the value predicted
                DFV = res
            self._updateDFVExtremes(DFV)
            res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
        else:
            # Same flow without scaling.
            exToPredict = dataUtilities.Example2CvMat(examplesImp,self.varNames)
            res = self.classifier.predict(exToPredict)
            if self.classVar.varType != orange.VarTypes.Continuous and len(self.classVar.values) == 2 and returnDFV:
                DFV = self.classifier.predict(exToPredict, True)
            else:
                #On Regression models assume the DVF as the value predicted
                DFV = res
            self._updateDFVExtremes(DFV)
            res = dataUtilities.CvMat2orangeResponse(res,self.classVar)
        # Build a degenerate (one-hot / single-spike) distribution when the
        # caller asked for probabilities or both.
        if resultType!=orange.GetValue:
            if examplesImp.domain.classVar.varType != orange.VarTypes.Continuous:
                dist = orange.DiscDistribution(examplesImp.domain.classVar)
                dist[res]=1
            else:
                y_hat = self.classVar(res)
                dist = Orange.statistics.distribution.Continuous(self.classVar)
                dist[y_hat] = 1.0
            if resultType==orange.GetProbabilities:
                res = dist
            else:
                res = (res,dist)
        if returnDFV:
            res = (res,DFV)
    self.nPredictions += 1
    return res