def filterAndStore(self, oldRule=None):
    """Apply the rule's filter to the data and cache the resulting statistics.

    Sets, among others: ``examples``, ``classifier``, ``complexity``,
    ``ruleString``, ``orderedRuleString``, ``classDistribution``, ``TP``,
    ``FP``, ``TPlen``, ``FPlen``, ``N_examples``, ``NTC``, ``quality``,
    ``support`` and ``confidence``.

    oldRule -- optional rule this one was derived from.  When it is given
               and has non-zero complexity, ``P`` and ``N`` are taken from
               its true/false positives; otherwise they are counted on the
               whole data set.

    When the filter covers no examples, the statistics are reset to zero
    and none of the ``calculate*Quality`` methods is called.
    """
    self.examples = self.filter(self.data)          # set examples
    self.classifier = self.learner(self.examples)   # set classifier
    distribution = [0.0] * len(self.data.domain.classVar.values)
    self.complexity = len(self.filter.conditions)   # set rule complexity
    self.ruleString = self.ruleToString()
    self.orderedRuleString = self.orderedRuleToString()

    n_covered = len(self.examples)
    if n_covered > 0:
        # Relative class frequencies among the covered examples.  The
        # counters start as floats, so the division below is a true division.
        for d in self.examples:
            distribution[int(d.getclass())] += 1
        distribution = [count / n_covered for count in distribution]
        self.classDistribution = orange.Distribution(distribution)  # set distribution

        # True positives / false positives with respect to the target class.
        self.TP = [e for e in self.examples if e.getclass() == self.targetClass]
        self.FP = [e for e in self.examples if e.getclass() != self.targetClass]

        self.N_examples = len(self.data)
        self.NTC = int(orange.getClassDistribution(self.data)[self.targetClass])

        if oldRule is None or oldRule.complexity == 0:
            # No (non-trivial) parent rule: count positives and negatives
            # on the whole data set.
            class_var = self.data.domain.classVar
            self.P = len(self.data.filter({class_var: self.targetClass}))
            self.N = len(self.data.filter({class_var: self.targetClass}, negate=1))
        else:
            self.P = len(oldRule.TP)
            self.N = len(oldRule.FP)

        self.TPlen = len(self.TP) * 1.0
        self.FPlen = len(self.FP) * 1.0

        # These are called after TP/FP/P/N and their lengths are in place,
        # exactly as in the original statement order.
        self.calculateRefinementQuality()
        self.calculateSelectionQuality()
        self.calculateWeightedSelectionQuality()

        # Rule quality: generalization quotient TP / (FP + g).
        self.quality = self.TPlen / (len(self.FP) + self.g)
        self.support = 1.0 * n_covered / len(self.data)   # set rule support
        self.confidence = self.TPlen / n_covered
    else:
        # NOTE(review): here classDistribution is a plain list of zeros,
        # not an orange.Distribution as in the branch above.
        self.classDistribution = distribution
        self.TP = []
        self.FP = []
        self.quality = 0    # set rule quality: generalization quotient
        self.support = 0    # set rule support
        # The original left `confidence` unset on this path; define it so
        # that reading it on an empty rule does not raise AttributeError.
        self.confidence = 0
        self.TPlen = 0
        self.FPlen = 0
        self.N_examples = 0
        self.NTC = 0
        self.refinement_quality = 0.0
        self.selection_quality = 0.0
        self.weighted_selection_quality = 0.0
        # NOTE(review): self.P and self.N are not assigned on this path.
def listOfClusters(root):
    """Return the list that listOfClusters0 fills in when started at `root`.

    listOfClusters0 is defined outside this view; presumably it appends one
    entry per leaf cluster, like listOfClustersT0 below -- TODO confirm.
    """
    l = []
    listOfClusters0(root, l)
    return l

# `prune` is defined outside this view; it is called on the root with 1.4
# before the clusters are listed.
prune(root, 1.4)

# Print every cluster under a numbered banner, one example per line.
for n, cluster in enumerate(listOfClusters(root)):
    print "\n\n*** Cluster %i ***\n" % n
    for ex in cluster:
        print ex

# Print the class distribution of every cluster on a single line:
# "<class value>: <count>" for each class.
for cluster in listOfClusters(root):
    dist = orange.getClassDistribution(cluster)
    for e, d in enumerate(dist):
        # The trailing comma keeps the output on the same line.
        print "%s: %3.0f " % (data.domain.classVar.values[e], d),
    # Terminate the line for this cluster.
    print

def listOfClustersT0(cluster, alist):
    """Append an orange.ExampleTable to `alist` for each branch-less cluster.

    A cluster without branches is converted with orange.ExampleTable(cluster)
    and appended; otherwise the function recurses into every branch.
    """
    if not cluster.branches:
        alist.append(orange.ExampleTable(cluster))
    else:
        for branch in cluster.branches:
            listOfClustersT0(branch, alist)

def listOfClustersT(root):
    # NOTE(review): the body appears to continue beyond this excerpt.
    l = []
# Clear the generator object and set a fixed seed instead; presumably the
# calls below are then driven by `randseed` alone -- confirm against the
# class of `indices2`, which is created outside this view.
indices2.randomGenerator = None
indices2.randseed = 42
for i in range(5):
    print indices2(data)

# Per the banner, p0 = 0.25 is meant as a probability, not as a count.
print "\nIndices with p0 set as probability (not 'a number of')"
indices2.p0 = 0.25
print indices2(data)

# Stratified run: build the indices, pass them to data.select and print the
# normalized class distributions of the full data (od) and the selection (sd).
print "\n... with stratification"
indices2.stratified = indices2.Stratified
ind = indices2(data)
print ind
data2 = data.select(ind)
od = orange.getClassDistribution(data)
sd = orange.getClassDistribution(data2)
od.normalize()
sd.normalize()
print od
print sd

# Same steps with stratification switched off.  Note that indices2 is called
# twice here: once for the printout and once for `ind`.
print "\n... and without stratification"
indices2.stratified = indices2.NotStratified
print indices2(data)
ind = indices2(data)
print ind
data2 = data.select(ind)
od = orange.getClassDistribution(data)
sd = orange.getClassDistribution(data2)
od.normalize()
# Fixed seed so that the random values assigned below are reproducible.
random.seed(0)
data = orange.ExampleTable("lenses")
id = -42
# Note that this is wrong. Id should be assigned by
#    id = orange.newmetaid()
# We only do this so that the script gives the same output each time it's run

# Store a random value under `id` in every example.
for example in data:
    example[id] = orange.Value(random.random())

# Print the first example, then the class distribution computed without and
# with `id` passed as the second argument.
print data[0]
print orange.getClassDistribution(data)
print orange.getClassDistribution(data, id)

# Register the float variable "w" under the same id in the domain.
w = orange.FloatVariable("w")
data.domain.addmeta(id, w)

# The same value is now addressed by id, by variable object and by name.
print data[0]
print data[0][id]
print data[0][w]
print data[0]["w"]

# Three assignments to the same slot: an orange.Value, a string and a float.
data[0][id] = orange.Value(w, 2.0)
data[0][id] = "2.0"
data[0][id] = 2.0
# NOTE(review): the call below is the tail of a definition that starts above
# this excerpt (presumably the recursive step of listOfClusters0); its
# original indentation cannot be recovered from here.
listOfClusters0(branch, alist)

def listOfClusters(root):
    """Return the list that listOfClusters0 fills in when started at `root`."""
    l = []
    listOfClusters0(root, l)
    return l

# `prune` is defined outside this view; it is called on the root with 1.4
# before the clusters are listed.
prune(root, 1.4)

# Print every cluster under a numbered banner, one example per line.
for n, cluster in enumerate(listOfClusters(root)):
    print "\n\n*** Cluster %i ***\n" % n
    for ex in cluster:
        print ex

# Print the class distribution of every cluster on a single line:
# "<class value>: <count>" for each class.
for cluster in listOfClusters(root):
    dist = orange.getClassDistribution(cluster)
    for e, d in enumerate(dist):
        # The trailing comma keeps the output on the same line.
        print "%s: %3.0f " % (data.domain.classVar.values[e], d),
    # Terminate the line for this cluster.
    print

def listOfClustersT0(cluster, alist):
    """Append an orange.ExampleTable to `alist` for each branch-less cluster.

    A cluster without branches is converted with orange.ExampleTable(cluster)
    and appended; otherwise the function recurses into every branch.
    """
    if not cluster.branches:
        alist.append(orange.ExampleTable(cluster))
    else:
        for branch in cluster.branches:
            listOfClustersT0(branch, alist)

def listOfClustersT(root):
    """Return the list of ExampleTables collected by listOfClustersT0 from `root`."""
    l = []
    listOfClustersT0(root, l)
    return l
print "\nIndices with randseed"
# Clear the generator object and set a fixed seed instead; presumably the
# calls below are then driven by `randseed` alone -- confirm against the
# class of `indices2`, which is created outside this view.
indices2.randomGenerator = None
indices2.randseed = 42
for i in range(5):
    print indices2(data)

# Per the banner, p0 = 0.25 is meant as a probability, not as a count.
print "\nIndices with p0 set as probability (not 'a number of')"
indices2.p0 = 0.25
print indices2(data)

# Stratified run: build the indices, pass them to data.select and print the
# normalized class distributions of the full data (od) and the selection (sd).
print "\n... with stratification"
indices2.stratified = indices2.Stratified
ind = indices2(data)
print ind
data2 = data.select(ind)
od = orange.getClassDistribution(data)
sd = orange.getClassDistribution(data2)
od.normalize()
sd.normalize()
print od
print sd

# Same steps with stratification switched off.  Note that indices2 is called
# twice here: once for the printout and once for `ind`.
print "\n... and without stratification"
indices2.stratified = indices2.NotStratified
print indices2(data)
ind = indices2(data)
print ind
data2 = data.select(ind)
od = orange.getClassDistribution(data)
sd = orange.getClassDistribution(data2)
od.normalize()