def computeDists(data, weight=0, targetClass=0, N=100, learner=None):
    """ Compute distributions of likelihood ratio statistics of extreme (best) rules. """
    if not learner:
        learner = createLearner()

    #########################
    ## Learner preparation ##
    #########################
    oldStopper = learner.ruleFinder.ruleStoppingValidator
    evaluator = learner.ruleFinder.evaluator
    learner.ruleFinder.evaluator = orange.RuleEvaluator_LRS()
    learner.ruleFinder.evaluator.storeRules = True
    learner.ruleFinder.ruleStoppingValidator = orange.RuleValidator_LRS(alpha=1.0)
    learner.ruleFinder.ruleStoppingValidator.max_rule_complexity = 0

    # loop through N (sampling repetitions)
    maxVals = []
    for d_i in range(N):
        # create data set (remove and randomize)
        tempData = createRandomDataSet(data)
        learner.ruleFinder.evaluator.rules = orange.RuleList()
        # Next, learn a rule
        bestRule = learner.ruleFinder(tempData, weight, targetClass, orange.RuleList())
        maxVals.append(bestRule.quality)
    extremeDists = [compParameters(maxVals, 1.0, 1.0)]

    #####################
    ## Restore learner ##
    #####################
    learner.ruleFinder.evaluator = evaluator
    learner.ruleFinder.ruleStoppingValidator = oldStopper
    return extremeDists
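
# Illustrative usage sketch (not part of the original module): obtains EVD
# parameters for every class value of a data set so they can later be passed
# to the learner through its `dists` attribute. The helper name and the way
# the results are collected are assumptions made for this example only.
def _example_compute_dists_per_class(data, N=100):
    # one parameter list per class value, estimated from N random restarts
    return [computeDists(data, targetClass=cl_i, N=N)
            for cl_i in range(len(data.domain.classVar.values))]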
def add_sub_rules(rules, examples, weight, learner, dists):
    apriori = orange.Distribution(examples.domain.classVar, examples, weight)
    newRules = orange.RuleList()
    for r in rules:
        newRules.append(r)

    # loop through rules
    for r in rules:
        tmpList = orange.RuleList()
        tmpRle = r.clone()
        tmpRle.filter.conditions = []
        tmpRle.parentRule = None
        tmpRle.filterAndStore(examples, weight, r.classifier.defaultVal)
        tmpList.append(tmpRle)
        while tmpList and len(tmpList[0].filter.conditions) <= len(r.filter.conditions):
            tmpList2 = orange.RuleList()
            for tmpRule in tmpList:
                # evaluate tmpRule
                oldREP = learner.ruleFinder.evaluator.returnExpectedProb
                learner.ruleFinder.evaluator.returnExpectedProb = False
                learner.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(
                    dists[int(r.classifier.defaultVal)])
                tmpRule.quality = learner.ruleFinder.evaluator(
                    tmpRule, examples, weight, r.classifier.defaultVal, apriori)
                learner.ruleFinder.evaluator.returnExpectedProb = oldREP
                # if rule not in rules already, add it to the list
                if not any(rules_equal(ri, tmpRule) for ri in newRules) and \
                   len(tmpRule.filter.conditions) > 0 and \
                   tmpRule.quality > apriori[r.classifier.defaultVal] / apriori.abs:
                    newRules.append(tmpRule)
                # create new tmpRules, set parent Rule, append them to tmpList2
                if not any(rules_equal(ri, tmpRule) for ri in newRules):
                    for c in r.filter.conditions:
                        tmpRule2 = tmpRule.clone()
                        tmpRule2.parentRule = tmpRule
                        tmpRule2.filter.conditions.append(c)
                        tmpRule2.filterAndStore(examples, weight, r.classifier.defaultVal)
                        if tmpRule2.classDistribution.abs < tmpRule.classDistribution.abs:
                            tmpList2.append(tmpRule2)
            tmpList = tmpList2

    # add a default (empty-condition) rule for each class value
    for cl in examples.domain.classVar:
        tmpRle = orange.Rule()
        tmpRle.filter = orange.Filter_values(domain=examples.domain)
        tmpRle.parentRule = None
        tmpRle.filterAndStore(examples, weight, int(cl))
        tmpRle.quality = tmpRle.classDistribution[int(cl)] / tmpRle.classDistribution.abs
        newRules.append(tmpRle)
    return newRules
def sortRules(self, rules):
    # Repeatedly pick the best remaining rule: prefer rules with fewer
    # conditions, breaking ties by the larger beta; rules with a negligible
    # beta are skipped entirely.
    newRules = orange.RuleList()
    foundRule = True
    while foundRule:
        foundRule = False
        bestRule = None
        for r in rules:
            if r in newRules:
                continue
            if -0.01 < r.beta < 0.01:
                continue
            if not bestRule:
                bestRule = r
                foundRule = True
                continue
            if len(r.filter.conditions) < len(bestRule.filter.conditions):
                bestRule = r
                foundRule = True
                continue
            if len(r.filter.conditions) == len(bestRule.filter.conditions) and r.beta > bestRule.beta:
                bestRule = r
                foundRule = True
                continue
        if bestRule:
            newRules.append(bestRule)
    return newRules
def __call__(self, example, result_type=orange.GetValue, retRules=False):
    def add(disc1, disc2, sumd):
        disc = orange.DiscDistribution(disc1)
        sumdisc = sumd
        for i, d in enumerate(disc):
            disc[i] += disc2[i]
            sumdisc += disc2[i]
        return disc, sumdisc

    # create empty distribution
    retDist = orange.DiscDistribution(self.examples.domain.classVar)
    covRules = orange.RuleList()
    # iterate through rules; sum the class distributions of rules that cover the example
    sumdisc = 0.
    for r in self.rules:
        if r(example) and r.classDistribution:
            retDist, sumdisc = add(retDist, r.classDistribution, sumdisc)
            covRules.append(r)
    if not sumdisc:
        # no rule covers the example: fall back to the prior
        # (copied so the stored prior is not normalized in place)
        retDist = orange.DiscDistribution(self.prior)
        sumdisc = self.prior.abs
    for c in self.examples.domain.classVar:
        retDist[c] /= sumdisc

    if retRules:
        if result_type == orange.GetValue:
            return (retDist.modus(), covRules)
        if result_type == orange.GetProbabilities:
            return (retDist, covRules)
        return (retDist.modus(), retDist, covRules)
    if result_type == orange.GetValue:
        return retDist.modus()
    if result_type == orange.GetProbabilities:
        return retDist
    return (retDist.modus(), retDist)
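
# Illustrative usage sketch (assumptions: `classifier` is an instance of the
# unordered rule classifier whose __call__ is defined above, and `example`
# is an orange.Example from the same domain):
def _example_classify(classifier, example):
    value = classifier(example)                          # predicted class only
    dist, rules = classifier(example, orange.GetProbabilities,
                             retRules=True)              # distribution + covering rules
    return value, dist, rules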
def __init__(self, alpha=.05, min_coverage=0, max_rule_length=0, rules=None):
    # avoid a shared mutable default argument: create a fresh RuleList when none is given
    self.rules = rules if rules is not None else orange.RuleList()
    self.validator = orange.RuleValidator_LRS(alpha=alpha,
                                              min_coverage=min_coverage,
                                              max_rule_length=max_rule_length)
def getBestRules(self, currentRules, examples, weightID):
    bestRules = orange.RuleList()
    for r in currentRules:
        if hasattr(r.learner, "argumentRule") and not orngCN2.rule_in_set(r, bestRules):
            bestRules.append(r)
    for r_i, r in enumerate(self.bestRule):
        if r and not rule_in_set(r, bestRules) and \
           examples[r_i].getclass() == r.classifier.defaultValue:
            bestRules.append(r)
    return bestRules
def __call__(self, examples, weight=0):
    supervisedClassCheck(examples)

    rules = orange.RuleList()
    self.ruleStopping.apriori = orange.Distribution(examples.domain.classVar, examples)
    progress = getattr(self, "progressCallback", None)
    if progress:
        progress.start = 0.0
        progress.end = 0.0
        distrib = orange.Distribution(examples.domain.classVar, examples, weight)
        distrib.normalize()
    for targetClass in examples.domain.classVar:
        if progress:
            progress.start = progress.end
            progress.end += distrib[targetClass]
        self.targetClass = targetClass
        cl = orange.RuleLearner.__call__(self, examples, weight)
        for r in cl.rules:
            rules.append(r)
    if progress:
        progress(1.0, None)
    return CN2UnorderedClassifier(rules, examples, weight)
def learnRule(self, examples, weightID, targetClass):
    # The evaluator keeps track of the best rule it has seen; return that
    # rather than the rule finder's own result.
    self.ruleFinder.evaluator.bestRule = None
    self.ruleFinder(examples, weightID, targetClass, orange.RuleList())
    return self.ruleFinder.evaluator.bestRule
def __call__(self, examples, weight=0):
    supervisedClassCheck(examples)

    apriori = orange.Distribution(examples.domain.classVar, examples, weight)
    ruleSet = orange.RuleList()  # resulting set of rules

    # Progress bar in widgets
    progress = getattr(self, "progressCallback", None)
    if progress:
        self.progressCallback = progress
        progress.start = 0.0
        progress.end = 0.0
        distrib = orange.Distribution(examples.domain.classVar, examples, weight)
        distrib.normalize()

    # Main Loop
    temp_dists = []
    for cl_i, cl in enumerate(examples.domain.classVar):
        # rulesForClass ... rules for this class only
        rulesForClass = orange.RuleList()
        if progress:
            progress.start = progress.end
            progress.end += distrib[cl]

        # Compute EVD distribution if not set
        if getattr(self, "dists", None):
            self.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(self.dists[cl_i])
            temp_dists.append(self.dists[cl_i])
        else:
            ds = computeDists(examples, weight=weight, targetClass=cl_i,
                              N=self.N, learner=self)
            self.ruleFinder.evaluator.evDistGetter.dists = createEVDistList(ds)
            temp_dists.append(ds)

        examples = self.coverAndRemove.initialize(examples, weight, cl, apriori)
        self.ruleFinder.evaluator.probVar = examples.domain.getmeta(
            self.coverAndRemove.probAttribute)
        self.targetClass = cl

        # Learn rules
        while not self.dataStopping(examples, weight, cl):
            # Learn rule
            rule = self.learnRule(examples, weight, cl)
            if not rule or len(rule.filter.conditions) == 0:
                break  # stop learning if no rule has been learned
            (examples, weight) = self.coverAndRemove(rule, examples, weight, cl)
            # add rule to rule set
            if not rule_in_set(rule, rulesForClass):
                rulesForClass.append(rule)
            if progress:
                progress(self.coverAndRemove.remainingExamplesP(examples, cl), None)
            else:
                print "%4.2f," % self.coverAndRemove.remainingExamplesP(examples, cl),
        if not progress:
            print

        ruleSet.extend(self.coverAndRemove.getBestRules(rulesForClass, examples, weight))

    if progress:
        progress(1.0, None)
    self.ruleFinder.evaluator.probVar = None

    if self.add_sub_rules:
        ruleSet = add_sub_rules(ruleSet, examples, weight, self, temp_dists)

    return self.LCR(ruleSet, examples, weight)
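
# Illustrative usage sketch (assumptions: `learner` is an instance of the
# learner class whose __call__ is defined above, with its N, coverAndRemove,
# dataStopping and add_sub_rules attributes already configured, and `data` is
# an orange.ExampleTable with a discrete class):
def _example_learn_and_predict(learner, data):
    classifier = learner(data)                      # induce the rule-based classifier
    return classifier(data[0], orange.GetBoth)      # (predicted class, distribution)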