def find_best_feature_partition(examples, compare_fun, feature, max_splits):
    """
    Search the candidate partitions for one feature and return the best.

    Every partition produced by
    partition_generator_feature(examples, feature, max_splits) is scored
    with two values:

      absinfgain -- class entropy minus remainder entropy of the partition
      relinfgain -- pylib_probabilities.rel_info_gain(apriori, remainder,
                    cost), where cost is the partition's get_entropy()

    A candidate replaces the current best whenever
    compare_fun((relinfgain, absinfgain),
                (best_relinfgain, best_absinfgain)) returns a value > 0.

    Arguments:
    examples    -- an ml_exampleset instance.
    compare_fun -- callable taking two (relinfgain, absinfgain) tuples.
    feature     -- integer, handed on to the partition generator.
    max_splits  -- integer, handed on to the partition generator.

    Return the tuple (best_relinfgain, best_absinfgain, best_part). When
    no candidate is ever accepted this is (-1, -1, None).
    """
    assert isinstance(examples, ml_exampleset)
    assert isinstance(feature, IntType)
    assert isinstance(max_splits, IntType)

    best_relinfgain = -1
    best_absinfgain = -1
    best_part = None

    part_gen = partition_generator_feature(examples, feature, max_splits)
    while True:
        # Only the iterator advance is guarded: a StopIteration escaping
        # from the scoring code below must not be mistaken for the normal
        # end of the candidate list.
        try:
            part = next(part_gen)
        except StopIteration:
            break

        apriori = part.get_class_entropy()
        cost = part.get_entropy()
        remainder = part.get_remainder_entropy()

        absinfgain = apriori - remainder
        # NOTE(review): cost is passed through unchecked; confirm that
        # rel_info_gain tolerates cost == 0.
        relinfgain = pylib_probabilities.rel_info_gain(apriori,
                                                       remainder,
                                                       cost)

        if compare_fun((relinfgain, absinfgain),
                       (best_relinfgain, best_absinfgain)) > 0:
            best_relinfgain = relinfgain
            best_absinfgain = absinfgain
            best_part = part

    return (best_relinfgain, best_absinfgain, best_part)
def find_best_feature_partition(examples, compare_fun, feature, max_splits):
    """
    Search the candidate partitions for one feature and return the best.

    Every partition produced by
    partition_generator_feature(examples, feature, max_splits) is scored
    with two values:

      absinfgain -- class entropy minus remainder entropy of the partition
      relinfgain -- pylib_probabilities.rel_info_gain(apriori, remainder,
                    cost), where cost is the partition's get_entropy()

    A candidate replaces the current best whenever
    compare_fun((relinfgain, absinfgain),
                (best_relinfgain, best_absinfgain)) returns a value > 0.

    Arguments:
    examples    -- an ml_exampleset instance.
    compare_fun -- callable taking two (relinfgain, absinfgain) tuples.
    feature     -- integer, handed on to the partition generator.
    max_splits  -- integer, handed on to the partition generator.

    Return the tuple (best_relinfgain, best_absinfgain, best_part). When
    no candidate is ever accepted this is (-1, -1, None).
    """
    assert isinstance(examples, ml_exampleset)
    assert isinstance(feature, IntType)
    assert isinstance(max_splits, IntType)

    best_relinfgain = -1
    best_absinfgain = -1
    best_part = None

    part_gen = partition_generator_feature(examples, feature, max_splits)
    while True:
        # Only the iterator advance is guarded: a StopIteration escaping
        # from the scoring code below must not be mistaken for the normal
        # end of the candidate list.
        try:
            part = next(part_gen)
        except StopIteration:
            break

        apriori = part.get_class_entropy()
        cost = part.get_entropy()
        remainder = part.get_remainder_entropy()

        absinfgain = apriori - remainder
        # NOTE(review): cost is passed through unchecked; confirm that
        # rel_info_gain tolerates cost == 0.
        relinfgain = pylib_probabilities.rel_info_gain(apriori,
                                                       remainder,
                                                       cost)

        if compare_fun((relinfgain, absinfgain),
                       (best_relinfgain, best_absinfgain)) > 0:
            best_relinfgain = relinfgain
            best_absinfgain = absinfgain
            best_part = part

    return (best_relinfgain, best_absinfgain, best_part)
def plain_rel_inf_gain(self):
    """
    Return the relative information gain of this object's class entropy.

    The reference ("a-priori") entropy is what
    pylib_probabilities.compute_entropy_absdistrib() yields for a list
    holding a single 1 per class, i.e. self.get_class_number() ones.
    The object's own class entropy is handed to rel_info_gain() twice:
    once as the remainder and once as the cost argument.
    """
    one_per_class = [1] * self.get_class_number()
    reference_entropy = pylib_probabilities.compute_entropy_absdistrib(
        one_per_class)
    observed_entropy = self.get_class_entropy()
    return pylib_probabilities.rel_info_gain(reference_entropy,
                                             observed_entropy,
                                             observed_entropy)
def entropies(self):
    """
    Return a tuple of various entropic measures.

    A tree_partition is built from this object and self.sample, and the
    result is (apriori, remainder, absinfgain, relinfgain):

      apriori    -- the partition's class entropy
      remainder  -- the partition's remainder entropy
      absinfgain -- apriori minus remainder
      relinfgain -- pylib_probabilities.rel_info_gain(apriori, remainder,
                    cost), or 0 when the partition's own entropy (the
                    cost) equals 0
    """
    partition = tree_partition(self, self.sample)
    prior_entropy = partition.get_class_entropy()
    split_cost = partition.get_entropy()
    rest_entropy = partition.get_remainder_entropy()

    abs_gain = prior_entropy - rest_entropy
    # A zero cost short-circuits to 0 instead of calling rel_info_gain.
    rel_gain = (0 if split_cost == 0 else
                pylib_probabilities.rel_info_gain(prior_entropy,
                                                  rest_entropy,
                                                  split_cost))
    return (prior_entropy, rest_entropy, abs_gain, rel_gain)
def plain_rel_inf_gain(self):
    """
    Return the relative information gain of this object's class entropy.

    The baseline entropy comes from
    pylib_probabilities.compute_entropy_absdistrib() applied to a list of
    self.get_class_number() ones. The object's class entropy is then
    passed to rel_info_gain() as both the remainder and the cost.
    """
    class_count = self.get_class_number()
    baseline = pylib_probabilities.compute_entropy_absdistrib(
        [1] * class_count)
    actual = self.get_class_entropy()
    return pylib_probabilities.rel_info_gain(baseline, actual, actual)