Example #1
def find_best_feature_partition(examples,
                                compare_fun,
                                feature,
                                max_splits):
    """Scan all candidate partitions of `examples` on `feature` and return
    the one that `compare_fun` ranks highest by (relative, absolute)
    information gain."""
    assert isinstance(examples, ml_exampleset)
    assert type(feature) == IntType
    assert type(max_splits) == IntType

    best_relinfgain = -1
    best_absinfgain = -1
    best_part       = None
    part_gen        = partition_generator_feature(examples, feature, max_splits)
    while 1:
        try:
            part       = part_gen.next()
            apriori    = part.get_class_entropy()      # class entropy before splitting
            cost       = part.get_entropy()            # entropy of the partition itself
            remainder  = part.get_remainder_entropy()  # class entropy left after the split
            absinfgain = apriori - remainder
            relinfgain = pylib_probabilities.rel_info_gain(apriori,
                                                           remainder,
                                                           cost)
            # Keep this partition if compare_fun ranks its gains above the current best.
            if compare_fun((relinfgain, absinfgain),
                           (best_relinfgain, best_absinfgain)) > 0:
                best_relinfgain = relinfgain
                best_absinfgain = absinfgain
                best_part       = part
        except StopIteration:
            break
    return (best_relinfgain, best_absinfgain, best_part)
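The pylib_probabilities module itself is not part of this listing. As a rough mental model, here is a minimal sketch of a relative information gain with the same call shape, assuming it simply normalizes the absolute gain (apriori - remainder) by the cost of the split; the name and argument order follow the calls above, but the body is an assumption, not the library's actual implementation.

def rel_info_gain(apriori, remainder, cost):
    # Hypothetical sketch: absolute information gain normalized by the split cost.
    # No zero-cost guard here; note that the entropies() example further down
    # checks cost == 0 itself before calling the real function.
    return (apriori - remainder) / cost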
Example #2
def plain_rel_inf_gain(self):
    # Relative information gain of the observed class distribution measured
    # against a uniform a-priori distribution (one count per class).
    apriori_entropy = pylib_probabilities.compute_entropy_absdistrib(
        [1] * self.get_class_number())
    real_entropy = self.get_class_entropy()
    return pylib_probabilities.rel_info_gain(apriori_entropy,
                                             real_entropy,
                                             real_entropy)
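compute_entropy_absdistrib is likewise not shown in the listing. A minimal sketch, assuming it returns the Shannon entropy (in bits) of a distribution given as absolute counts:

import math

def compute_entropy_absdistrib(counts):
    # Hypothetical sketch: Shannon entropy (in bits) of an absolute-count distribution.
    total = float(sum(counts))
    entropy = 0.0
    for c in counts:
        if c > 0:
            p = c / total
            entropy -= p * math.log(p, 2)
    return entropy

With [1] * n as input this reduces to log2(n), the maximum entropy over n classes, so plain_rel_inf_gain compares the observed class entropy against that uniform ceiling.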
Example #3
def entropies(self):
    """
    Return a tuple of various entropic measures.
    """
    tp        = tree_partition(self, self.sample)
    apriori   = tp.get_class_entropy()      # class entropy before splitting
    cost      = tp.get_entropy()            # entropy of the partition itself
    remainder = tp.get_remainder_entropy()  # class entropy left after the split
    absinfgain = apriori - remainder
    # A zero-cost partition is treated as a special case: report a relative
    # gain of 0 instead of calling rel_info_gain.
    if cost == 0:
        relinfgain = 0
    else:
        relinfgain = pylib_probabilities.rel_info_gain(apriori,
                                                       remainder,
                                                       cost)
    return (apriori, remainder, absinfgain, relinfgain)
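To make the quantities in entropies() concrete, here is a small worked example built on the two helpers sketched above. The counts, and the assumptions that the remainder is the size-weighted class entropy of the branches and that the cost is the entropy of the branch sizes, are illustrative guesses, not taken from the library.

# A hypothetical two-way split of 16 examples with two classes.
parent_counts = [8, 8]            # class counts before splitting
child_counts  = [[8, 2], [0, 6]]  # class counts in each branch

apriori = compute_entropy_absdistrib(parent_counts)              # 1.0 bit
sizes   = [sum(c) for c in child_counts]                         # [10, 6]
total   = float(sum(sizes))
remainder = sum((sum(c) / total) * compute_entropy_absdistrib(c)
                for c in child_counts)                           # ~0.451 bits
cost = compute_entropy_absdistrib(sizes)                         # ~0.954 bits

absinfgain = apriori - remainder                                 # ~0.549
relinfgain = rel_info_gain(apriori, remainder, cost)             # ~0.575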