def subsume_correctset(self):
    """Fold correct-set classifiers into a single subsumer, if one exists.

    ``self.correctset`` holds indices into ``self.popset``. The first
    classifier accepted by ``ClassifierMethods.is_subsumer`` becomes the
    subsumer. Every other correct-set classifier for which
    ``ClassifierMethods.is_more_general(self, subsumer, other)`` holds has
    its numerosity added to the subsumer and is then removed from the
    population, the match set and the correct set.

    Nothing happens when the correct set has fewer than two members, when
    no subsumer is found, or when no other member is subsumed.

    Returns:
        None
    """
    if len(self.correctset) <= 1:
        return

    subsumer = None
    compare_list = self.correctset.copy()
    for ref in self.correctset:
        if ClassifierMethods.is_subsumer(self, self.popset[ref]):
            subsumer = self.popset[ref]
            # list.remove() works in place and returns None; rebinding
            # compare_list to its result (as the code previously did)
            # threw the candidates away and disabled subsumption.
            compare_list.remove(ref)
            break

    if subsumer is None or not compare_list:
        return

    delete_list = [
        ref for ref in compare_list
        if ClassifierMethods.is_more_general(self, subsumer,
                                             self.popset[ref])
    ]

    # NOTE(review): each index is lowered by the number of removals already
    # performed. This presumes remove_from_pop() shifts later population
    # indices down by one and that delete_list is in ascending order --
    # confirm against remove_from_pop / remove_from_*set.
    for removed_so_far, ref in enumerate(delete_list):
        ref -= removed_so_far
        subsumer.update_numerosity(self.popset[ref].numerosity)
        self.remove_from_pop(ref)
        self.remove_from_matchset(ref)
        self.remove_from_correctset(ref)
def subsume_into_parents(self, offspring, parent1, parent2):
    """Try to absorb ``offspring`` into one of its parents.

    The parents are tested in order with ``ClassifierMethods.subsumption``.
    The first parent that subsumes the offspring gets its numerosity raised
    by one and ``self.micro_pop_size`` is incremented. If neither parent
    subsumes it, the offspring is handed to ``subsume_into_correctset``.

    Args:
        offspring: classifier produced by the genetic step.
        parent1: first parent, checked first.
        parent2: second parent, checked only if ``parent1`` does not subsume.

    Returns:
        None
    """
    for parent in (parent1, parent2):
        if ClassifierMethods.subsumption(self, parent, offspring):
            self.micro_pop_size += 1
            parent.update_numerosity(1)
            return

    # Neither parent is general enough; fall back to the correct set.
    self.subsume_into_correctset(offspring)
def __init__(self,
             attribute_info,
             dtypes,
             rand_func,
             sim_delta,
             sim_mode='global',
             clustering_method=None,
             cosine_matrix=None,
             popset=None,
             data_cov_inv=None):
    """Set up an empty (or pre-seeded) classifier population.

    Args:
        attribute_info: stored unchanged as ``self.attribute_info``.
        dtypes: passed to ``ClassifierMethods.__init__`` and stored as
            ``self.dtypes``.
        rand_func: random source, stored as ``self.random``.
        sim_delta: passed to ``GraphPart.__init__``.
        sim_mode: ``'global'`` is encoded as ``self.sim_mode = 1``; any
            other value as ``0``.
        clustering_method: ``None``, ``'hfps'`` or ``'wsc'``, encoded as
            ``self.clustering_method`` 0, 1 or 2 respectively.
        cosine_matrix: similarity matrix exposing ``.any()``; required
            (not ``None`` and with at least one truthy entry) when
            ``sim_mode == 'global'``.
        popset: optional initial population; used when truthy.
        data_cov_inv: optional object exposing ``.any()``; stored as
            ``self.cov_inv`` only when given and ``.any()`` is truthy.
            Otherwise ``self.cov_inv`` is left unset.

    Raises:
        Exception: if ``sim_mode == 'global'`` without a usable
            ``cosine_matrix``, or if ``clustering_method`` is not one of
            the supported values.
    """
    ClassifierMethods.__init__(self, dtypes)
    GraphPart.__init__(self, sim_delta)

    self.popset = []
    self.matchset = []
    self.correctset = []
    self.micro_pop_size = 0
    self.ave_generality = 0.0
    self.ave_fitness = 0.0
    self.classifier = Classifier()
    self.attribute_info = attribute_info
    self.dtypes = dtypes
    self.random = rand_func
    self.cosine_matrix = cosine_matrix
    self.k = MAX_CLASSIFIER
    if popset:
        self.popset = popset

    # cosine_matrix defaults to None; check that before calling .any() so
    # a missing matrix yields the intended error, not an AttributeError.
    if sim_mode == 'global' and (cosine_matrix is None
                                 or not cosine_matrix.any()):
        raise Exception(
            'similarity matrix required when sim_mode==Global!')
    self.sim_mode = 1 if sim_mode == 'global' else 0

    if clustering_method not in [None, 'hfps', 'wsc']:
        raise Exception('undefined clustering method!')
    if clustering_method == 'hfps':
        self.clustering_method = 1
    elif clustering_method == 'wsc':
        self.clustering_method = 2
    else:
        self.clustering_method = 0

    # data_cov_inv is optional (default None); only call .any() when it
    # was actually supplied.
    if data_cov_inv is not None and data_cov_inv.any():
        self.cov_inv = data_cov_inv
def get_identical(self, classifier, search_matchset=False):
    """Return the first stored classifier equal to ``classifier``.

    Equality is decided by ``ClassifierMethods.is_equal``.

    Args:
        classifier: classifier to look for.
        search_matchset: when true, only the population members referenced
            by ``self.matchset`` are examined (in match-set order);
            otherwise the whole ``self.popset`` is examined.

    Returns:
        The first equal classifier found, or ``None`` if there is none.
    """
    if search_matchset:
        candidates = (self.popset[ref] for ref in self.matchset)
    else:
        candidates = self.popset

    matches = [
        candidate for candidate in candidates
        if ClassifierMethods.is_equal(self, classifier, candidate)
    ]
    return matches[0] if matches else None
def write_pop(self, pop, dtypes):
    """Write the classifiers in ``pop`` to ``model_<exp>.csv``.

    The file is created under ``curdir/REPORT_PATH/DATA_HEADER``. The first
    line is a header with one ``f<i>`` column per entry of ``dtypes``
    followed by the fixed statistic columns; each classifier is then
    written using ``ClassifierMethods(dtypes).classifier_print``.

    Args:
        pop: iterable of classifiers to serialise.
        dtypes: sized collection of attribute types; its length sets the
            number of ``f<i>`` header columns.

    Raises:
        Exception: whatever building the path or opening the file raised;
            diagnostics are printed and the exception is re-raised.
    """
    try:
        file_name = join(curdir, REPORT_PATH, DATA_HEADER,
                         "model_" + str(self.exp) + ".csv")
        model_file = open(file_name, 'w')
    except Exception as inst:
        print(type(inst))
        print(inst.args)
        print(inst)
        print('cannot open file', " model_" + str(self.exp) + ".csv")
        raise

    # The context manager guarantees the handle is closed even if
    # formatting or writing a classifier raises part-way through.
    with model_file:
        method = ClassifierMethods(dtypes)
        header = ",".join('f' + str(i) for i in range(len(dtypes)))
        header += ", specificity, prediction, label_precision, fitness, hloss, numerosity, match_count, " \
                  "avg_match_set, init_time, ga_time \n"
        model_file.write(header)
        for cl in pop:
            model_file.write(method.classifier_print(cl))
def subsume_into_correctset(self, classifier):
    """Absorb ``classifier`` into a correct-set member, or insert it.

    Collects every correct-set reference whose population entry subsumes
    ``classifier`` according to ``ClassifierMethods.subsumption``. If any
    exist, one is drawn with ``self.random.randint``, its numerosity is
    raised by one and ``self.micro_pop_size`` is incremented. Otherwise
    the classifier is added through ``insert_classifier_pop``.

    Args:
        classifier: the classifier to absorb or insert.

    Returns:
        None
    """
    subsumers = []
    for ref in self.correctset:
        if ClassifierMethods.subsumption(self, self.popset[ref], classifier):
            subsumers.append(ref)

    if not subsumers:
        self.insert_classifier_pop(classifier)
        return

    # NOTE(review): the upper bound len - 1 presumes randint() is inclusive
    # on both ends (as in Python's ``random``) -- confirm what rand_func is.
    pick = self.random.randint(0, len(subsumers) - 1)
    chosen = self.popset[subsumers[pick]]
    chosen.update_numerosity(1)
    self.micro_pop_size += 1
def apply_ga(self, iteration, state, data):
    """Run one genetic step on the correct set and insert the offspring.

    With more than one classifier in ``self.correctset``, parents and
    offspring come from ``self.selection``; crossover (``self.xover``) is
    attempted when a random draw is below ``P_XOVER`` and the two offspring
    are not equal. Otherwise both offspring are copies of the single
    correct-set classifier. Both offspring are then mutated, their fitness
    is discounted by ``FITNESS_RED`` (averaged first if crossover changed
    them), and each offspring that passes ``ga_coverage`` is passed to
    ``insert_discovered_classifier``.

    Args:
        iteration: current iteration, forwarded to ``selection`` /
            ``classifier_copy``.
        state: forwarded to ``self.mutate``.
        data: forwarded to ``ga_coverage``.

    Returns:
        None

    Note:
        ``self.correctset`` must not be empty; the single-parent branch
        indexes ``self.correctset[0]``.
    """
    crossed = False
    if len(self.correctset) > 1:
        parent1, parent2, child1, child2 = self.selection(iteration)
        # The random draw happens before (and gates) the equality check.
        wants_xover = self.random.random() < P_XOVER
        if wants_xover and not ClassifierMethods.is_equal(
                self, child1, child2):
            child1, child2, crossed = self.xover(child1, child2)
    else:
        # Only one candidate: it acts as both parents.
        parent1 = parent2 = self.popset[self.correctset[0]]
        child1 = Classifier()
        child1.classifier_copy(parent1, iteration)
        child2 = Classifier()
        child2.classifier_copy(parent2, iteration)

    # NOTE(review): mutation is read as applying to both offspring on every
    # path, regardless of whether crossover ran -- confirm against the
    # original layout. The third value returned by mutate() is not used.
    for child in (child1, child2):
        child.condition, child.specified_atts, _ = self.mutate(child, state)

    if crossed:
        blended = FITNESS_RED * (child1.fitness + child2.fitness) / 2
        child1.set_fitness(blended)
        child2.set_fitness(child1.fitness)
    else:
        for child in (child1, child2):
            child.set_fitness(FITNESS_RED * child.fitness)

    for child in (child1, child2):
        if ga_coverage(child, data, self.dtypes):
            self.insert_discovered_classifier(child, parent1, parent2)