def model_baseline(self, prob):
    """Score the baseline model that keeps every feature.

    Builds a single "particle" whose entries are all 1 (one slot per
    column of ``prob.data`` plus one trailing slot for the fitness
    value), evaluates it with a one-particle :class:`SolutionEvaluator`,
    and returns the evaluated row (positions followed by the score).
    """
    num_features = prob.data.shape[1]
    # One row: every feature flag set to 1, plus the fitness slot.
    all_selected = np.ones(shape=(1, num_features + 1))
    baseline_evaluator = SolutionEvaluator(prob, 1)
    evaluated = baseline_evaluator.evaluate(all_selected)
    return evaluated[0]
def fit(self, X, unused_y, **kargs):
    """Run the PSO feature-selection search over the columns of X.

    Parameters
    ----------
    X : pd.DataFrame
        Training data; each column is a candidate feature.
    unused_y : array-like
        Target values, forwarded to ``Problem`` for evaluation.
    **kargs
        Extra keyword arguments forwarded to ``Problem``.

    Raises
    ------
    PSOException
        If ``X`` is not a pandas DataFrame.

    Side effects: populates ``best_global_``, ``selected_features_``,
    ``_final_cols`` and ``_final_index``, and logs progress per iteration.
    """
    if not isinstance(X, pd.DataFrame):
        raise PSOException('The "X" parameter must be a data frame')
    self._initialize(X)
    prob = Problem(X, unused_y, self.estimator, self.cv, **kargs)
    self.N_ = prob.n_cols
    self.evaluator_ = SolutionEvaluator(prob, self.num_particles)
    # Baseline score with every feature selected, for comparison in the logs.
    score_all = self.model_baseline(prob)
    rootLogger.info(f'Score with all features - {score_all[-1]}')
    self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
    # NaN-fill the bests so the first real evaluation always replaces them.
    self.best_global_ = np.zeros(shape=(1, self.N_ + 1))
    self.best_global_[:] = np.nan
    self.best_individual_ = np.zeros(shape=(self.num_particles, self.N_ + 1))
    self.best_individual_[:] = np.nan
    self.solution_ = np.zeros(shape=(self.max_iter + 1, self.N_ + 1))
    while not self._is_stop_criteria_accepted():
        self.init_search_()
        count_sel_feat = self.count_features(self.best_global_[0])
        best_glob = self.best_global_[0]
        # A feature counts as selected when its position value exceeds 0.6
        # (last slot of the row is the fitness and is excluded).
        masked = np.ma.masked_where(best_glob[:-1] > 0.6, best_glob[:-1])
        self.selected_features_, = np.where(masked.mask)
        colunas = list(prob.data.iloc[:, self.selected_features_].columns)
        rootLogger.info(
            f'Iteration: {self.iteration_}/{self.max_iter} \n , '
            f'Best global metric: {self.best_global_[:, -1]} \n , '
            f'Index features_selected: {self.selected_features_} \n , '
            f'Number of selected features: {count_sel_feat} \n , '
            f'Columns selected: {colunas}')
    # Record how many features each particle's personal best kept.
    for i in range(0, self.num_particles):
        self.count.append(self.count_features(self.best_individual_[i, :]))
    best_glob = self.best_global_[0]
    masked = np.ma.masked_where(best_glob[:-1] > 0.6, best_glob[:-1])
    self.selected_features_, = np.where(masked.mask)
    colunas = list(prob.data.iloc[:, self.selected_features_].columns)
    # Fixed: original message contained a '/n' typo instead of '\n'.
    rootLogger.info(
        f'Final Index features selected: {self.selected_features_} \n, '
        f'Final Columns selected: {colunas} \n')
    self._final_cols = colunas
    self._final_index = self.selected_features_
class BBASelector(object):
    """Feature selector driven by a binary Bat Algorithm (BBA)-style search.

    Particles ("bats") are rows of length ``N_ + 1``: one binary flag per
    feature plus a trailing fitness slot filled in by ``SolutionEvaluator``.
    The public entry point is :meth:`fit`; results are exposed through the
    ``final_cols`` / ``final_index`` properties.
    """

    def __init__(self, estimator, theta=1.0, gamma=1.0, epsilon=1,
                 num_particles=30, max_iter=100, max_local_improvement=50,
                 maximize_objective=True, initialization='uniform', cv=3):
        # theta: loudness decay factor; gamma: pulse-rate growth factor;
        # epsilon: step scale applied in bat_position.
        self.theta = theta
        self.gamma = gamma
        self.epsilon = epsilon
        self.num_particles = num_particles
        self.max_iter = max_iter
        self.cv = cv
        self.evaluator_ = None
        self.estimator = estimator
        self.velocity_ = None
        self.solution_ = None
        # One-shot flags: first call of evaluate_score / calculate_best_global
        # seeds the personal/global bests instead of comparing.
        self.initialize = 0
        self.initialize_1 = 0
        self.maxfit = 0
        self.maxindex = 0
        self.N = None
        self.max_local_improvement = max_local_improvement
        self.local_improvement = 0
        self.particles = None
        self.count = []
        self.N_ = 0
        self.iteration_ = 0
        self.pop_ = None
        self.count_global = 0
        self._final_cols = None
        self._final_index = None
        self._setup_initialization(initialization)
        self._setup_solution_comparator(maximize_objective)
        self.selected_features_ = None

    def _setup_initialization(self, initialization):
        """Resolve the population-initialization strategy by name."""
        init_method = {
            'uniform': create_population_uniform_strategy,
            '20_50': create_population_20_50_strategy
        }
        self._initialization = initialization
        if initialization not in init_method:
            raise BBAException(f'Invalid method {initialization!r}')
        self.init_method_ = init_method[initialization]
        #self.init_search_ = init_search[type_search]

    def _setup_solution_comparator(self, maximize_objective):
        """Pick the comparator used to decide if one fitness beats another."""
        self.maximize_objective = maximize_objective
        if self.maximize_objective:
            self.is_solution_better = maximize_comparator
        else:
            self.is_solution_better = minimize_comparator

    def model_baseline(self, prob):
        """Evaluate a single all-ones particle (every feature selected)."""
        n_cols = prob.data.shape[1]
        particle = np.zeros(shape=(1, n_cols + 1))
        particle[:] = 1
        evaluator_ = SolutionEvaluator(prob, 1)
        score = evaluator_.evaluate(particle)
        return score[0]

    def fit(self, X, unused_y, **kargs):
        """Run the bat search over the columns of X.

        Raises BBAException when X is not a DataFrame. Populates
        ``selected_features_``, ``_final_cols`` and ``_final_index``.
        """
        if not isinstance(X, pd.DataFrame):
            raise BBAException('The "X" parameter must be a data frame')
        prob = Problem(X, unused_y, self.estimator, self.cv, **kargs)
        self.N_ = prob.n_cols
        self._initialize(X)
        self.evaluator_ = SolutionEvaluator(prob, self.num_particles)
        score_all = self.model_baseline(prob)
        rootLogger.info((f'Score with all features - {score_all[-1]}'))
        # NOTE(review): local_improvement is never updated anywhere in this
        # class, so _is_stop_criteria_accepted effectively only checks
        # max_iter — confirm whether stagnation-based stopping was intended.
        while not self._is_stop_criteria_accepted():
            self.init_search()
            count_sel_feat = self.count_features(self.best_global_[0])
            best_glob = self.best_global_[0]
            # A feature is "selected" when its position exceeds 0.6
            # (the final slot is the fitness value and is excluded).
            self.selected_features_ = np.ma.masked_where(
                best_glob[:-1] > 0.6, best_glob[:-1])
            self.selected_features_, = np.where(
                self.selected_features_.mask == True)
            colunas = list(prob.data.iloc[:, self.selected_features_].columns)
            rootLogger.info(
                (f'Iteration: {self.iteration_}/{self.max_iter} \n , '
                 f'Best global metric: {self.best_global_[:, -1]} \n , '
                 f'Index features_selected: {self.selected_features_} \n , '
                 f'Number of selected features: {count_sel_feat} \n , '
                 f'Columns selected: {colunas}'))
        best_glob = self.best_global_[0]
        self.selected_features_ = np.ma.masked_where(best_glob[:-1] > 0.6,
                                                     best_glob[:-1])
        self.selected_features_, = np.where(
            self.selected_features_.mask == True)
        colunas = list(prob.data.iloc[:, self.selected_features_].columns)
        # NOTE(review): '/n' below looks like a '\n' typo in the log message.
        rootLogger.info(
            (f'Final Index features selected: {self.selected_features_} /n, '
             f'Final Columns selected: {colunas} \n'))
        self._final_cols = colunas
        self._final_index = self.selected_features_

    def _initialize(self, X):
        """Create the population and per-bat state arrays (requires N_ set)."""
        self.iteration_ = 0
        self.pop_ = self.init_method_(X, self.num_particles)
        # Loudness / pulse-rate vectors come from the project helper.
        self.particles_loudness, self.particles_rate = pulse_frequency_rate(
            self.num_particles)
        self.particles_rate_ = np.zeros(shape=(self.num_particles, 1))
        self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
        self.best_individual_ = np.zeros(shape=(self.num_particles, 1))
        self.best_global_ = np.zeros(shape=(1, self.N_ + 1))

    def _is_stop_criteria_accepted(self):
        """Stop on max iterations or on prolonged lack of improvement."""
        no_global_improv = self.local_improvement >= self.max_local_improvement
        max_iter_reached = self.iteration_ >= self.max_iter
        return max_iter_reached or no_global_improv

    def init_search(self):
        """One full bat iteration: evaluate, update bests, move bats."""
        self.pop_ = self.evaluator_.evaluate(self.pop_)
        self.evaluate_score(self.pop_)
        self.calculate_best_global()
        self.bat_position()
        self.update_velocity()
        self.iteration_ += 1

    def evaluate_score(self, pop):
        """Update each bat's personal-best fitness, loudness and pulse rate."""
        if self.initialize == 0:
            # First call: seed personal bests from the first evaluation.
            for i in np.arange(0, len(pop)):
                self.best_individual_[i] = pop[i, -1]
            self.initialize = 1
        for _i in np.arange(0, self.num_particles):
            # rand is 0 or 1 (not uniform on [0,1]) — intentional? TODO confirm
            rand = np.random.choice([0, 1], 1)[0]
            if (rand < self.particles_loudness[_i]) & (
                    pop[_i, -1] > self.best_individual_[_i]):
                self.best_individual_[_i] = pop[_i, -1]
                # Accepted solution: loudness decays, pulse rate grows.
                self.particles_loudness[
                    _i] = self.theta * self.particles_loudness[_i]
                # NOTE(review): canonical BBA uses 1 - exp(-gamma * t); the
                # '+gamma' here makes the factor negative — confirm intent.
                self.particles_rate_[_i] = self.particles_rate[_i] * (
                    1 - np.exp(self.gamma * self.iteration_))
        self.maxfit, self.maxindex = np.max(self.best_individual_), np.argmax(
            self.best_individual_)

    def calculate_best_global(self):
        """Adopt the best personal-best particle as the global best."""
        if self.initialize_1 == 0:
            for i in np.arange(0, self.N_ + 1):
                self.best_global_[0, i] = self.pop_[0, i]
            self.initialize_1 = 1
        if self.is_solution_better(self.maxfit, self.best_global_[0, -1]):
            for j in np.arange(0, self.N_ + 1):
                self.best_global_[0, j] = self.pop_[self.maxindex, j]

    def bat_position(self):
        """Local random walk scaled by mean loudness, then binarize via sigmoid."""
        for _i in np.arange(0, self.num_particles):
            rand = np.random.choice([0, 1], 1)[0]
            if rand > self.particles_rate_[_i]:
                for j in range(0, self.N_):
                    self.pop_[_i, j] = self.pop_[_i, j] + self.epsilon * np.mean(
                        self.particles_loudness)
                    # Stochastic binarization: keep the bit with probability
                    # sigmoid(position).
                    sigma = uniform(0, 1)
                    if sigma < 1 * (1 / (1 + np.exp(-self.pop_[_i, j]))):
                        self.pop_[_i, j] = 1
                    else:
                        self.pop_[_i, j] = 0

    def update_velocity(self):
        """Pull under-performing bats toward the global best, binarizing bits."""
        for _i in np.arange(0, self.num_particles):
            betha = np.random.choice([0, 1], 1)[0]
            rand = np.random.choice([0, 1], 1)[0]
            if (rand < self.particles_loudness[_i]) & (
                    self.best_individual_[_i] < self.best_global_[0, -1]):
                for j in range(0, self.N_):
                    # fi reduces to betha (0 or 1); written out as in the
                    # frequency formula f = f_min + (f_max - f_min) * beta.
                    fi = 0 + (0 + 1) * betha
                    self.velocity_[_i, j] = self.velocity_[_i, j] + (
                        self.best_global_[0, j] - self.pop_[_i, j]) * fi
                    self.pop_[_i, j] = self.pop_[_i, j] + self.velocity_[_i, j]
                    sigma = uniform(0, 1)
                    if sigma < (1 / (1 + np.exp(-self.pop_[_i, j]))):
                        self.pop_[_i, j] = 1
                    else:
                        self.pop_[_i, j] = 0

    def count_features(self, particle_proportions, threshold=1):
        """Count selected features; BBA positions are exact 0/1, so equality
        against threshold=1 is the selection test here."""
        count = 0
        for i in range(0, self.N_):
            if particle_proportions[i] == threshold:
                count = count + 1
        return count

    @property
    def final_cols(self):
        # Column names chosen by the last completed fit (or None).
        return self._final_cols

    @property
    def final_index(self):
        # Column indices chosen by the last completed fit (or None).
        return self._final_index
class PSOSelector(object):
    """Particle Swarm Optimization feature selector (supervised variant).

    Particles are rows of length ``N_ + 1``: one continuous position per
    feature plus a trailing fitness slot filled by ``SolutionEvaluator``.
    A feature is treated as selected when its position exceeds 0.6.
    Four fitness strategies ('type_1'..'type_4') trade off raw fitness
    against the number of selected features.
    """

    def __init__(self, estimator, w=0.7298, c1=1.49618, c2=1.49618,
                 num_particles=30, max_iter=100, max_local_improvement=50,
                 maximize_objective=True, initialization='uniform',
                 fitness_method='type_2', cv = 3):
        # w: inertia weight; c1/c2: cognitive/social acceleration constants.
        self.w = w
        self.c1 = c1
        self.c2 = c2
        self.num_particles = num_particles
        self.max_iter = max_iter
        self.cv = cv
        self.evaluator_ = None
        self.estimator = estimator
        self.velocity_ = None
        self.solution_ = None
        # One-shot flags: first best-individual / best-global call seeds the
        # arrays instead of comparing.
        self.initialize = 0
        self.initialize_1 = 0
        self.N = None
        self.max_local_improvement = max_local_improvement
        self.local_improvement = 0
        self.particles = None
        self.count = []
        self.N_ = 0
        self.iteration_ = 0
        self.pop_ = None
        self.count_global = 0
        self._final_cols = None
        self._final_index = None
        self._setup_initialization(initialization, fitness_method)
        self._setup_solution_comparator(maximize_objective)
        self.selected_features_ = None

    def _setup_initialization(self, initialization, type_search):
        """Resolve population-init strategy and per-iteration search method."""
        init_method = {
            'uniform': create_population_uniform_strategy,
            '20_50': create_population_20_50_strategy
        }
        init_search = {
            'type_1': self.search_type_1,
            'type_2': self.search_type_2,
            'type_3': self.search_type_3,
            'type_4': self.search_type_4
        }
        self._initialization = initialization
        if initialization not in init_method:
            raise PSOException(f'Invalid method {initialization!r}')
        self.init_method_ = init_method[initialization]
        self.init_search_ = init_search[type_search]

    def _setup_solution_comparator(self, maximize_objective):
        """Pick the comparator used to decide if one fitness beats another."""
        self.maximize_objective = maximize_objective
        if self.maximize_objective:
            self.is_solution_better = maximize_comparator
        else:
            self.is_solution_better = minimize_comparator

    def model_baseline(self, prob):
        """Evaluate a single all-ones particle (every feature selected)."""
        n_cols = prob.data.shape[1]
        particle = np.zeros(shape=(1, n_cols + 1))
        particle[:] = 1
        evaluator_ = SolutionEvaluator(prob, 1)
        score = evaluator_.evaluate(particle)
        return score[0]

    def fit(self, X, unused_y, **kargs):
        """Run the PSO search over the columns of X.

        Raises PSOException when X is not a DataFrame. Populates
        ``selected_features_``, ``_final_cols`` and ``_final_index``.
        """
        if not isinstance(X, pd.DataFrame):
            raise PSOException('The "X" parameter must be a data frame')
        self._initialize(X)
        prob = Problem(X, unused_y, self.estimator, self.cv, **kargs)
        self.N_ = prob.n_cols
        self.evaluator_ = SolutionEvaluator(prob, self.num_particles)
        score_all = self.model_baseline(prob)
        rootLogger.info((
            f'Score with all features - {score_all[-1]}'))
        self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
        # 'nan' string fill: numpy converts it to NaN, so the first real
        # evaluation always replaces the seeded bests.
        self.best_global_, self.best_global_[:] = \
            np.zeros(shape=(1, self.N_ + 1)), 'nan'
        self.best_individual_, self.best_individual_[:] = \
            np.zeros(shape=(self.num_particles, self.N_ + 1)), 'nan'
        self.solution_ = np.zeros(shape=(self.max_iter + 1, self.N_ + 1))
        while not self._is_stop_criteria_accepted():
            self.init_search_()
            count_sel_feat = self.count_features(self.best_global_[0])
            best_glob = self.best_global_[0]
            # Selected = position > 0.6; last slot is the fitness value.
            self.selected_features_ = np.ma.masked_where(best_glob[:-1]>0.6,
                                                         best_glob[:-1])
            self.selected_features_, = np.where(self.selected_features_.mask == True)
            colunas = list(prob.data.iloc[:, self.selected_features_].columns)
            rootLogger.info((
                f'Iteration: {self.iteration_}/{self.max_iter} \n , '
                f'Best global metric: {self.best_global_[:, -1]} \n , '
                f'Index features_selected: {self.selected_features_} \n , '
                f'Number of selected features: {count_sel_feat} \n , '
                f'Columns selected: {colunas}'))
        # Record how many features each particle's personal best kept.
        for i in range(0, self.num_particles):
            self.count.append(self.count_features(
                self.best_individual_[i, :]))
        best_glob = self.best_global_[0]
        self.selected_features_ = np.ma.masked_where(best_glob[:-1]>0.6,
                                                     best_glob[:-1])
        self.selected_features_, = np.where(self.selected_features_.mask == True)
        colunas = list(prob.data.iloc[:, self.selected_features_].columns)
        # NOTE(review): '/n' below looks like a '\n' typo in the log message.
        rootLogger.info((f'Final Index features selected: {self.selected_features_} /n, '
                         f'Final Columns selected: {colunas} \n'))
        self._final_cols = colunas
        self._final_index = self.selected_features_

    def _initialize(self, X):
        """Reset the iteration counter and create the initial population."""
        self.iteration_ = 0
        self.pop_ = self.init_method_(X, self.num_particles)

    def _is_stop_criteria_accepted(self):
        """Stop on max iterations or on prolonged lack of improvement."""
        no_global_improv = self.local_improvement >= self.max_local_improvement
        max_iter_reached = self.iteration_ >= self.max_iter
        return max_iter_reached or no_global_improv

    def search_type_1(self):
        """One PSO iteration using plain fitness comparison only."""
        self.pop_ = self.evaluator_.evaluate(self.pop_)
        self.calculate_best_individual_type_1(self.pop_)
        self.calculate_best_global_type_1()
        self.solution_[self.iteration_, :] = self.best_global_
        self.update_velocity()
        self.iteration_ += 1

    def search_type_2(self):
        """One PSO iteration; ties broken by fewer selected features."""
        self.pop_ = self.evaluator_.evaluate(self.pop_)
        self.calculate_best_individual_type_2(self.pop_)
        self.calculate_best_global_type_2()
        self.solution_[self.iteration_, :] = self.best_global_
        self.update_velocity()
        self.iteration_ += 1

    def search_type_3(self):
        """One PSO iteration; improvement must not add features."""
        self.pop_ = self.evaluator_.evaluate(self.pop_)
        self.calculate_best_individual_type_3(self.pop_)
        self.calculate_best_global_type_3()
        self.solution_[self.iteration_, :] = self.best_global_
        self.update_velocity()
        self.iteration_ += 1

    def search_type_4(self):
        """One PSO iteration; accepts 5% fitness loss for fewer features."""
        self.pop_ = self.evaluator_.evaluate(self.pop_)
        self.calculate_best_individual_type_4(self.pop_)
        self.calculate_best_global_type_4()
        self.solution_[self.iteration_, :] = self.best_global_
        self.update_velocity()
        self.iteration_ += 1

    def update_velocity(self):
        """Standard PSO velocity/position update (inertia+cognitive+social)."""
        w = self.w
        c1, c2 = self.c1, self.c2
        # NOTE(review): range(len(pop_) - 1) skips the last particle —
        # confirm whether this off-by-one is intentional.
        for i in range(0, len(self.pop_) - 1):
            for j in range(0, self.N_):
                r1= round(uniform(0,1), 2)
                r2 = round(uniform(0, 1), 2)
                pop = self.pop_[i, j]
                inertia = w * self.velocity_[i,j]
                cognitive = c1 * r1 * (self.best_individual_[i,j] - pop)
                social = c2 * r2 * (self.best_global_[0, j] - pop)
                velocity = inertia + cognitive + social
                self.velocity_[i,j] = velocity
                self.pop_[i,j] += velocity

    def calculate_best_individual_type_2(self, pop):
        """Update personal bests; on fitness ties prefer fewer features."""
        if self.initialize == 0:
            # First call: seed personal bests from the first evaluation.
            for i in range(0, len(pop)):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i,j] = pop[i,j]
            self.initialize = 1
            return
        for i in range(0, len(pop)):
            candidate_a = pop[i, self.N_]
            candidate_b = self.best_individual_[i, self.N_]
            if self.is_solution_better(candidate_a,candidate_b):
                for j in range(0 , self.N_ + 1):
                    self.best_individual_[i,j] = pop[i,j]
                continue
            particle_count = self.count_features(self.pop_[i, :])
            count_best_individual = self.count_features(
                self.best_individual_[i, :])
            if particle_count > 0:
                # Equal fitness but fewer features: still an improvement.
                if (candidate_a == candidate_b
                        and particle_count < count_best_individual):
                    for j in range(0, self.N_ + 1):
                        self.best_individual_[i,j] = pop[i,j]

    def calculate_best_global_type_2(self):
        """Update the global best; on fitness ties prefer fewer features."""
        if self.initialize_1 == 0:
            for i in range(0, self.N_ + 1):
                self.best_global_[0,i] = self.best_individual_[0, i]
            self.initialize_1 = 1
        self.count_global = self.count_features(self.best_global_[0, :])
        for i in range(0, self.num_particles):
            best_ind = self.best_individual_[i, self.N_]
            best_global = self.best_global_[0, self.N_]
            if self.is_solution_better(best_ind, best_global):
                # Improvement found: reset the stagnation counter.
                self.local_improvement = 1
                for j in range(0, self.N_ + 1):
                    self.best_global_[0,j] = self.best_individual_[i,j]
                self.count_global = self.count_features(
                    self.best_global_[0, :])
                continue
            count_best_individual = self.count_features(self.best_individual_[i, :])
            if (best_global == best_ind
                    and count_best_individual < self.count_global):
                self.local_improvement = 1
                self.count_global = 0
                for j in range(0, self.N_ + 1):
                    self.best_global_[0, j] = self.best_individual_[i,j]
                self.count_global = self.count_features(
                    self.best_global_[0, :])

    def calculate_best_individual_type_1(self,pop):
        """Update personal bests on raw fitness only."""
        if self.initialize == 0:
            for i in range(0, len(pop)):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i,j] = pop[i,j]
            self.initialize = 1
            return
        for i in range(0, len(pop)):
            if self.is_solution_better(pop[i,self.N_],
                                       self.best_individual_[i, self.N_]):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i, j] = pop[i,j]

    def calculate_best_global_type_1(self):
        """Update the global best on raw fitness; track stagnation counter."""
        if self.initialize_1 == 0:
            for i in range(0, self.N_ + 1):
                self.best_global_[0,i] = self.best_individual_[0,i]
            self.initialize_1 = 1
        for i in range(0, len(self.pop_)):
            if self.is_solution_better(self.best_individual_[i, self.N_],
                                       self.best_global_[0, self.N_]):
                # Improvement found: reset the stagnation counter.
                self.local_improvement = 1
                for j in range(0, self.N_ + 1):
                    self.best_global_[0,j] = self.best_individual_[i,j]
        # One more iteration elapsed without (or since) an improvement.
        self.local_improvement += 1

    def calculate_best_individual_type_3(self, pop):
        """Update personal bests; improvement must not add features."""
        if self.initialize == 0:
            for i in range(0, len(pop)):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i,j] = pop[i,j]
            self.initialize = 1
            return
        for i in range(0, len(pop)):
            candidate_a = pop[i, self.N_]
            candidate_b = self.best_individual_[i, self.N_]
            particle_count = self.count_features(self.pop_[i, :])
            count_best_individual = self.count_features(
                self.best_individual_[i, :])
            if particle_count > 0:
                if (self.is_solution_better(candidate_a,candidate_b)
                        and particle_count <= count_best_individual):
                    for j in range(0, self.N_ + 1):
                        self.best_individual_[i,j] = pop[i,j]
                elif (candidate_a == candidate_b
                        and particle_count < count_best_individual):
                    for j in range(0, self.N_ + 1):
                        self.best_individual_[i,j] = pop[i,j]
                else:
                    continue

    def calculate_best_global_type_3(self):
        """Update the global best; improvement must not add features."""
        if self.initialize_1 == 0:
            for i in range(0, self.N_ + 1):
                self.best_global_[0,i] = self.best_individual_[0, i]
            self.initialize_1 = 1
        self.count_global = self.count_features(self.best_global_[0, :])
        for i in range(0, self.num_particles):
            best_ind = self.best_individual_[i, self.N_]
            best_global = self.best_global_[0, self.N_]
            count_best_individual = self.count_features(self.best_individual_[i, :])
            if (self.is_solution_better(best_ind,best_global)
                    and count_best_individual <= self.count_global):
                self.local_improvement = 1
                for j in range(0, self.N_ + 1):
                    self.best_global_[0,j] = self.best_individual_[i,j]
                self.count_global = self.count_features(
                    self.best_global_[0, :])
            elif (best_ind == best_global
                    and count_best_individual < self.count_global):
                self.local_improvement = 1
                self.count_global = 0
                for j in range(0, self.N_ + 1):
                    self.best_global_[0, j] = self.best_individual_[i,j]
                self.count_global = self.count_features(
                    self.best_global_[0, :])
            else:
                continue

    def calculate_best_individual_type_4(self, pop):
        """Update personal bests; accept up to 5% worse fitness if it
        removes features (0.95 factor below)."""
        if self.initialize == 0:
            for i in range(0, len(pop)):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i,j] = pop[i,j]
            self.initialize = 1
            return
        for i in range(0, len(pop)):
            candidate_a = pop[i, self.N_]
            candidate_b = self.best_individual_[i, self.N_]
            particle_count = self.count_features(self.pop_[i, :])
            count_best_individual = self.count_features(
                self.best_individual_[i, :])
            if particle_count > 0:
                if (self.is_solution_better(candidate_a,candidate_b)
                        and particle_count <= count_best_individual):
                    for j in range(0, self.N_ + 1):
                        self.best_individual_[i,j] = pop[i,j]
                elif (self.is_solution_better(candidate_a, 0.95 * candidate_b)
                        and particle_count < count_best_individual):
                    for j in range(0, self.N_ + 1):
                        self.best_individual_[i,j] = pop[i,j]
                else:
                    continue

    def calculate_best_global_type_4(self):
        """Update the global best; accept up to 5% worse fitness if it
        removes features (0.95 factor below)."""
        if self.initialize_1 == 0:
            for i in range(0, self.N_ + 1):
                self.best_global_[0,i] = self.best_individual_[0, i]
            self.initialize_1 = 1
        self.count_global = self.count_features(self.best_global_[0, :])
        for i in range(0, self.num_particles):
            best_ind = self.best_individual_[i, self.N_]
            best_global = self.best_global_[0, self.N_]
            count_best_individual = self.count_features(self.best_individual_[i, :])
            if (self.is_solution_better(best_ind,best_global)
                    and count_best_individual <= self.count_global):
                self.local_improvement = 1
                for j in range(0, self.N_ + 1):
                    self.best_global_[0,j] = self.best_individual_[i,j]
                self.count_global = self.count_features(
                    self.best_global_[0, :])
            elif (self.is_solution_better(best_ind, 0.95 * best_global)
                    and count_best_individual < self.count_global):
                self.local_improvement = 1
                self.count_global = 0
                for j in range(0, self.N_ + 1):
                    self.best_global_[0, j] = self.best_individual_[i,j]
                self.count_global = self.count_features(
                    self.best_global_[0, :])
            else:
                continue

    def count_features(self, particle_proportions, threshold=0.6):
        """Count positions above threshold (continuous PSO positions)."""
        count = 0
        for i in range(0, self.N_):
            if particle_proportions[i] > threshold:
                count = count + 1
        return count

    @property
    def final_cols(self):
        # Column names chosen by the last completed fit (or None).
        return self._final_cols

    @property
    def final_index(self):
        # Column indices chosen by the last completed fit (or None).
        return self._final_index
class PSOSelector(object):
    """PSO feature selector, clustering-oriented variant.

    Unlike the supervised variant, ``pop_`` is a dict: ``pop_['pop']`` holds
    the particle matrix (one row per particle: feature positions plus a
    trailing fitness slot) and, when a target is supplied, ``pop_['cp']`` /
    ``pop_['fm']`` hold per-particle cluster-purity and f-measure metrics.
    A feature is treated as selected when its position exceeds 0.6.

    Fixes applied in this revision:
    - ``calculate_best_global_pso_1_1`` wrote to ``self.best_global`` and
      read ``self.best_cp`` (missing trailing underscores) — AttributeError.
    - ``calculate_best_global`` wrote ``self.best_globao_fm`` (typo), so the
      tracked f-measure was never updated on tie-breaks.
    - ``fit`` tested ``unused_y.all() != None``, which crashes when
      ``unused_y`` is None despite the None default; replaced with
      ``unused_y is not None`` (identical truth value for array inputs).
    """

    def __init__(self, estimator, w=0.7298, c1=1.49618, c2=1.49618,
                 num_particles=30, max_iter=100, max_local_improvement=50,
                 maximize_objective=True, initialization='uniform',
                 fitness_method='type_2', cv=3, verbose=True):
        # w: inertia weight; c1/c2: cognitive/social acceleration constants.
        self.w = w
        self.c1 = c1
        self.c2 = c2
        self.num_particles = num_particles
        self.max_iter = max_iter
        self.cv = cv
        self.evaluator_ = None
        self.estimator = estimator
        self.velocity_ = None
        self.best_individual_ = None
        self.best_global_ = None
        # Purity / f-measure of the current global best (when y is given).
        self.best_global_fm = 0
        self.best_global_cp = 0
        # Per-particle purity / f-measure of each personal best.
        self.best_cp_ = 0
        self.best_fm_ = 0
        self.solution_ = None
        # One-shot flags: first best-individual / best-global call seeds the
        # arrays instead of comparing.
        self.initialize = 0
        self.initialize_1 = 0
        self.verbose = verbose
        self.N = None
        self.max_local_improvement = max_local_improvement
        self.local_improvement = 0
        self.particles = None
        self.count = []
        self.N_ = 0
        self.iteration_ = 0
        self.pop_ = {}
        self.count_global = 0
        self._setup_initialization(initialization, fitness_method)
        self._setup_solution_comparator(maximize_objective)
        self.selected_features_ = None

    def _setup_initialization(self, initialization, type_search):
        """Resolve population-init strategy and per-iteration search method."""
        init_method = {
            'uniform': create_population_uniform_strategy,
            '20_50': create_population_20_50_strategy
        }
        init_search = {
            'type_1': self.search_type_1,
            'type_2': self.search_type_2
        }
        self._initialization = initialization
        if initialization not in init_method:
            raise PSOException(f'Invalid method {initialization!r}')
        self.init_method_ = init_method[initialization]
        self.init_search_ = init_search[type_search]

    def _setup_solution_comparator(self, maximize_objective):
        """Pick the comparator used to decide if one fitness beats another."""
        self.maximize_objective = maximize_objective
        if self.maximize_objective:
            self.is_solution_better = maximize_comparator
        else:
            self.is_solution_better = minimize_comparator

    def fit(self, X, unused_y=None, **kargs):
        """Run the PSO search over the columns of X.

        Parameters
        ----------
        X : pd.DataFrame
            Data whose columns are candidate features.
        unused_y : array-like or None
            Optional target; when given, purity/f-measure tracking is
            enabled via ``pop_['cp']`` / ``pop_['fm']``.

        Raises
        ------
        PSOException
            If ``X`` is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise PSOException('The "X" parameter must be a data frame')
        colunas_full = X.columns
        self._initialize(X)
        # Fixed: was `unused_y.all() != None`, which crashed on the default.
        if unused_y is not None:
            self.pop_['cp'] = np.zeros(shape=(1, self.num_particles))[0]
            self.pop_['fm'] = np.zeros(shape=(1, self.num_particles))[0]
        prob = Problem(X, unused_y, self.estimator, self.cv, **kargs)
        self.N_ = prob.n_cols
        self.evaluator_ = SolutionEvaluator(prob, self.num_particles)
        self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
        # NaN-fill the bests so the first evaluation always replaces them.
        self.best_individual_, self.best_individual_[:] = \
            np.zeros(shape=(self.num_particles, self.N_ + 1)), 'nan'
        self.best_global_, self.best_global_[:] = \
            np.zeros(shape=(1, self.N_ + 1)), 'nan'
        self.solution_ = np.zeros(shape=(self.max_iter + 1, self.N_ + 1))
        # Parameters to store the purity and fmeasure metric
        self.best_fm_ = np.zeros(shape=(1, self.num_particles))[0]
        self.best_cp_ = np.zeros(shape=(1, self.num_particles))[0]
        while not self._is_stop_criteria_accepted():
            self.init_search_()
            count_sel_feat = self.count_features(self.best_global_[0])
            best_glob = self.best_global_[0]
            # Selected = position > 0.6; last slot is the fitness value.
            self.selected_features_ = np.ma.masked_where(
                best_glob[:-1] > 0.6, best_glob[:-1])
            self.selected_features_, = np.where(
                self.selected_features_.mask == True)
            colunas = colunas_full[self.selected_features_]
            if self.verbose:
                interm_var = f'Iteration: {self.iteration_}/{self.max_iter} \n ,'
                interm_var = interm_var + f'Best global metric - CP X PF: {self.best_global_[:, -1]} \n , '
                if unused_y is not None:
                    interm_var = interm_var + f'Best global metric purity: {self.best_global_cp} \n ,'
                    interm_var = interm_var + f'Best global metric f-measure: {self.best_global_fm} \n ,'
                interm_var = interm_var + f'Index features_selected: {self.selected_features_} \n , '
                interm_var = interm_var + f'Number of selected features: {count_sel_feat} \n , '
                interm_var = interm_var + f'Columns selected: {colunas}'
                rootLogger.info(interm_var)
        # Record how many features each particle's personal best kept.
        for i in range(0, self.num_particles):
            self.count.append(self.count_features(
                self.best_individual_[i, :]))
        best_glob = self.best_global_[0]
        self.selected_features_ = np.ma.masked_where(
            best_glob[:-1] > 0.6, best_glob[:-1])
        self.selected_features_, = np.where(
            self.selected_features_.mask == True)

    def _initialize(self, X):
        """Reset the iteration counter and create the initial population."""
        self.iteration_ = 0
        self.pop_['pop'] = self.init_method_(X, self.num_particles)

    def _is_stop_criteria_accepted(self):
        """Stop on max iterations or on prolonged lack of improvement."""
        no_global_improv = self.local_improvement >= self.max_local_improvement
        max_iter_reached = self.iteration_ >= self.max_iter
        return max_iter_reached or no_global_improv

    def search_type_1(self):
        """One PSO iteration using plain fitness comparison only."""
        self.pop_ = self.evaluator_.evaluate(self.pop_)
        self.calculate_best_individual_pso_1_1(self.pop_)
        self.calculate_best_global_pso_1_1()
        self.solution_[self.iteration_, :] = self.best_global_
        self.update_velocity()
        self.iteration_ += 1

    def search_type_2(self):
        """One PSO iteration; ties broken by fewer selected features."""
        self.pop_ = self.evaluator_.evaluate(self.pop_)
        self.calculate_best_individual(self.pop_)
        self.calculate_best_global()
        self.solution_[self.iteration_, :] = self.best_global_
        self.update_velocity()
        self.iteration_ += 1

    def update_velocity(self):
        """Standard PSO velocity/position update (inertia+cognitive+social).

        NOTE(review): range(len(...) - 1) skips the last particle — kept
        as-is for behavioral parity with the other selector variants, but
        confirm whether this off-by-one is intentional.
        """
        w = self.w
        c1, c2 = self.c1, self.c2
        for i in range(0, len(self.pop_['pop']) - 1):
            for j in range(0, self.N_):
                r1 = round(uniform(0, 1), 2)
                r2 = round(uniform(0, 1), 2)
                pop = self.pop_['pop'][i, j]
                inertia = w * self.velocity_[i, j]
                cognitive = c1 * r1 * (self.best_individual_[i, j] - pop)
                social = c2 * r2 * (self.best_global_[0, j] - pop)
                velocity = inertia + cognitive + social
                self.velocity_[i, j] = velocity
                self.pop_['pop'][i, j] += velocity

    def calculate_best_individual(self, pop):
        """Update personal bests (and their cp/fm metrics); on fitness ties
        prefer fewer selected features."""
        if self.initialize == 0:
            # First call: seed personal bests from the first evaluation.
            for i in range(0, len(pop['pop'])):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i, j] = pop['pop'][i, j]
                if 'fm' in pop.keys():
                    self.best_fm_[i] = pop['fm'][i]
                if 'cp' in pop.keys():
                    self.best_cp_[i] = pop['cp'][i]
            self.initialize = 1
            return
        for i in range(0, len(pop['pop'])):
            candidate_a = pop['pop'][i, self.N_]
            candidate_b = self.best_individual_[i, self.N_]
            if self.is_solution_better(candidate_a, candidate_b):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i, j] = pop['pop'][i, j]
                if 'fm' in pop.keys():
                    self.best_fm_[i] = pop['fm'][i]
                if 'cp' in pop.keys():
                    self.best_cp_[i] = pop['cp'][i]
                continue
            particle_count = self.count_features(self.pop_['pop'][i, :])
            count_best_individual = self.count_features(
                self.best_individual_[i, :])
            if particle_count > 0:
                # Equal fitness but fewer features: still an improvement.
                if (pop['pop'][i, self.N_] == self.best_individual_[i, self.N_]
                        and particle_count < count_best_individual):
                    for j in range(0, self.N_ + 1):
                        self.best_individual_[i, j] = pop['pop'][i, j]
                    if 'fm' in pop.keys():
                        self.best_fm_[i] = pop['fm'][i]
                    if 'cp' in pop.keys():
                        self.best_cp_[i] = pop['cp'][i]

    def calculate_best_global(self):
        """Update the global best (and its cp/fm metrics); on fitness ties
        prefer fewer selected features."""
        if self.initialize_1 == 0:
            for i in range(0, self.N_ + 1):
                self.best_global_[0, i] = self.best_individual_[0, i]
            if 'fm' in self.pop_.keys():
                self.best_global_fm = self.best_fm_[0]
            if 'cp' in self.pop_.keys():
                self.best_global_cp = self.best_cp_[0]
            self.initialize_1 = 1
        self.count_global = self.count_features(self.best_global_[0, :])
        for i in range(0, self.num_particles):
            if self.is_solution_better(self.best_individual_[i, self.N_],
                                       self.best_global_[0, self.N_]):
                # Improvement found: reset the stagnation counter.
                self.local_improvement = 1
                self.count_global = 0
                for j in range(0, self.N_ + 1):
                    self.best_global_[0, j] = self.best_individual_[i, j]
                if 'fm' in self.pop_.keys():
                    self.best_global_fm = self.best_fm_[i]
                if 'cp' in self.pop_.keys():
                    self.best_global_cp = self.best_cp_[i]
                self.count_global = self.count_features(
                    self.best_global_[0, :])
                continue
            count_best_individual = self.count_features(
                self.best_individual_[i, :])
            best_global = self.best_global_[0, self.N_]
            best_ind = self.best_individual_[i, self.N_]
            if (best_global == best_ind
                    and count_best_individual < self.count_global):
                self.local_improvement = 1
                self.count_global = 0
                for j in range(0, self.N_ + 1):
                    self.best_global_[0, j] = self.best_individual_[i, j]
                if 'fm' in self.pop_.keys():
                    # Fixed: was `self.best_globao_fm` (typo), which silently
                    # created a dead attribute instead of updating the metric.
                    self.best_global_fm = self.best_fm_[i]
                if 'cp' in self.pop_.keys():
                    self.best_global_cp = self.best_cp_[i]
                self.count_global = self.count_features(
                    self.best_global_[0, :])
        # One more iteration elapsed without (or since) an improvement.
        self.local_improvement += 1

    def calculate_best_individual_pso_1_1(self, pop):
        """Update personal bests (and their cp/fm metrics) on raw fitness."""
        if self.initialize == 0:
            for i in range(0, len(pop['pop'])):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i, j] = pop['pop'][i, j]
                if 'fm' in pop.keys():
                    self.best_fm_[i] = pop['fm'][i]
                if 'cp' in pop.keys():
                    self.best_cp_[i] = pop['cp'][i]
            self.initialize = 1
            return
        for i in range(0, len(pop['pop'])):
            if self.is_solution_better(pop['pop'][i, self.N_],
                                       self.best_individual_[i, self.N_]):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i, j] = pop['pop'][i, j]
                if 'fm' in pop.keys():
                    self.best_fm_[i] = pop['fm'][i]
                if 'cp' in pop.keys():
                    self.best_cp_[i] = pop['cp'][i]

    def calculate_best_global_pso_1_1(self):
        """Update the global best (and its cp/fm metrics) on raw fitness."""
        if self.initialize_1 == 0:
            for i in range(0, self.N_ + 1):
                # Fixed: was `self.best_global[0, i]` (missing underscore),
                # which raised AttributeError on the first call.
                self.best_global_[0, i] = self.best_individual_[0, i]
            if 'fm' in self.pop_.keys():
                self.best_global_fm = self.best_fm_[0]
            if 'cp' in self.pop_.keys():
                self.best_global_cp = self.best_cp_[0]
            self.initialize_1 = 1
        for i in range(0, len(self.pop_['pop'])):
            if self.is_solution_better(self.best_individual_[i, self.N_],
                                       self.best_global_[0, self.N_]):
                # Improvement found: reset the stagnation counter.
                self.local_improvement = 1
                for j in range(0, self.N_ + 1):
                    self.best_global_[0, j] = self.best_individual_[i, j]
                if 'fm' in self.pop_.keys():
                    self.best_global_fm = self.best_fm_[i]
                if 'cp' in self.pop_.keys():
                    # Fixed: was `self.best_cp[i]` (missing underscore).
                    self.best_global_cp = self.best_cp_[i]
        # One more iteration elapsed without (or since) an improvement.
        self.local_improvement += 1

    def count_features(self, particle_proportions, threshold=0.6):
        """Count positions above threshold (continuous PSO positions)."""
        count = 0
        for i in range(0, self.N_):
            if particle_proportions[i] > threshold:
                count = count + 1
        return count
def fit(self, X, unused_y=None, **kargs):
    """Run the PSO feature-selection search over the columns of X
    (clustering variant: ``pop_`` is a dict keyed by 'pop'/'cp'/'fm').

    Parameters
    ----------
    X : pd.DataFrame
        Data whose columns are candidate features.
    unused_y : array-like or None
        Optional target; when given, purity/f-measure tracking is enabled.

    Raises
    ------
    PSOException
        If ``X`` is not a pandas DataFrame.
    """
    if not isinstance(X, pd.DataFrame):
        raise PSOException('The "X" parameter must be a data frame')
    colunas_full = X.columns
    self._initialize(X)
    # Fixed: was `unused_y.all() != None`, which raised AttributeError
    # whenever the default None was used; for arrays the truth value is
    # identical ("a target was supplied").
    if unused_y is not None:
        self.pop_['cp'] = np.zeros(shape=(1, self.num_particles))[0]
        self.pop_['fm'] = np.zeros(shape=(1, self.num_particles))[0]
    prob = Problem(X, unused_y, self.estimator, self.cv, **kargs)
    self.N_ = prob.n_cols
    self.evaluator_ = SolutionEvaluator(prob, self.num_particles)
    self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
    # NaN-fill the bests so the first evaluation always replaces them.
    self.best_individual_, self.best_individual_[:] = \
        np.zeros(shape=(self.num_particles, self.N_ + 1)), 'nan'
    self.best_global_, self.best_global_[:] = \
        np.zeros(shape=(1, self.N_ + 1)), 'nan'
    self.solution_ = np.zeros(shape=(self.max_iter + 1, self.N_ + 1))
    # Parameters to store the purity and fmeasure metric
    self.best_fm_ = np.zeros(shape=(1, self.num_particles))[0]
    self.best_cp_ = np.zeros(shape=(1, self.num_particles))[0]
    while not self._is_stop_criteria_accepted():
        self.init_search_()
        count_sel_feat = self.count_features(self.best_global_[0])
        best_glob = self.best_global_[0]
        # Selected = position > 0.6; last slot is the fitness value.
        self.selected_features_ = np.ma.masked_where(
            best_glob[:-1] > 0.6, best_glob[:-1])
        self.selected_features_, = np.where(
            self.selected_features_.mask == True)
        colunas = colunas_full[self.selected_features_]
        if self.verbose:
            interm_var = f'Iteration: {self.iteration_}/{self.max_iter} \n ,'
            interm_var = interm_var + f'Best global metric - CP X PF: {self.best_global_[:, -1]} \n , '
            if unused_y is not None:
                interm_var = interm_var + f'Best global metric purity: {self.best_global_cp} \n ,'
                interm_var = interm_var + f'Best global metric f-measure: {self.best_global_fm} \n ,'
            interm_var = interm_var + f'Index features_selected: {self.selected_features_} \n , '
            interm_var = interm_var + f'Number of selected features: {count_sel_feat} \n , '
            interm_var = interm_var + f'Columns selected: {colunas}'
            rootLogger.info(interm_var)
    # Record how many features each particle's personal best kept.
    for i in range(0, self.num_particles):
        self.count.append(self.count_features(
            self.best_individual_[i, :]))
    best_glob = self.best_global_[0]
    self.selected_features_ = np.ma.masked_where(
        best_glob[:-1] > 0.6, best_glob[:-1])
    self.selected_features_, = np.where(
        self.selected_features_.mask == True)