Пример #1
0
    def fitness(self, particle, metric):
        """Score the feature subset encoded by ``particle`` for ``self.mode``.

        The confusion matrix of the subset is first looked up in the memo
        table (``self.tab_data`` / ``self.tab_vals``) through ``tab.get``.
        When that path raises ``AttributeError`` the subset is evaluated with
        a 5-fold stratified cross-validation instead, and the resulting
        matrix is stored with ``tab.add``.

        Side effects: ``self.tab_find`` is incremented on a table hit;
        ``self.tab_data``, ``self.tab_vals`` and ``self.tab_insert`` are
        updated on a miss.

        Args:
            particle: Object exposing ``position``. Its entries are cast to
                ``int`` to build the table key, and it is also used to select
                the feature columns.
            metric: Name of the metric returned as ``score``: 'accuracy' /
                'exactitude', 'recall' / 'rappel', 'precision' / 'précision'
                or 'fscore'. Any other value falls back to the accuracy.

        Returns:
            tuple: ``(score, accuracy, recall, precision, fscore, cols,
            matrix)`` where ``matrix`` is the confusion matrix and ``cols``
            the column labels of the evaluated subset.
        """

        def overall_accuracy(conf):
            # (TP + TN) / (TP + TN + FP + FN) using the project's totals.
            hits = (utility.getTotalTruePositive(conf) +
                    utility.getTotalTrueNegative(conf))
            return hits / (hits + utility.getTotalFalsePositive(conf) +
                           utility.getTotalFalseNegative(conf))

        matrix_length = len(np.unique(self.data[self.target]))

        if self.mode == 'sgd':
            model = SGDClassifier(class_weight='balanced',
                                  loss='modified_huber',
                                  random_state=1)
        elif self.mode == 'svr':
            model = SVC(kernel='linear',
                        class_weight='balanced',
                        probability=True)
        elif self.mode == 'rdf':
            model = SVC(kernel='rbf',
                        class_weight='balanced',
                        probability=True)
        elif self.mode == 'pol':
            model = SVC(kernel='poly',
                        class_weight='balanced',
                        probability=True)
        elif self.mode == 'rdc':
            model = RandomForestClassifier(n_estimators=10,
                                           class_weight='balanced',
                                           random_state=1)
        elif self.mode == 'dtc':
            model = DecisionTreeClassifier(class_weight='balanced',
                                           random_state=1)
        elif self.mode == 'gdc':
            model = GradientBoostingClassifier(random_state=1)
        elif self.mode == 'etc':
            model = ExtraTreesClassifier(class_weight='balanced',
                                         random_state=1)
        elif self.mode == 'adc':
            model = AdaBoostClassifier(random_state=1)
        elif self.mode == 'bac':
            model = BaggingClassifier(random_state=1)
        elif self.mode == 'lda':
            model = LinearDiscriminantAnalysis()
        elif self.mode == 'qda':
            model = QuadraticDiscriminantAnalysis()
        elif self.mode == 'gnb':
            model = GaussianNB()
        elif self.mode == 'rrc':
            model = RidgeClassifier(class_weight='balanced')
        else:
            # Default model; for 'knn' it is replaced inside the fold loop.
            model = LogisticRegression(solver='liblinear',
                                       C=10.0,
                                       class_weight='balanced')

        folds = model_selection.StratifiedKFold(5)
        try:
            # The attribute used to be misspelled ("posiion"), which raised
            # AttributeError on every call and silently disabled the table.
            # NOTE(review): this relies on tab.get signalling a miss with
            # AttributeError, as the handler below implies - confirm.
            _, tab_val = tab.get([int(x) for x in particle.position],
                                 self.tab_data, self.tab_vals)
            tab_val = np.array(tab_val)
            accuracy = overall_accuracy(tab_val)

            # Macro-averaged precision / recall rebuilt from the matrix.
            precision_tab = []
            recall_tab = []
            for i in range(len(tab_val)):
                true_pos = utility.getTruePositive(tab_val, i)
                precision_tab.append(
                    true_pos /
                    (utility.getFalsePositive(tab_val, i) + true_pos))
                recall_tab.append(
                    true_pos /
                    (utility.getFalseNegative(tab_val, i) + true_pos))
            precision = sum(precision_tab) / len(precision_tab)
            recall = sum(recall_tab) / len(recall_tab)
            # Harmonic mean of the macro precision and recall.
            fscore = 2 * (1 / ((1 / precision) + (1 / recall)))
            matrix = tab_val

            features = self.data.drop([self.target], axis=1)
            cols = features.iloc[:, particle.position].columns
            self.tab_find = self.tab_find + 1
        except AttributeError:
            matrix = np.zeros((matrix_length, matrix_length), dtype=int)
            X, y, cols = utility.ready(self, particle.position, self.data,
                                       self.dummiesList, self.createDummies,
                                       self.normalize)
            y_true_all = []
            y_pred_all = []
            for train_index, test_index in folds.split(X, y):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                # Was `self.mode == ('knn' or 'dct' or ...)`, which only ever
                # compared against 'knn'; a membership test applies the
                # oversampling to every listed mode.
                # NOTE(review): 'dct' and 'gbc' match none of the mode keys
                # handled above ('dtc', 'gdc') - confirm the intended names.
                if self.mode in ('knn', 'dct', 'gbc', 'lda', 'qda', 'adc',
                                 'bac'):
                    if self.mode == 'knn':
                        # Neighbour count = sqrt of the fold's training size
                        # (measured before oversampling).
                        model = KNeighborsClassifier(
                            n_neighbors=int(len(X_train)**(1 / 2)))
                    sm = SMOTE(sampling_strategy='auto')
                    X_train, y_train = sm.fit_resample(X_train, y_train)

                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                y_true_all.extend(y_test)
                y_pred_all.extend(y_pred)

                # Accumulate the per-fold confusion matrices.
                matrix += confusion_matrix(y_test, y_pred)

            accuracy = overall_accuracy(matrix)

            precision, recall, fscore, _ = s(y_true_all,
                                             y_pred_all,
                                             average='macro')
            self.tab_data, self.tab_vals = tab.add(
                [int(x) for x in particle.position], matrix.tolist(),
                self.tab_data, self.tab_vals)
            self.tab_insert = self.tab_insert + 1

        # Was `metric == 'accuracy' or 'exactitude'`, which is always truthy
        # and therefore always selected the accuracy.
        by_metric = {
            'accuracy': accuracy,
            'exactitude': accuracy,
            'recall': recall,
            'rappel': recall,
            'precision': precision,
            'précision': precision,
            'fscore': fscore,
        }
        score = by_metric.get(metric, accuracy)

        return score, accuracy, recall, precision, fscore, cols, matrix
Пример #2
0
    def optimization(self, part, n_pop, n_gen, w, c1, c2, data, dummiesList,
                     createDummies, normalize, metric, x, y, besties, names,
                     iters):
        """Run the particle-swarm feature search for every mode in ``part``.

        For each mode a ``Swarm`` is built and iterated while the iteration
        counter is below ``n_gen``. Every iteration updates the personal and
        global bests, moves the particles, redraws the two progress plots,
        writes the current results with ``self.write_res`` and, every fifth
        iteration, dumps the memo table with ``tab.dump``. When a mode is
        finished its summary is pushed onto ``x``, ``y``, ``besties``,
        ``names`` and ``iters`` through their ``put`` method and the table is
        dumped one last time.

        Args:
            part: Iterable of mode names (strings) to process.
            n_pop, w, c1, c2: Forwarded to ``Swarm`` and ``self.write_res``.
            n_gen: Number of iterations per mode.
            data: Table holding the features and the ``self.target`` column.
            dummiesList, createDummies, normalize: Forwarded to ``Swarm``.
            metric: Metric name given to the swarm and used as y-axis label.
            x, y, besties, names, iters: Output containers exposing ``put``
                (presumably queues - confirm against the caller).
        """
        started_at = time.time()

        def save_and_close(figure, directory, file_name):
            # Resolve the file against the working directory, write the
            # figure, then release it.
            relative = os.path.join(directory, file_name)
            target = os.path.join(os.getcwd(), relative)
            figure.savefig(os.path.abspath(target), bbox_inches="tight")
            plt.close(figure)

        for mode in part:
            folderName = mode.upper()
            utility.createDirectory(path=self.path2, folderName=folderName)

            # One dimension per column of `data` except the target.
            dimension = data.columns.size - 1
            search_space = Swarm(n_pop, dimension, data, self.target, mode,
                                 dummiesList, createDummies, normalize, w,
                                 c1, c2)
            search_space.particles = [
                Particle(dimension) for _ in range(search_space.n_particles)
            ]

            feature_columns = self.data.drop([self.target], axis=1).columns
            labels, _counts = np.unique(data[self.target],
                                        return_counts=True)
            unique = list(labels)

            # Initialise the memo table.
            table_file = 'tab_' + self.data_name + '_' + mode
            search_space.tab_data, search_space.tab_vals = tab.init(
                size=len(feature_columns),
                matrix_size=len(unique),
                filename=table_file)

            generations = []
            best_scores = []
            class_scores = []
            plot_dir = os.path.join(self.path2, folderName)

            iteration = 0
            while iteration < n_gen:
                tick = time.time()

                search_space.set_pbest(metric)
                search_space.set_gbest(metric)
                search_space.move_particles()

                print("mode: ", mode, " valeur: ", search_space.gbest_value,
                      " itération: ", iteration, " temps exe: ",
                      str(timedelta(seconds=(time.time() - tick))),
                      " temps total: ",
                      str(timedelta(seconds=(time.time() - started_at))))

                generations.append(iteration)
                best_scores.append(search_space.gbest_value)

                # Per-class TP / (FN + TP) of the current global best.
                gbest = search_space.gbest_matrix
                class_scores.append([
                    utility.getTruePositive(gbest, i) /
                    (utility.getFalseNegative(gbest, i) +
                     utility.getTruePositive(gbest, i))
                    for i in range(len(gbest))
                ])

                # Plot 1: best score per iteration.
                fig, ax = plt.subplots()
                ax.plot(generations, best_scores)
                ax.set_title("Evolution du score par génération (" +
                             folderName + ")" +
                             "\nOptimisation par essaim de particule")
                ax.set_xlabel("génération")
                ax.set_ylabel(metric)
                ax.grid()
                ax.legend(labels=["Le meilleur"],
                          loc='center left',
                          bbox_to_anchor=(1.04, 0.5),
                          borderaxespad=0)
                save_and_close(fig, plot_dir, 'plot_' + str(1) + '.png')

                # Plot 2: one curve per class.
                fig2, ax2 = plt.subplots()
                ax2.plot(generations, class_scores)
                ax2.set_title(
                    "Evolution du score par génération pour chacune des classes ("
                    + folderName + ")" +
                    "\nOptimisation par essaim de particule")
                ax2.set_xlabel("génération")
                ax2.set_ylabel(metric)
                ax2.grid()
                ax2.legend(labels=unique,
                           loc='center left',
                           bbox_to_anchor=(1.04, 0.5),
                           borderaxespad=0)
                save_and_close(fig2, plot_dir, 'plotb_' + str(1) + '.png')

                iteration += 1

                self.write_res(folderName=folderName,
                               mode=mode,
                               n_pop=n_pop,
                               n_gen=n_gen,
                               w=w,
                               c1=c1,
                               c2=c2,
                               y1=best_scores,
                               yX=class_scores,
                               colMax=search_space.gbest_column,
                               bestScore=search_space.gbest_value,
                               bestScoreA=search_space.gbest_accuracy,
                               bestScoreP=search_space.gbest_precision,
                               bestScoreR=search_space.gbest_recall,
                               bestScoreF=search_space.gbest_fscore,
                               bestModel=search_space.gbest_matrix,
                               debut=started_at,
                               insert=search_space.tab_insert,
                               find=search_space.tab_find)

                # Periodic checkpoint of the memo table.
                if (iteration % 5) == 0:
                    print(
                        "Sauvegarde du tableau actuel dans les fichiers, itération:",
                        iteration)
                    tab.dump(search_space.tab_data, search_space.tab_vals,
                             table_file)

            arg1, arg2 = utility.getList(
                bestModel=search_space.gbest_matrix,
                bestScore=search_space.gbest_value,
                bestScoreA=search_space.gbest_accuracy,
                bestScoreP=search_space.gbest_precision,
                bestScoreR=search_space.gbest_recall,
                bestScoreF=search_space.gbest_fscore,
                bestCols=search_space.gbest_column,
                indMax=search_space.gbest_position,
                unique=unique,
                mode=mode)

            x.put(list(arg1))
            y.put(list(arg2))
            besties.put(best_scores)
            names.put(folderName + ": " +
                      "{:.3f}".format(search_space.gbest_value))
            iters.put(iteration)

            tab.dump(search_space.tab_data, search_space.tab_vals,
                     table_file)
Пример #3
0
    def optimization(self, part, n_gen, n_gen_vnd, kmax, n_neighbors, data,
                     dummiesList, createDummies, normalize, metric, x, y, besties, names, iters):
        """Run the iterated local search for every mode in ``part``.

        For each mode a random boolean feature mask is scored with
        ``utility.fitness2``. Then, for ``n_gen`` iterations, ``self.k_opt``
        and ``self.vnd`` propose candidates and the incumbent is replaced
        whenever a candidate scores strictly higher. Every iteration redraws
        the two progress plots and calls ``self.write_res``; the memo table
        is dumped with ``tab.dump`` every fifth iteration and once more when
        the mode is finished. The summary of each mode is pushed onto ``x``,
        ``y``, ``besties``, ``names`` and ``iters`` through ``put``.

        Args:
            part: Iterable of mode names (strings) to process.
            n_gen: Number of outer iterations per mode.
            n_gen_vnd, kmax, n_neighbors: Forwarded to ``self.vnd`` and
                ``self.write_res``.
            data: Table holding the features and the ``self.target`` column.
            dummiesList, createDummies, normalize: Forwarded to the fitness
                and neighbourhood helpers.
            metric: Metric to optimise: 'accuracy' / 'exactitude', 'recall' /
                'rappel', 'precision' / 'précision' or 'fscore'. Any other
                value falls back to the accuracy.
            x, y, besties, names, iters: Output containers exposing ``put``
                (presumably queues - confirm against the caller).
        """
        debut = time.time()

        def save_and_close(figure, directory, file_name):
            # Resolve the file against the working directory, write the
            # figure, then release it.
            relative = os.path.join(directory, file_name)
            target = os.path.join(os.getcwd(), relative)
            figure.savefig(os.path.abspath(target), bbox_inches="tight")
            plt.close(figure)

        for mode in part:

            folderName = mode.upper()

            utility.createDirectory(path=self.path2, folderName=folderName)
            plot_dir = os.path.join(self.path2, folderName)

            feature_cols = self.data.drop([self.target], axis=1).columns
            unique = list(np.unique(data[self.target]))

            # Initialise the memo table.
            table_file = 'tab_' + self.data_name + '_' + mode
            self.tab_data, self.tab_vals = tab.init(size=len(feature_cols),
                                                    matrix_size=len(unique),
                                                    filename=table_file)

            x1 = []
            y1 = []
            y2 = []  # never filled here; still handed to write_res
            yX = []

            # Random starting mask, one flag per non-target column.
            solution = np.random.choice(a=[False, True], size=self.copy.columns.size - 1)

            accuracy, recall, precision, fscore, cols, model, obj = \
                utility.fitness2(self, mode, solution, data, dummiesList, createDummies, normalize)

            self.tab_data, self.tab_vals, self.tab_insert, self.tab_find =\
                obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

            # Was `metric == 'accuracy' or 'exactitude'`, which is always
            # truthy, so the starting score ignored the requested metric.
            by_metric = {
                'accuracy': accuracy,
                'exactitude': accuracy,
                'recall': recall,
                'rappel': recall,
                'precision': precision,
                'précision': precision,
                'fscore': fscore,
            }
            res_sol = by_metric.get(metric, accuracy)

            best_solution = solution
            best_res = res_sol
            best_accuracy = accuracy
            best_precision = precision
            best_recall = recall
            best_fscore = fscore
            best_cols = cols
            best_model = model

            iteration = 0
            while iteration < n_gen:
                instant = time.time()

                # Perturbation step.
                res_prime, res_sol_prime, accuracy_prime, precision_prime, recall_prime, fscore_prime, cols_prime,\
                model_prime = self.k_opt(
                    best_res, best_solution, best_accuracy, best_precision, best_recall, best_fscore, best_cols,
                    best_model, data, mode, dummiesList, createDummies, normalize, metric
                )

                if res_prime > best_res:
                    best_solution = res_sol_prime
                    best_res = res_prime
                    best_accuracy = accuracy_prime
                    best_precision = precision_prime
                    best_recall = recall_prime
                    best_fscore = fscore_prime
                    best_cols = cols_prime
                    best_model = model_prime

                # Local search started from the perturbed solution.
                res_nei, res_sol, accuracy_n, precision_n, recall_n, fscore_n, cols_n, model_n = self.vnd(
                    n_gen_vnd, kmax, n_neighbors, res_sol_prime, best_res, best_solution, best_accuracy, best_precision,
                    best_recall, best_fscore, best_cols, best_model, data, mode, dummiesList, createDummies, normalize,
                    metric)

                if res_nei > best_res:
                    best_solution = res_sol
                    best_res = res_nei
                    best_accuracy = accuracy_n
                    best_precision = precision_n
                    best_recall = recall_n
                    best_fscore = fscore_n
                    best_cols = cols_n
                    best_model = model_n

                print("mode: ", mode, " valeur: ", best_res, " iteration: ", iteration,
                      " temps exe: ", str(timedelta(seconds=(time.time() - instant))),
                      " temps total: ", str(timedelta(seconds=(time.time() - debut))))

                x1.append(iteration)
                y1.append(best_res)

                # Per-class TP / (FN + TP) of the incumbent's matrix.
                class_scores = []
                for i in range(len(best_model)):
                    true_pos = utility.getTruePositive(best_model, i)
                    class_scores.append(
                        true_pos / (utility.getFalseNegative(best_model, i) + true_pos))
                yX.append(class_scores)

                # Plot 1: best score per iteration.
                fig, ax = plt.subplots()
                ax.plot(x1, y1)
                ax.set_title("Evolution du score par génération (" + folderName + ")"
                             + "\nRecherche locale itérée")
                ax.set_xlabel("génération")
                ax.set_ylabel(metric)
                ax.grid()
                ax.legend(labels=["Le meilleur"],
                          loc='center left', bbox_to_anchor=(1.04, 0.5), borderaxespad=0)
                save_and_close(fig, plot_dir, 'plot_' + str(n_gen) + '.png')

                # Plot 2: one curve per class.
                fig2, ax2 = plt.subplots()
                ax2.plot(x1, yX)
                ax2.set_title("Evolution du score par génération pour chacune des classes (" + folderName + ")"
                              + "\nRecherrche locale itérée")
                ax2.set_xlabel("génération")
                ax2.set_ylabel(metric)
                ax2.grid()
                ax2.legend(labels=unique, loc='center left', bbox_to_anchor=(1.04, 0.5), borderaxespad=0)
                save_and_close(fig2, plot_dir, 'plotb_' + str(n_gen) + '.png')

                iteration = iteration + 1

                self.write_res(folderName=folderName, mode=mode, n_gen=n_gen, n_gen_vnd=n_gen_vnd,
                               kmax=kmax, n_neighbors=n_neighbors, y1=y1, y2=y2, yX=yX, colMax=best_cols,
                               bestScore=best_res, bestScoreA=best_accuracy, bestScoreP=best_precision,
                               bestScoreR=best_recall, bestScoreF=best_fscore, bestModel=best_model, debut=debut)

                # Periodic checkpoint of the memo table.
                if (iteration % 5) == 0:
                    print("Sauvegarde du tableau actuel dans les fichiers, itération:", iteration)
                    tab.dump(self.tab_data, self.tab_vals, table_file)

            arg1, arg2 = utility.getList(bestModel=best_model, bestScore=best_res, bestScoreA=best_accuracy,
                                         bestScoreP=best_precision, bestScoreR=best_recall, bestScoreF=best_fscore,
                                         bestCols=best_cols, indMax=best_solution, unique=unique, mode=mode)

            x.put(list(arg1))
            y.put(list(arg2))
            besties.put(y1)
            names.put(folderName + ": " + "{:.3f}".format(best_res))
            iters.put(iteration)

            # Final dump so entries added since the last 5-iteration
            # checkpoint are not lost, as the sibling heuristics do.
            tab.dump(self.tab_data, self.tab_vals, table_file)
Пример #4
0
    def natural_selection(self, part, n_pop, n_gen, cross_proba, F, data,
                          dummiesList, createDummies, normalize, metric, x, y,
                          besties, names, iters):
        """Run the differential-evolution search for every mode in ``part``.

        For each mode an initial population is created and scored with
        ``utility.fitness``. Then, for ``n_gen`` generations, one trial
        individual per population member is built with ``self.mutate`` and
        ``self.crossover``, the trials are scored, and ``self.selection``
        produces the next population. Every generation redraws the two
        progress plots and calls ``self.write_res``; the memo table is dumped
        with ``tab.dump`` every fifth generation and once more when the mode
        is finished. The best individual seen overall is pushed onto ``x``,
        ``y``, ``besties``, ``names`` and ``iters`` through ``put``.

        Args:
            part: Iterable of mode names (strings) to process.
            n_pop: Population size.
            n_gen: Number of generations per mode.
            cross_proba: Forwarded to ``self.crossover``.
            F: Forwarded to ``self.mutate``.
            data: Table holding the features and the ``self.target`` column.
            dummiesList, createDummies, normalize, metric: Forwarded to
                ``utility.fitness``; ``metric`` is also the y-axis label.
            x, y, besties, names, iters: Output containers exposing ``put``
                (presumably queues - confirm against the caller).
        """
        debut = time.time()
        half = int(n_pop / 2)

        def class_recalls(matrix):
            # Per-class TP / (FN + TP) of a confusion matrix.
            values = []
            for i in range(len(matrix)):
                true_pos = utility.getTruePositive(matrix, i)
                values.append(
                    true_pos / (utility.getFalseNegative(matrix, i) + true_pos))
            return values

        def save_and_close(figure, directory, file_name):
            # Resolve the file against the working directory, write the
            # figure, then release it.
            relative = os.path.join(directory, file_name)
            target = os.path.join(os.getcwd(), relative)
            figure.savefig(os.path.abspath(target), bbox_inches="tight")
            plt.close(figure)

        for mode in part:

            folderName = mode.upper()

            utility.createDirectory(path=self.path2, folderName=folderName)
            plot_dir = os.path.join(self.path2, folderName)

            # Plot axes: generation index, mean of the top half, best score,
            # per-class scores.
            x1 = []
            y1 = []
            y2 = []
            yX = []

            feature_cols = self.data.drop([self.target], axis=1).columns
            unique = list(np.unique(data[self.target]))

            # Initialise the memo table.
            table_file = 'tab_' + self.data_name + '_' + mode
            self.tab_data, self.tab_vals = tab.init(
                size=len(feature_cols),
                matrix_size=len(unique),
                filename=table_file)

            # History of the best individual of each generation.
            bestScorePro = []
            bestModelPro = []
            bestIndsPro = []
            bestColsPro = []
            bestAPro = []
            bestPPro = []
            bestRPro = []
            bestFPro = []

            # Measure the execution time.
            instant = time.time()

            # Initial population.
            pop = utility.create_population(inds=n_pop,
                                            size=self.copy.columns.size - 1)

            scores, models, inds, cols, scoresA, scoresP, scoresR, scoresF, obj = \
                utility.fitness(self=self, pop=pop, mode=mode, data=data, dummiesList=dummiesList,
                                createDummies=createDummies, normalize=normalize, metric=metric)

            self.tab_data, self.tab_vals, self.tab_insert, self.tab_find =\
                obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

            top = np.argmax(scores)
            bestScore = np.max(scores)
            bestModel = models[top]
            bestInd = inds[top]
            bestCols = cols[top]
            bestScoreA = scoresA[top]
            bestScoreP = scoresP[top]
            bestScoreR = scoresR[top]
            bestScoreF = scoresF[top]

            bestScorePro.append(bestScore)
            bestModelPro.append(bestModel)
            bestIndsPro.append(bestInd)
            bestColsPro.append(bestCols)
            bestAPro.append(bestScoreA)
            bestPPro.append(bestScoreP)
            bestRPro.append(bestScoreR)
            bestFPro.append(bestScoreF)

            # Seed the overall best from the initial population. These
            # trackers used to start as the integer 0 and were only updated
            # inside the generation loop, so n_gen == 0 (or a run whose
            # scores never exceed 0) handed placeholder ints to getList.
            scoreMax = bestScore
            modelMax = bestModel
            indMax = bestInd
            colMax = bestCols
            scoreAMax = bestScoreA
            scorePMax = bestScoreP
            scoreRMax = bestScoreR
            scoreFMax = bestScoreF

            top_half_mean = np.mean(heapq.nlargest(half, scores))
            x1.append(0)
            y1.append(top_half_mean)
            y2.append(bestScore)
            yX.append(class_recalls(bestModel))

            print(mode + " génération: 0" + " moyenne: " +
                  str(top_half_mean) +
                  " meilleur: " + str(bestScore) + " temps exe: " +
                  str(timedelta(seconds=(time.time() - instant))) +
                  " temps total: " +
                  str(timedelta(seconds=(time.time() - debut))))

            generation = 0
            for step in range(n_gen):
                # 1-based generation number; also the value reported through
                # `iters` once the loop is over.
                generation = step + 1

                instant = time.time()

                # Trial individuals, one per population member.
                mutants = []

                for i in range(n_pop):

                    # Pick 3 distinct random individuals other than i.
                    idxs = [idx for idx in range(n_pop) if idx != i]
                    selected = np.random.choice(idxs, 3, replace=False)
                    xr1, xr2, xr3 = pop[selected]

                    # Mutation.
                    mutant = self.mutate(pop, xr1, xr2, xr3, F)

                    # Crossover.
                    trial = self.crossover(pop[i], mutant, cross_proba)

                    mutants.append(trial)

                # Score every trial individual.
                scores_m, models_m, inds_m, cols_m, scoresA_m, scoresP_m, scoresR_m, scoresF_m, obj = \
                    utility.fitness(self=self, pop=mutants, mode=mode, data=data, dummiesList=dummiesList,
                                    createDummies=createDummies, normalize=normalize, metric=metric)

                self.tab_data, self.tab_vals, self.tab_insert, self.tab_find = \
                    obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

                # Selection of the next population.
                pop_score = zip(pop, scores, models, cols, scoresA, scoresP,
                                scoresR, scoresF)
                mut_score = zip(mutants, scores_m, models_m, cols_m, scoresA_m,
                                scoresP_m, scoresR_m, scoresF_m)

                pop, scores, models, cols, scoresA, scoresP, scoresR, scoresF = \
                    self.selection(pop_score, mut_score, n_pop)

                top = np.argmax(scores)
                bestScore = np.max(scores)
                bestModel = models[top]
                bestInd = pop[top]
                bestCols = cols[top]
                bestScoreA = scoresA[top]
                bestScoreP = scoresP[top]
                bestScoreR = scoresR[top]
                bestScoreF = scoresF[top]
                bestScorePro.append(bestScore)
                bestModelPro.append(bestModel)
                bestIndsPro.append(bestInd)
                bestColsPro.append(list(bestCols))
                bestAPro.append(bestScoreA)
                bestPPro.append(bestScoreP)
                bestRPro.append(bestScoreR)
                bestFPro.append(bestScoreF)

                # Mean over the n_pop/2 best scores of the population.
                top_half_mean = np.mean(heapq.nlargest(half, scores))

                print(mode + " génération: " + str(generation) +
                      " moyenne: " +
                      str(top_half_mean) +
                      " meilleur: " + str(bestScore) + " temps exe: " +
                      str(timedelta(seconds=(time.time() - instant))) +
                      " temps total: " +
                      str(timedelta(seconds=(time.time() - debut))))

                x1.append(generation)
                y1.append(top_half_mean)
                y2.append(bestScore)
                yX.append(class_recalls(bestModel))

                # Plot 1: top-half mean and best score per generation.
                fig, ax = plt.subplots()
                ax.plot(x1, y1)
                ax.plot(x1, y2)
                ax.set_title("Evolution du score par génération (" +
                             folderName + ")")
                ax.set_xlabel("génération")
                ax.set_ylabel(metric)
                ax.grid()
                ax.legend(labels=[
                    "moyenne des " + str(half) + " meilleurs",
                    "Le meilleur"
                ],
                          loc='center left',
                          bbox_to_anchor=(1.04, 0.5),
                          borderaxespad=0)
                save_and_close(fig, plot_dir, 'plot_' + str(n_gen) + '.png')

                # Plot 2: one curve per class.
                fig2, ax2 = plt.subplots()
                ax2.plot(x1, yX)
                ax2.set_title(
                    "Evolution du score par génération pour chacune des classes ("
                    + folderName + ")")
                ax2.set_xlabel("génération")
                ax2.set_ylabel(metric)
                ax2.grid()
                ax2.legend(labels=unique,
                           loc='center left',
                           bbox_to_anchor=(1.04, 0.5),
                           borderaxespad=0)
                save_and_close(fig2, plot_dir, 'plotb_' + str(n_gen) + '.png')

                # Keep the best individual seen across all generations.
                if bestScore > scoreMax:
                    scoreMax = bestScore
                    modelMax = bestModel
                    indMax = bestInd
                    colMax = bestCols
                    scoreAMax = bestScoreA
                    scorePMax = bestScoreP
                    scoreRMax = bestScoreR
                    scoreFMax = bestScoreF

                self.write_res(folderName=folderName,
                               mode=mode,
                               n_pop=n_pop,
                               n_gen=n_gen,
                               cross_proba=cross_proba,
                               F=F,
                               y1=y1,
                               y2=y2,
                               yX=yX,
                               colMax=colMax,
                               bestScorePro=bestScorePro,
                               bestAPro=bestAPro,
                               bestPPro=bestPPro,
                               bestRPro=bestRPro,
                               bestFPro=bestFPro,
                               bestModelPro=bestModelPro,
                               bestScore=bestScore,
                               bestScoreA=bestScoreA,
                               bestScoreP=bestScoreP,
                               bestScoreR=bestScoreR,
                               bestScoreF=bestScoreF,
                               bestModel=bestModel,
                               debut=debut)

                # Periodic checkpoint of the memo table.
                if (generation % 5) == 0:
                    print(
                        "Sauvegarde du tableau actuel dans les fichiers, génération:",
                        generation)
                    tab.dump(self.tab_data, self.tab_vals, table_file)

            arg1, arg2 = utility.getList(bestModel=modelMax,
                                         bestScore=scoreMax,
                                         bestScoreA=scoreAMax,
                                         bestScoreP=scorePMax,
                                         bestScoreR=scoreRMax,
                                         bestScoreF=scoreFMax,
                                         bestCols=colMax,
                                         indMax=indMax,
                                         unique=unique,
                                         mode=mode)

            x.put(list(arg1))
            y.put(list(arg2))
            besties.put(y2)
            names.put(folderName + ": " + "{:.3f}".format(scoreMax))
            iters.put(generation)

            tab.dump(self.tab_data, self.tab_vals, table_file)
Пример #5
0
    def optimization(self, part, temperature, alpha, final_temperature, data,
                     dummiesList, createDummies, normalize, metric, x, y,
                     besties, names, iters):
        """Run a simulated-annealing feature selection for every mode in ``part``.

        For each mode a random boolean feature mask is drawn, then repeatedly
        compared with a neighbour that differs by one flipped bit.  Both masks
        are scored with ``utility.fitness2``; a better (or equal) neighbour is
        always kept, a worse one is kept with probability
        ``exp((score_neighbour - score_current) / temperature)``.  The
        temperature is lowered linearly by ``alpha`` after every iteration
        until it is no longer greater than ``final_temperature``.

        Two plots are rewritten at every iteration in
        ``<self.path2>/<MODE>/``, the results are passed to
        ``self.write_res`` and the table is dumped with ``tab.dump`` once the
        schedule for a mode is finished.

        Args:
            part: iterable of mode strings; each one is forwarded to
                ``utility.fitness2`` and used to name output files.
            temperature: starting temperature, reused unchanged for each mode.
            alpha: amount subtracted from the temperature per iteration.
                NOTE(review): a value <= 0 would never terminate the loop.
            final_temperature: the loop stops once the temperature is not
                greater than this value.
            data: dataset indexed by ``self.target`` to obtain the class
                labels; also forwarded to ``utility.fitness2``.
            dummiesList, createDummies, normalize: forwarded untouched to
                ``utility.fitness2``.
            metric: name of the score to optimise.  ``'recall'``/``'rappel'``,
                ``'precision'``/``'précision'`` and ``'fscore'`` select the
                matching score; anything else (including ``'accuracy'`` and
                ``'exactitude'``) selects accuracy.
            x, y, besties, names, iters: objects exposing ``put``
                (presumably queues shared with the caller); one item per mode
                is pushed to each of them.

        Returns:
            None.  Results are delivered through the ``put`` calls, the files
            written and the ``self.tab_*`` attributes.
        """
        debut = time.time()

        for mode in part:

            folderName = mode.upper()

            utility.createDirectory(path=self.path2, folderName=folderName)

            iteration = 0

            feature_cols = self.data.drop([self.target], axis=1).columns

            unique = list(np.unique(data[self.target]))

            # Initialise the table
            self.tab_data, self.tab_vals = tab.init(
                size=len(feature_cols),
                matrix_size=len(unique),
                filename='tab_' + self.data_name + '_' + mode)

            x1 = []
            y1 = []
            y2 = []
            yX = []

            solution = np.random.choice(a=[False, True],
                                        size=self.copy.columns.size - 1)

            best_solution = None
            best_res = 0
            best_accuracy = None
            best_precision = None
            best_recall = None
            best_fscore = None
            best_cols = None
            best_model = None

            # Every mode restarts from the requested temperature.  Cooling
            # the parameter itself would leave nothing to do for the modes
            # that follow the first one.
            begin_temperature = temperature
            current_temperature = temperature

            while current_temperature > final_temperature:
                instant = time.time()

                # The neighbour is the current mask with one random bit flipped.
                mutate_index = random.sample(range(len(solution)), 1)
                neighbor = solution.copy()
                for m in mutate_index:
                    neighbor[m] = not neighbor[m]

                accuracy, recall, precision, fscore, cols, model, obj = \
                    utility.fitness2(self, mode, solution, data, dummiesList,
                                     createDummies, normalize)

                self.tab_data, self.tab_vals, self.tab_insert, self.tab_find = \
                    obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

                accuracy_n, recall_n, precision_n, fscore_n, cols_n, model_n, obj = \
                    utility.fitness2(self, mode, neighbor, data, dummiesList,
                                     createDummies, normalize)

                self.tab_data, self.tab_vals, self.tab_insert, self.tab_find = \
                    obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

                # Membership tests: ``metric == 'a' or 'b'`` is always truthy
                # and would select accuracy whatever the caller asked for.
                if metric in ('recall', 'rappel'):
                    res_sol, res_nei = recall, recall_n
                elif metric in ('precision', 'précision'):
                    res_sol, res_nei = precision, precision_n
                elif metric == 'fscore':
                    res_sol, res_nei = fscore, fscore_n
                else:
                    # 'accuracy', 'exactitude' and any unknown name.
                    res_sol, res_nei = accuracy, accuracy_n

                # The first evaluated solution always becomes the best so
                # far, so the best_* values are never None inside the loop
                # even when the score is 0.
                if best_model is None or res_sol > best_res:
                    best_solution = solution
                    best_res = res_sol
                    best_accuracy = accuracy
                    best_precision = precision
                    best_recall = recall
                    best_fscore = fscore
                    best_cols = cols
                    best_model = model

                # The score is maximised: a non-negative cost is an
                # improvement.  A worse neighbour (cost < 0) is accepted with
                # probability exp(cost / T), which lies in (0, 1) and shrinks
                # as the temperature drops.
                cost = res_nei - res_sol
                if cost >= 0:
                    solution = neighbor
                    res_sol = res_nei
                elif current_temperature > 0:
                    r = random.uniform(0, 1)
                    if r < math.exp(cost / current_temperature):
                        solution = neighbor
                        res_sol = res_nei

                print("mode: ", mode, " valeur: ", best_res, " iteration: ",
                      iteration, " temps exe: ",
                      str(timedelta(seconds=(time.time() - instant))),
                      " temps total: ",
                      str(timedelta(seconds=(time.time() - debut))))

                x1.append(iteration)
                y1.append(best_res)
                y2.append(res_sol)

                # Per-class TP / (TP + FN) of the best model so far.
                # NOTE(review): best_model is presumably a confusion matrix
                # with one row per class - confirm against utility.fitness2.
                class_scores = []
                for i in range(len(best_model)):
                    true_pos = utility.getTruePositive(best_model, i)
                    false_neg = utility.getFalseNegative(best_model, i)
                    denominator = false_neg + true_pos
                    # A class without any sample would divide by zero.
                    class_scores.append(
                        true_pos / denominator if denominator else 0.0)
                yX.append(class_scores)

                fig, ax = plt.subplots()
                ax.plot(x1, y1)
                ax.plot(x1, y2)
                ax.set_title("Evolution du score par génération (" +
                             folderName + ")" + "\nRecuit simulé")
                ax.set_xlabel("génération")
                ax.set_ylabel(metric)
                ax.grid()
                ax.legend(labels=["Le meilleur", "Valeur actuelle"],
                          loc='center left',
                          bbox_to_anchor=(1.04, 0.5),
                          borderaxespad=0)
                a = os.path.join(os.path.join(self.path2, folderName),
                                 'plot_' + str(begin_temperature) + '.png')
                b = os.path.join(os.getcwd(), a)
                fig.savefig(os.path.abspath(b), bbox_inches="tight")
                plt.close(fig)

                fig2, ax2 = plt.subplots()

                ax2.plot(x1, yX)

                ax2.set_title(
                    "Evolution du score par génération pour chacune des classes ("
                    + folderName + ")" + "\nRecuit simulé")
                ax2.set_xlabel("génération")
                ax2.set_ylabel(metric)
                ax2.grid()
                ax2.legend(labels=unique,
                           loc='center left',
                           bbox_to_anchor=(1.04, 0.5),
                           borderaxespad=0)
                a = os.path.join(os.path.join(self.path2, folderName),
                                 'plotb_' + str(begin_temperature) + '.png')
                b = os.path.join(os.getcwd(), a)
                fig2.savefig(os.path.abspath(b), bbox_inches="tight")
                plt.close(fig2)

                current_temperature = current_temperature - alpha
                iteration = iteration + 1

            self.write_res(folderName=folderName,
                           mode=mode,
                           temperature=begin_temperature,
                           alpha=alpha,
                           final_temperature=final_temperature,
                           y1=y1,
                           y2=y2,
                           yX=yX,
                           colMax=best_cols,
                           bestScore=best_res,
                           bestScoreA=best_accuracy,
                           bestScoreP=best_precision,
                           bestScoreR=best_recall,
                           bestScoreF=best_fscore,
                           bestModel=best_model,
                           debut=debut)

            # NOTE(review): this check runs once, after the annealing loop,
            # and the table is dumped unconditionally below anyway - confirm
            # whether a periodic dump inside the loop was intended.
            if (iteration % 5) == 0:
                print(
                    "Sauvegarde du tableau actuel dans les fichiers, itération:",
                    iteration)
                tab.dump(self.tab_data, self.tab_vals,
                         'tab_' + self.data_name + '_' + mode)

            arg1, arg2 = utility.getList(bestModel=best_model,
                                         bestScore=best_res,
                                         bestScoreA=best_accuracy,
                                         bestScoreP=best_precision,
                                         bestScoreR=best_recall,
                                         bestScoreF=best_fscore,
                                         bestCols=best_cols,
                                         indMax=best_solution,
                                         unique=unique,
                                         mode=mode)

            x.put(list(arg1))
            y.put(list(arg2))
            besties.put(y2)
            names.put(folderName + ": " + "{:.3f}".format(best_res))
            iters.put(iteration)

            tab.dump(self.tab_data, self.tab_vals,
                     'tab_' + self.data_name + '_' + mode)