def grid(self):
    """Grid-search ``n_neighbors`` using one fixed train/test split.

    The training and test samples from ``LearnUtils.get_learn_data()`` are
    stacked into a single data set, and a ``PredefinedSplit`` marks which rows
    may only be used for training (``-1``) and which form the single
    validation fold (``0``).

    Returns:
        The fitted ``GridSearchCV`` instance.
    """
    train_labels, train_array, test_array = LearnUtils.get_learn_data()
    # Each encoded label is repeated twice to build the test labels.
    # NOTE(review): presumably two test samples per class - confirm with LearnUtils.
    test_labels = np.repeat(LearnUtils.get_encoded_labels(), 2)

    n_train = train_array.shape[0]
    n_test = test_array.shape[0]

    # -1 -> row is never put in a test fold, 0 -> row belongs to test fold 0.
    train_marks = np.full(n_train, -1, dtype=int)
    test_marks = np.full(n_test, 0, dtype=int)
    split = PredefinedSplit(np.append(train_marks, test_marks))

    # Candidate neighbour counts: 1 .. n_train - 2.
    candidates = {"n_neighbors": list(range(1, n_train - 1))}

    search = GridSearchCV(self.get_classifier(), candidates, cv=split)

    all_samples = np.append(train_array, test_array, axis=0)
    all_labels = np.append(train_labels, test_labels, axis=0)
    search.fit(all_samples, all_labels)
    return search
def grid(self):
    """Grid-search ``C``, ``gamma`` and ``kernel`` using one fixed train/test split.

    The training and test samples from ``LearnUtils.get_learn_data()`` are
    stacked into a single data set, and a ``PredefinedSplit`` marks which rows
    may only be used for training (``-1``) and which form the single
    validation fold (``0``).

    Returns:
        The fitted ``GridSearchCV`` instance.
    """
    train_labels, train_array, test_array = LearnUtils.get_learn_data()
    # Each encoded label is repeated twice to build the test labels.
    # NOTE(review): presumably two test samples per class - confirm with LearnUtils.
    test_labels = np.repeat(LearnUtils.get_encoded_labels(), 2)

    # -1 -> row is never put in a test fold, 0 -> row belongs to test fold 0.
    train_marks = np.full(train_array.shape[0], -1, dtype=int)
    test_marks = np.full(test_array.shape[0], 0, dtype=int)
    split = PredefinedSplit(np.append(train_marks, test_marks))

    candidates = {
        "C": [0.001, 0.01, 0.1, 1, 10, 20, 100, 1000],
        "gamma": [0.001, 0.01, 0.1, 1, 2, 5, 10],
        "kernel": ["linear", "poly", "rbf", "sigmoid"],
    }

    search = GridSearchCV(self.get_classifier(), candidates, cv=split)

    all_samples = np.append(train_array, test_array, axis=0)
    all_labels = np.append(train_labels, test_labels, axis=0)
    search.fit(all_samples, all_labels)
    return search
def grid(self):
    """Grid-search the forest hyper-parameters using one fixed train/test split.

    The training and test samples from ``LearnUtils.get_learn_data()`` are
    stacked into a single data set, and a ``PredefinedSplit`` marks which rows
    may only be used for training (``-1``) and which form the single
    validation fold (``0``).

    Returns:
        The fitted ``GridSearchCV`` instance.
    """
    train_labels, train_array, test_array = LearnUtils.get_learn_data()
    # Each encoded label is repeated twice to build the test labels.
    # NOTE(review): presumably two test samples per class - confirm with LearnUtils.
    test_labels = np.repeat(LearnUtils.get_encoded_labels(), 2)

    # -1 -> row is never put in a test fold, 0 -> row belongs to test fold 0.
    test_fold = np.append(
        np.full(train_array.shape[0], -1, dtype=int),
        np.full(test_array.shape[0], 0, dtype=int),
    )
    ps = PredefinedSplit(test_fold)

    param_grid = dict(
        n_estimators=[10, 50, 100, 200, 500],
        # "auto" is intentionally not listed: for scikit-learn forest
        # classifiers it was only an alias of "sqrt" (a duplicate candidate),
        # it was deprecated in 1.1 and removed in 1.3, where every candidate
        # using it fails to fit.
        # NOTE(review): assumes get_classifier() returns a forest classifier - confirm.
        max_features=["sqrt", "log2"],
        max_depth=[None, 2, 3, 4, 5, 6, 7, 8],
        criterion=["gini", "entropy"],
    )

    clf = self.get_classifier()
    grid = GridSearchCV(clf, param_grid, cv=ps)
    grid.fit(
        np.append(train_array, test_array, axis=0),
        np.append(train_labels, test_labels, axis=0),
    )
    return grid
def learn(self):
    """Fit the classifier on the training data and predict the test samples.

    The classifier is obtained from ``self.get_classifier()`` and the data
    from ``LearnUtils.get_learn_data()``.

    Returns:
        The predictions for the test array, converted with ``tolist()``.
    """
    train_labels, train_samples, test_samples = LearnUtils.get_learn_data()
    model = self.get_classifier()
    model.fit(train_samples, train_labels)
    predictions = model.predict(test_samples)
    return predictions.tolist()
def plot_confusion_matrix(y_true, y_pred, normalize=False, title=None, cmap="Reds"):
    """Print a header for, and plot, the confusion matrix of ``y_pred`` against ``y_true``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        normalize: When true, every row of the matrix is divided by its sum.
        title: Figure title; a default depending on ``normalize`` is used
            when it is empty or ``None``.
        cmap: Colormap handed to ``Axes.imshow``; either a colormap name or
            a ``Colormap`` instance.

    Returns:
        The Matplotlib figure containing the plot.
    """
    # The default colormap is given by name rather than through
    # ``plt.cm.get_cmap``: that helper was removed in Matplotlib 3.9, and a
    # call in the signature would run (and fail) at import time.
    if not title:
        if normalize:
            title = 'Нормальзованная матрица смещения'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = LearnUtils.decode_labels(unique_labels(y_true, y_pred))
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class that only occurs in y_pred has an all-zero row; dividing by
        # 1 keeps that row at 0.0 instead of filling it (and the colour
        # threshold below) with NaN.
        row_totals[row_totals == 0] = 1
        cm = cm.astype('float') / row_totals
        print("Нормальзованная матрица смещения")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots(figsize=(10, 8))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(
        xticks=np.arange(cm.shape[1]),
        yticks=np.arange(cm.shape[0]),
        # ... and label them with the respective list entries
        xticklabels=classes,
        yticklabels=classes,
        title=title,
        ylabel='Тестовые классы',
        xlabel='Оценочные классы',
    )

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.1f' if normalize else 'd'
    # Cells above half of the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(
                j, i, format(cm[i, j], fmt),
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black",
            )
    fig.tight_layout()
    return fig
def learn(self, feature_method_name: str = None) -> List[int]:
    """Train a classifier, optionally after feature selection, and predict the test set.

    Args:
        feature_method_name: Key into the instance's feature-method mapping.
            When given, the selected method is fitted on the training data
            and applied to both the training and the test array; when
            ``None`` the data is used as is.

    Returns:
        The predictions for the test array, converted with ``tolist()``.
    """
    labels, train_samples, test_samples = LearnUtils.get_learn_data()

    apply_selection = feature_method_name is not None
    if apply_selection:
        selector = self.__feature_methods[feature_method_name]
        # Fit on the training data only, then apply the same transform to both sets.
        selector.fit(train_samples, labels)
        train_samples = selector.transform(train_samples)
        test_samples = selector.transform(test_samples)

    model = self.__create_classifier()
    model.fit(train_samples, labels)
    predictions = model.predict(test_samples)
    return predictions.tolist()
def get_average_precision_score(y_test, y_score):
    """Compute per-class and micro-averaged precision/recall data.

    Args:
        y_test: True labels; binarized against ``LearnUtils.get_encoded_labels()``.
        y_score: Score matrix indexed as ``y_score[:, class_index]``.

    Returns:
        A ``(precision, recall, average_precision)`` tuple of dicts, keyed
        by the integer class index plus the extra key ``"micro"``.
    """
    classes = LearnUtils.get_encoded_labels()
    truth = label_binarize(y_test, classes=classes)

    precision, recall, average_precision = {}, {}, {}

    for class_idx in range(len(classes)):
        truth_column = truth[:, class_idx]
        score_column = y_score[:, class_idx]
        curve = precision_recall_curve(truth_column, score_column)
        precision[class_idx], recall[class_idx] = curve[0], curve[1]
        average_precision[class_idx] = average_precision_score(truth_column, score_column)

    # A "micro-average": quantifying score on all classes jointly
    micro_curve = precision_recall_curve(truth.ravel(), y_score.ravel())
    precision["micro"], recall["micro"] = micro_curve[0], micro_curve[1]
    average_precision["micro"] = average_precision_score(truth, y_score, average="micro")

    return precision, recall, average_precision
def plot_precision_recall_curve_for_each_class(y_test, y_score):
    """Plot iso-F1 curves, the micro-averaged and the per-class precision-recall curves.

    The curve data comes from ``get_average_precision_score(y_test, y_score)``.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    precision, recall, average_precision = get_average_precision_score(y_test, y_score)

    palette = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])

    plt.figure(figsize=(7, 8))

    # Handles and captions are collected for the legend call that is
    # currently disabled near the end of this function.
    lines, labels = [], []

    # Light grey iso-F1 curves as a visual reference.
    recall_axis = np.linspace(0.01, 1)
    for f_score in np.linspace(0.2, 0.8, num=4):
        iso_precision = f_score * recall_axis / (2 * recall_axis - f_score)
        visible = iso_precision >= 0
        handle, = plt.plot(recall_axis[visible], iso_precision[visible], color='gray', alpha=0.2)
        plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, iso_precision[45] + 0.02))
    lines.append(handle)
    labels.append('iso-f1 curves')

    handle, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
    lines.append(handle)
    labels.append('micro-average Precision-recall (area = {0:0.2f})'
                  ''.format(average_precision["micro"]))

    class_count = len(LearnUtils.get_encoded_labels())
    for class_idx, color in zip(range(class_count), palette):
        handle, = plt.plot(recall[class_idx], precision[class_idx], color=color, lw=2)
        lines.append(handle)
        labels.append('Precision-recall for class {0} (area = {1:0.2f})'
                      ''.format(class_idx, average_precision[class_idx]))

    figure = plt.gcf()
    figure.subplots_adjust(bottom=0.25)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    # plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=14))

    plt.show()
def main():
    """Run the full experiment: parse data, train every model, save confusion matrices.

    Steps:
      1. Parse the input data and turn it into vectors.
      2. Configure ``LearnUtils`` with those vectors and test indexes 1 and 4.
      3. Train and evaluate the voting, KNN, SVC, random-forest and Bayes models.
      4. Plot a normalized confusion matrix per model and save each one as a
         PNG file under ``../output/optimized``.

    The commented-out sections are optional experiments kept for manual use.
    """
    import os  # local import: only needed to make sure the output directory exists

    print("Process started")
    data = parser.parse_all()
    print("Reading and parsing completed")
    # data = {i: data[i] for i in list(data.keys())[3::2]}
    vectors = get_vectors_for_data(data)
    print("Vectors created")
    LearnUtils.set_up(vectors, test_indexes=[1, 4])
    print("Utils setup completed")

    # Each encoded label is repeated twice to build the expected test labels.
    # NOTE(review): presumably matches the two test indexes above - confirm.
    y_test = np.repeat(LearnUtils.get_encoded_labels(), 2)

    knn = KNN()
    svc = SVCMethod()
    random_forest = RandomForest()
    bayes = Bayes()
    voting = Voting()

    # ----------------------VOTING LEARN----------------------#
    voting_result = voting.learn()
    voting_score = accuracy_score(y_test, voting_result)

    # ----------------------VOTING FEATURE SELECTION----------#
    # scores = {}
    # scores["without"] = accuracy_score(y_test, voting.learn())
    # for feature_method_name in voting.get_feature_method_names():
    #     print(f"Start {feature_method_name}")
    #     voting_result = voting.learn(feature_method_name)
    #     scores[feature_method_name] = accuracy_score(y_test, voting_result)

    # ----------------------VOTING CROSS VALIDATION-----------#
    # cross_val_result = voting.cross_validation()

    # ----------------------GRID------------------------------#
    # knn_grid_result = knn.grid()
    # svc_grid_result = svc.grid()
    # random_forest_grid_result = random_forest.grid()
    # gaussian_grid_result = gaussian.grid()

    # ----------------------CLASSIFICATION--------------------#
    knn_result = knn.learn()
    knn_score = accuracy_score(y_test, knn_result)

    svc_pred = svc.learn()
    svc_acc_score = accuracy_score(y_test, svc_pred)
    print_classification_report(y_test, svc_pred, LearnUtils.get_labels())

    random_forest_result = random_forest.learn()
    random_forest_score = accuracy_score(y_test, random_forest_result)

    bayes_result = bayes.learn()
    bayes_score = accuracy_score(y_test, bayes_result)

    # ----------------------CLASSIFICATION MULTIPLE TIMES-----#
    # times = 100
    # knn_score = run_multiple_times(knn, y_test, times)
    # svc_score = run_multiple_times(svc, y_test, times)
    # random_forest_score = run_multiple_times(random_forest, y_test, times)
    # gaussian_score = run_multiple_times(gaussian, y_test, times)
    # bayes_score = run_multiple_times(bayes, y_test, times)

    # ----------------------CONFUSION MATRIX----------------------#
    # (output file stem, predictions, figure title) for every evaluated model.
    reports = [
        ("voting", voting_result,
         f'Матрица ошибок для обобщенных методов\nТочность оценки: {voting_score:.2f}'),
        ("knn", knn_result,
         f'Матрица ошибок для метода "K ближайших соседей"\nТочность оценки: {knn_score:.2f}'),
        ("svc", svc_pred,
         f'Матрица ошибок для метода опорных векторов\nТочность оценки: {svc_acc_score:.2f}'),
        ("random_forest", random_forest_result,
         f'Матрица ошибок для метода случайного леса\nТочность оценки: {random_forest_score:.2f}'),
        ("bayes", bayes_result,
         f'Матрица ошибок для метода найвного Байеса\nТочность оценки: {bayes_score:.2f}'),
    ]

    figures = [
        (stem, plot_confusion_matrix(y_test, predictions, normalize=True, title=title))
        for stem, predictions, title in reports
    ]

    # savefig does not create missing directories, so make sure the target exists.
    output_dir = "../output/optimized"
    os.makedirs(output_dir, exist_ok=True)
    for stem, figure in figures:
        figure.savefig(f"{output_dir}/{stem}.png")
def cross_validation(self) -> List[float]:
    """Cross-validate a newly created classifier with ``cv=6``.

    The data comes from ``LearnUtils.get_cross_val_data()``.

    Returns:
        The result of ``cross_val_score`` for the classifier, unchanged.
    """
    labels, samples = LearnUtils.get_cross_val_data()
    estimator = self.__create_classifier()
    fold_scores = cross_val_score(estimator, samples, labels, cv=6)
    return fold_scores