Example #1
# Required imports (not shown in the snippet):
import numpy as np
from sklearn.model_selection import GridSearchCV, PredefinedSplit

def grid(self):
    train_labels, train_array, test_array = LearnUtils.get_learn_data()
    test_labels = np.repeat(LearnUtils.get_encoded_labels(), 2)
    # -1 marks samples that always stay in the training set;
    # 0 marks the single predefined validation fold.
    ps = PredefinedSplit(np.append(np.full(train_array.shape[0], -1, dtype=int),
                                   np.full(test_array.shape[0], 0, dtype=int)))
    param_grid = dict(n_neighbors=list(range(1, train_array.shape[0] - 1)))
    clf = self.get_classifier()
    grid = GridSearchCV(clf, param_grid, cv=ps)
    grid.fit(np.append(train_array, test_array, axis=0),
             np.append(train_labels, test_labels, axis=0))
    return grid
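
All three grid searches below rely on the same trick: PredefinedSplit puts every sample with fold index -1 permanently into the training set and uses fold 0 as the single validation fold, so GridSearchCV scores each parameter combination exactly once on the held-out data. A minimal, self-contained sketch of that behaviour, with hypothetical five-sample data:

import numpy as np
from sklearn.model_selection import PredefinedSplit

test_fold = np.array([-1, -1, -1, 0, 0])  # -1: always train, 0: validation fold
ps = PredefinedSplit(test_fold)
print(ps.get_n_splits())            # 1
for train_idx, test_idx in ps.split():
    print(train_idx, test_idx)      # [0 1 2] [3 4]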
Example #2
# Imports as in Example #1.
def grid(self):
    train_labels, train_array, test_array = LearnUtils.get_learn_data()
    test_labels = np.repeat(LearnUtils.get_encoded_labels(), 2)
    ps = PredefinedSplit(np.append(np.full(train_array.shape[0], -1, dtype=int),
                                   np.full(test_array.shape[0], 0, dtype=int)))
    param_grid = dict(C=[0.001, 0.01, 0.1, 1, 10, 20, 100, 1000],
                      gamma=[0.001, 0.01, 0.1, 1, 2, 5, 10],
                      kernel=["linear", "poly", "rbf", "sigmoid"])
    clf = self.get_classifier()
    grid = GridSearchCV(clf, param_grid, cv=ps)
    grid.fit(np.append(train_array, test_array, axis=0),
             np.append(train_labels, test_labels, axis=0))
    return grid
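
Once fitted, the returned GridSearchCV object exposes the winning hyperparameters. A hypothetical usage, assuming an `svc` instance of this class:

grid = svc.grid()
print(grid.best_params_)     # e.g. {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
print(grid.best_score_)      # accuracy on the predefined validation fold
print(grid.best_estimator_)  # refit on all passed data (GridSearchCV's refit=True default)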
Example #3
# Imports as in Example #1.
def grid(self):
    train_labels, train_array, test_array = LearnUtils.get_learn_data()
    test_labels = np.repeat(LearnUtils.get_encoded_labels(), 2)
    ps = PredefinedSplit(np.append(np.full(train_array.shape[0], -1, dtype=int),
                                   np.full(test_array.shape[0], 0, dtype=int)))
    param_grid = dict(
        n_estimators=[10, 50, 100, 200, 500],
        # "auto" was removed for RandomForestClassifier in scikit-learn 1.3;
        # on current versions use "sqrt"/"log2" (or None) instead.
        max_features=["sqrt", "log2"],
        max_depth=[None, 2, 3, 4, 5, 6, 7, 8],
        criterion=["gini", "entropy"]
    )
    clf = self.get_classifier()
    grid = GridSearchCV(clf, param_grid, cv=ps)
    grid.fit(np.append(train_array, test_array, axis=0),
             np.append(train_labels, test_labels, axis=0))
    return grid
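
Examples #1 to #3 differ only in param_grid; if the classifier classes share a base class, the split-and-fit logic could live there once. A sketch of that refactoring (the base-class name and the two abstract hooks are assumptions, not the original code):

import numpy as np
from sklearn.model_selection import GridSearchCV, PredefinedSplit

class BaseMethod:
    def param_grid(self) -> dict:   # overridden per classifier
        raise NotImplementedError

    def get_classifier(self):       # overridden per classifier
        raise NotImplementedError

    def grid(self):
        train_labels, train_array, test_array = LearnUtils.get_learn_data()
        test_labels = np.repeat(LearnUtils.get_encoded_labels(), 2)
        ps = PredefinedSplit(np.append(np.full(train_array.shape[0], -1, dtype=int),
                                       np.full(test_array.shape[0], 0, dtype=int)))
        grid = GridSearchCV(self.get_classifier(), self.param_grid(), cv=ps)
        grid.fit(np.append(train_array, test_array, axis=0),
                 np.append(train_labels, test_labels, axis=0))
        return grid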
Example #4
def learn(self):
    labels, train_array, test_array = LearnUtils.get_learn_data()

    clf = self.get_classifier()
    clf.fit(train_array, labels)

    return clf.predict(test_array).tolist()
Example #5
# Required imports (not shown in the snippet):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

def plot_confusion_matrix(y_true,
                          y_pred,
                          normalize=False,
                          title=None,
                          cmap="Reds"):  # a colormap name string; plt.cm.get_cmap was removed in matplotlib 3.9
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = LearnUtils.decode_labels(unique_labels(y_true, y_pred))
    if normalize:
        # Normalize each row so it sums to 1
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots(figsize=(10, 8))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # Show all ticks and label them with the class names
    ax.set(
        xticks=np.arange(cm.shape[1]),
        yticks=np.arange(cm.shape[0]),
        xticklabels=classes,
        yticklabels=classes,
        title=title,
        ylabel='True classes',
        xlabel='Predicted classes')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(),
             rotation=45,
             ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.1f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j,
                    i,
                    format(cm[i, j], fmt),
                    ha="center",
                    va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return fig
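
Since scikit-learn 1.0, essentially the same plot is available built in via ConfusionMatrixDisplay; a minimal sketch of the equivalent call, using names from the main() example below:

from sklearn.metrics import ConfusionMatrixDisplay

disp = ConfusionMatrixDisplay.from_predictions(
    y_test, voting_result,
    normalize='true', cmap='Reds', xticks_rotation=45)
disp.figure_.savefig("../output/optimized/voting.png")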
Example #6
# Required imports (not shown in the snippet):
from typing import List, Optional

def learn(self, feature_method_name: Optional[str] = None) -> List[int]:
    labels, train_array, test_array = LearnUtils.get_learn_data()
    if feature_method_name is not None:
        # Fit the selector on the training data only, then apply it to both sets
        feature_filter = self.__feature_methods[feature_method_name]
        feature_filter.fit(train_array, labels)
        train_array = feature_filter.transform(train_array)
        test_array = feature_filter.transform(test_array)
    clf = self.__create_classifier()
    clf.fit(train_array, labels)
    return clf.predict(test_array).tolist()
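
The __feature_methods mapping itself is not shown; any object with fit/transform works. A plausible definition inside __init__, using scikit-learn selectors (purely an assumption, not the original code):

from sklearn.feature_selection import SelectKBest, VarianceThreshold, f_classif

self.__feature_methods = {
    "k_best": SelectKBest(f_classif, k=20),        # keep the 20 highest-scoring features
    "variance": VarianceThreshold(threshold=0.1),  # drop near-constant features
}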
Example #7
# Required imports (not shown in the snippet):
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.preprocessing import label_binarize

def get_average_precision_score(y_test, y_score):
    classes = LearnUtils.get_encoded_labels()
    # One-vs-rest binarization: one indicator column per class
    y_test = label_binarize(y_test, classes=classes)
    precision = dict()
    recall = dict()
    average_precision = dict()
    for i in range(len(classes)):
        precision[i], recall[i], _ = precision_recall_curve(
            y_test[:, i], y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i],
                                                       y_score[:, i])

    # A "micro-average": quantifying score on all classes jointly
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test,
                                                         y_score,
                                                         average="micro")
    return precision, recall, average_precision
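
label_binarize converts the integer label vector into a one-vs-rest indicator matrix, which is what makes the per-column precision_recall_curve calls above work. For example:

from sklearn.preprocessing import label_binarize

label_binarize([0, 1, 2, 0], classes=[0, 1, 2])
# array([[1, 0, 0],
#        [0, 1, 0],
#        [0, 0, 1],
#        [1, 0, 0]])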
Example #8
# Required imports (not shown in the snippet):
from itertools import cycle
import numpy as np
import matplotlib.pyplot as plt

def plot_precision_recall_curve_for_each_class(y_test, y_score):
    precision, recall, average_precision = get_average_precision_score(
        y_test, y_score)
    colors = cycle(
        ['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])

    plt.figure(figsize=(7, 8))
    # Draw iso-f1 contours as a reading aid
    f_scores = np.linspace(0.2, 0.8, num=4)
    lines = []
    labels = []
    for f_score in f_scores:
        x = np.linspace(0.01, 1)
        y = f_score * x / (2 * x - f_score)
        l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2)
        plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))

    # One legend entry covers all iso-f1 curves, so append only once after the loop
    lines.append(l)
    labels.append('iso-f1 curves')
    l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
    lines.append(l)
    labels.append('micro-average Precision-recall (area = {0:0.2f})'
                  ''.format(average_precision["micro"]))

    for i, color in zip(range(len(LearnUtils.get_encoded_labels())), colors):
        l, = plt.plot(recall[i], precision[i], color=color, lw=2)
        lines.append(l)
        labels.append('Precision-recall for class {0} (area = {1:0.2f})'
                      ''.format(i, average_precision[i]))

    fig = plt.gcf()
    fig.subplots_adjust(bottom=0.25)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.legend(lines, labels, loc=(0, -.38), prop=dict(size=14))

    plt.show()
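
The iso-f1 contours come from solving F1 = 2PR/(P + R) for precision at a fixed F1: P = F1·R / (2R − F1), which is exactly the y = f_score * x / (2 * x - f_score) expression in the loop. Every point on one gray curve therefore has the same F1 score, so a classifier's curve can be read against them directly.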
Example #9
def main():
    print("Process started")
    data = parser.parse_all()
    print("Reading and parsing completed")
    # Optional: thin out the data set for quick experiments
    # data = {i: data[i] for i in list(data.keys())[3::2]}
    vectors = get_vectors_for_data(data)
    print("Vectors created")
    LearnUtils.set_up(vectors, test_indexes=[1, 4])
    print("Utils setup completed")
    y_test = np.repeat(LearnUtils.get_encoded_labels(), 2)
    times = 100
    knn = KNN()
    svc = SVCMethod()
    random_forest = RandomForest()
    bayes = Bayes()
    voting = Voting()

    # ----------------------VOTING LEARN----------------------#
    voting_result = voting.learn()
    voting_score = accuracy_score(y_test, voting_result)

    # ----------------------VOTING FEATURE SELECTION----------#
    # scores = {}
    # scores["without"] = accuracy_score(y_test, voting.learn())
    # for feature_method_name in voting.get_feature_method_names():
    #     print(f"Start {feature_method_name}")
    #     voting_result = voting.learn(feature_method_name)
    #     scores[feature_method_name] = accuracy_score(y_test, voting_result)

    # ----------------------VOTING CROSS VALIDATION-----------#
    # cross_val_result = voting.cross_validation()

    # ----------------------GRID------------------------------#
    # knn_grid_result = knn.grid()
    # svc_grid_result = svc.grid()
    # random_forest_grid_result = random_forest.grid()

    # ----------------------CLASSIFICATION--------------------#
    knn_result = knn.learn()
    knn_score = accuracy_score(y_test, knn_result)

    svc_pred = svc.learn()
    svc_acc_score = accuracy_score(y_test, svc_pred)
    print_classification_report(y_test, svc_pred, LearnUtils.get_labels())

    random_forest_result = random_forest.learn()
    random_forest_score = accuracy_score(y_test, random_forest_result)

    bayes_result = bayes.learn()
    bayes_score = accuracy_score(y_test, bayes_result)

    # ----------------------CLASSIFICATION MULTIPLE TIMES-----#
    # knn_score = run_multiple_times(knn, y_test, times)
    # svc_score = run_multiple_times(svc, y_test, times)
    # random_forest_score = run_multiple_times(random_forest, y_test, times)
    # bayes_score = run_multiple_times(bayes, y_test, times)

    # ----------------------CONFUSION MATRIX------------------#
    fig = plot_confusion_matrix(y_test, voting_result, normalize=True,
                                title=f'Confusion matrix for the voting ensemble\nAccuracy: {voting_score:.2f}')
    fig1 = plot_confusion_matrix(y_test, knn_result, normalize=True,
                                 title=f'Confusion matrix for k-nearest neighbors\nAccuracy: {knn_score:.2f}')
    fig2 = plot_confusion_matrix(y_test, svc_pred, normalize=True,
                                 title=f'Confusion matrix for the support vector machine\nAccuracy: {svc_acc_score:.2f}')
    fig3 = plot_confusion_matrix(y_test, random_forest_result, normalize=True,
                                 title=f'Confusion matrix for the random forest\nAccuracy: {random_forest_score:.2f}')
    fig4 = plot_confusion_matrix(y_test, bayes_result, normalize=True,
                                 title=f'Confusion matrix for naive Bayes\nAccuracy: {bayes_score:.2f}')
    fig.savefig("../output/optimized/voting.png")
    fig1.savefig("../output/optimized/knn.png")
    fig2.savefig("../output/optimized/svc.png")
    fig3.savefig("../output/optimized/random_forest.png")
    fig4.savefig("../output/optimized/bayes.png")
Example #10
# Required import (not shown in the snippet):
from sklearn.model_selection import cross_val_score

def cross_validation(self) -> List[float]:
    labels, train_array = LearnUtils.get_cross_val_data()
    clf = self.__create_classifier()
    # cross_val_score returns an ndarray of one accuracy per fold;
    # convert to a list to match the annotation
    return cross_val_score(clf, train_array, labels, cv=6).tolist()
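
Hypothetical usage, summarizing the six per-fold scores (the `voting` instance name follows the main() example above):

import numpy as np

scores = voting.cross_validation()
print(f"accuracy: {np.mean(scores):.3f} +/- {np.std(scores):.3f}")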