def classify(classify_class1=True):
    """Classify every test sample with the project's KNN and print the results.

    The dataset is loaded from ``dp.bow_log_dataset_dir`` when that path
    exists; otherwise it is obtained from ``dp.load_file_save_dataset()``.
    Each test sample is passed to ``knn.classify`` and a per-sample verdict is
    printed, followed by the overall accuracy as a percentage.

    Args:
        classify_class1: When truthy, use the ``*1`` label set
            (``train_class_labels1`` / ``test_class_labels1`` /
            ``class_label1_map``); otherwise use the ``*2`` label set.

    Returns:
        None. All output goes to stdout.
    """
    if not os.path.exists(dp.bow_log_dataset_dir):
        log_dataset = dp.load_file_save_dataset()
    else:
        log_dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)

    # The feature matrices are the same for both label sets; only the labels
    # and the label map depend on the flag.
    test_features = log_dataset.test_features
    train_features = log_dataset.train_features
    if classify_class1:
        test_class_labels = log_dataset.test_class_labels1
        train_class_labels = log_dataset.train_class_labels1
        label_map = log_dataset.class_label1_map
    else:
        test_class_labels = log_dataset.test_class_labels2
        train_class_labels = log_dataset.train_class_labels2
        label_map = log_dataset.class_label2_map

    acc = 0
    for i in range(log_dataset.test_num):
        # NOTE(review): the trailing 3 is presumably the neighbour count k --
        # confirm against knn.classify.
        class_label = knn.classify(test_features[i], train_features,
                                   train_class_labels, 3)
        actual_label = test_class_labels[i]
        if class_label == actual_label:
            acc += 1
            # Print the mapped label, exactly as the mismatch branch does;
            # previously this branch printed the raw label value instead.
            print('正确', label_map[class_label])
        else:
            # Message reads "wrong, predicted class: ..., actual class: ...".
            print('错误, 预测类别:%s, 正确类别:%s' %
                  (label_map[class_label], label_map[actual_label]))

    # Message reads "accuracy: NN.NN%".
    print('正确率:%.2f%%' % (100.0 * acc / log_dataset.test_num))
Exemplo n.º 2
0
def classify(classify_class1=True):
    """Train the project's softmax classifier and report test-set results.

    Loads the dataset from ``dp.bow_log_dataset_dir`` if that path exists,
    otherwise obtains it from ``dp.load_file_save_dataset()``.  A
    ``softmax.Softmax`` model is trained on the training features, the test
    features are predicted in one call, and a per-sample verdict, the overall
    accuracy and the elapsed training / prediction times are printed.

    Args:
        classify_class1: When truthy, use the ``*1`` labels, label map and
            class count; otherwise use the ``*2`` ones.

    Returns:
        None. All output goes to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        # Message reads "preprocessing and saving data".
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    x_test = dataset.test_features
    x_train = dataset.train_features
    if classify_class1:
        y_test, y_train = (dataset.test_class_labels1,
                           dataset.train_class_labels1)
        names, n_classes = dataset.class_label1_map, dataset.class1_num
    else:
        y_test, y_train = (dataset.test_class_labels2,
                           dataset.train_class_labels2)
        names, n_classes = dataset.class_label2_map, dataset.class2_num

    fit_began = time.perf_counter()
    model = softmax.Softmax(x_train, y_train, n_classes,
                            alpha=0.01, iterations=50)
    model.train()
    fit_ended = time.perf_counter()

    # The prediction timer deliberately spans the reporting loop as well,
    # matching where the end timestamp is taken below.
    predict_began = time.perf_counter()
    predictions = model.predict(x_test)
    hits = 0
    for idx, predicted in enumerate(predictions):
        expected = y_test[idx]
        if predicted == expected:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' %
                  (names[predicted], names[expected]))

    print('\n正确率:%.2f%%' % (100.0 * hits / dataset.test_num))
    predict_ended = time.perf_counter()

    print('Softmax-训练阶段运行时间:%s秒' % (fit_ended - fit_began))
    print('Softmax-预测阶段运行时间:%s秒' % (predict_ended - predict_began))
Exemplo n.º 3
0
def classify(classify_class1=True):
    """Train the project's naive Bayes model and report test-set results.

    Loads the dataset from ``dp.bow_log_dataset_dir`` if that path exists,
    otherwise obtains it from ``dp.load_file_save_dataset()``.  The model
    parameters come from ``nb.train_nb``; each test sample is then classified
    with ``nb.nb_classify``.  A per-sample verdict, the overall accuracy and
    the elapsed training / prediction times are printed.

    Args:
        classify_class1: When truthy, use the ``*1`` labels, label map and
            class count; otherwise use the ``*2`` ones.

    Returns:
        None. All output goes to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        # Message reads "preprocessing and saving data".
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    samples = dataset.test_features
    total = dataset.test_num

    # Select the label set in one step, then unpack it into working names.
    if classify_class1:
        label_set = (dataset.train_class_labels1, dataset.test_class_labels1,
                     dataset.class1_num, dataset.class_label1_map)
    else:
        label_set = (dataset.train_class_labels2, dataset.test_class_labels2,
                     dataset.class2_num, dataset.class_label2_map)
    y_train, y_test, n_classes, names = label_set

    fit_began = time.perf_counter()
    p_words, p_class = nb.train_nb(dataset.train_features, y_train, n_classes)
    fit_ended = time.perf_counter()

    # The prediction timer spans the reporting loop and the accuracy line.
    predict_began = time.perf_counter()
    hits = 0
    for idx in range(total):
        predicted = nb.nb_classify(p_words, p_class, samples[idx], n_classes)
        if predicted == y_test[idx]:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' %
                  (names[predicted], names[y_test[idx]]))

    print('\n正确率:%.2f%%' % (100.0 * hits / total))
    predict_ended = time.perf_counter()

    print('Naive Bayes-训练阶段运行时间:%s秒' % (fit_ended - fit_began))
    print('Naive Bayes-预测阶段运行时间:%s秒' % (predict_ended - predict_began))
Exemplo n.º 4
0
def classify(classify_class1=True):
    """Fit an ``MLPClassifier`` on the log dataset and report test results.

    Loads the dataset from ``dp.bow_log_dataset_dir`` if that path exists,
    otherwise obtains it from ``dp.load_file_save_dataset()``.  The network is
    fitted on the training features, the test features are predicted in one
    call, and a per-sample verdict, the overall accuracy and the elapsed
    training / prediction times are printed.

    Args:
        classify_class1: When truthy, use the ``*1`` labels and label map;
            otherwise use the ``*2`` ones.

    Returns:
        None. All output goes to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        # Message reads "preprocessing and saving data".
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    x_test = dataset.test_features
    x_train = dataset.train_features
    if classify_class1:
        y_test = dataset.test_class_labels1
        y_train = dataset.train_class_labels1
        names = dataset.class_label1_map
    else:
        y_test = dataset.test_class_labels2
        y_train = dataset.train_class_labels2
        names = dataset.class_label2_map

    fit_began = time.perf_counter()
    network = MLPClassifier(
        hidden_layer_sizes=(100,),
        solver='adam',
        activation='relu',
        random_state=1,
        verbose=True,
    )
    network.fit(x_train, y_train)
    fit_ended = time.perf_counter()

    # The prediction timer spans the reporting loop and the accuracy line.
    predict_began = time.perf_counter()
    predictions = network.predict(x_test)
    hits = 0
    for idx, predicted in enumerate(predictions):
        expected = y_test[idx]
        if predicted == expected:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' % (names[predicted], names[expected]))

    print('\n正确率:%.2f%%' % (100.0 * hits / dataset.test_num))
    predict_ended = time.perf_counter()

    print('Neural Network-训练阶段运行时间:%s秒' % (fit_ended - fit_began))
    print('Neural Network-预测阶段运行时间:%s秒' % (predict_ended - predict_began))
Exemplo n.º 5
0
def classify(classify_class1=True):
    """Fit a 3-neighbour ``KNeighborsClassifier`` and report test results.

    Loads the dataset from ``dp.bow_log_dataset_dir`` if that path exists,
    otherwise obtains it from ``dp.load_file_save_dataset()``.  The classifier
    is fitted on the training features and each test sample is predicted
    individually.  A per-sample verdict, the overall accuracy and the elapsed
    training / prediction times are printed.

    Args:
        classify_class1: When truthy, use the ``*1`` labels and label map;
            otherwise use the ``*2`` ones.

    Returns:
        None. All output goes to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        # Message reads "preprocessing and saving data".
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    x_test = dataset.test_features
    x_train = dataset.train_features
    if classify_class1:
        y_test, y_train, names = (dataset.test_class_labels1,
                                  dataset.train_class_labels1,
                                  dataset.class_label1_map)
    else:
        y_test, y_train, names = (dataset.test_class_labels2,
                                  dataset.train_class_labels2,
                                  dataset.class_label2_map)

    fit_began = time.perf_counter()
    model = KNeighborsClassifier(n_neighbors=3)
    model.fit(x_train, y_train)
    fit_ended = time.perf_counter()

    # The prediction timer spans the reporting loop and the accuracy line.
    predict_began = time.perf_counter()
    hits = 0
    for idx in range(dataset.test_num):
        # predict() is given a one-element batch; take its single result.
        single_batch = [x_test[idx]]
        predicted = model.predict(single_batch)[0]
        expected = y_test[idx]
        if predicted == expected:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' %
                  (names[predicted], names[expected]))

    print('\n正确率:%.2f%%' % (100.0 * hits / dataset.test_num))
    predict_ended = time.perf_counter()

    print('KNN-sklearn-训练阶段运行时间:%s秒' % (fit_ended - fit_began))
    print('KNN-sklearn-预测阶段运行时间:%s秒' % (predict_ended - predict_began))
def classify(classify_class1=True):
    """Fit a ``DecisionTreeClassifier``, print the tree, and report results.

    Loads the dataset from ``dp.bow_log_dataset_dir`` if that path exists,
    otherwise obtains it from ``dp.load_file_save_dataset()``.  After fitting,
    the tree is printed via ``tree.export_text`` using the dataset's
    ``feature_labels``.  Each test sample is then predicted individually, and
    a per-sample verdict, the overall accuracy and the elapsed build /
    prediction times are printed.

    Args:
        classify_class1: When truthy, use the ``*1`` labels and label map;
            otherwise use the ``*2`` ones.

    Returns:
        None. All output goes to stdout.
    """
    if os.path.exists(dp.bow_log_dataset_dir):
        dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    else:
        # Message reads "preprocessing and saving data".
        print('数据预处理并保存中')
        dataset = dp.load_file_save_dataset()

    if classify_class1:
        y_train, names, y_test = (dataset.train_class_labels1,
                                  dataset.class_label1_map,
                                  dataset.test_class_labels1)
    else:
        y_train, names, y_test = (dataset.train_class_labels2,
                                  dataset.class_label2_map,
                                  dataset.test_class_labels2)

    model = tree.DecisionTreeClassifier()
    build_began = time.perf_counter()
    model.fit(dataset.train_features, y_train)
    build_ended = time.perf_counter()
    # The textual dump is printed outside both timed sections.
    rendered = tree.export_text(model, feature_names=dataset.feature_labels)
    print(rendered)

    # The prediction timer spans the reporting loop and the accuracy line.
    predict_began = time.perf_counter()
    hits = 0
    for idx in range(dataset.test_num):
        # predict() is given a one-element batch; take its single result.
        predicted = model.predict([dataset.test_features[idx]])[0]
        expected = y_test[idx]
        if predicted == expected:
            hits += 1
            print('正确', names[predicted])
        else:
            print('错误, 预测类别:%s, 正确类别:%s' %
                  (names[predicted], names[expected]))

    print('\n正确率:%.2f%%' % (100.0 * hits / dataset.test_num))
    predict_ended = time.perf_counter()

    print('决策树-sklearn-构建决策树运行时间:%s秒' %
          (build_ended - build_began))
    print('决策树-sklearn-预测阶段运行时间:%s秒' % (predict_ended - predict_began))
def classify():
    """Build a tree with ``id3.create_tree`` from the saved dataset and print it.

    The dataset is loaded from ``dp.bow_log_dataset_dir``; the combined
    training data returned by ``get_combined_train()`` and the dataset's
    ``feature_labels`` are handed to ``id3.create_tree``.

    Returns:
        None. The resulting tree is written to stdout.
    """
    dataset = dp.load_log_dataset(dp.bow_log_dataset_dir)
    combined_train = dataset.get_combined_train()
    print(id3.create_tree(combined_train, dataset.feature_labels))