Пример #1
0
def dating_class_test(file_name: str, k_number: int):
    """
    Hold-out test of the kNN classifier on the dating data set.

    The first 10% of the rows are used as test vectors and the remaining
    rows as the training samples. A sound evaluation needs the test rows
    to be drawn at random from the whole sample set; they are taken in
    file order here on the assumption that the data file is not sorted
    for any particular purpose.

    The loaded data is also drawn with ``FTool.Painting3D`` before the
    test runs. Every prediction and the overall error rate are printed.

    :param file_name: data file handed to ``FTool.LD``
    :param k_number: number of neighbours passed to ``KnnLearning.classify0``
    :return: None
    """
    ho_ratio = 0.10
    with FTool.LD(file_name) as ld:
        dating_mat, dating_labels = ld.load_to_ndarray(3, True)
    with FTool.Painting3D(name="Personal hobby with love") as plt:
        plt.paint(dating_mat,
                  dating_labels,
                  x_name="Fly miles",
                  y_name="Play game",
                  z_name="Ice Cream")
    # Only the normalised matrix is needed here; the other two returned
    # values are ignored.
    norm_mat, _, _ = FTool.HM.average(dating_mat)
    m = norm_mat.shape[0]
    # Number of rows held out for checking (10%); the rest train the
    # classifier.
    num_test_vec = int(m * ho_ratio)
    if num_test_vec == 0:
        # Fewer than 10 rows: there is nothing to test, and the error-rate
        # division below would raise ZeroDivisionError.
        print("Not enough samples for a hold-out test: "
              "{} row(s) give an empty test set".format(m))
        return
    # Rows from num_test_vec onwards are the training set; it is the same
    # for every test vector, so slice it once.
    train_mat = norm_mat[num_test_vec:m, :]
    train_labels = dating_labels[num_test_vec:m]
    error_count = 0
    for i in range(num_test_vec):
        classifier_result = KnnLearning.classify0(
            norm_mat[i, :], train_mat, train_labels, k_number)
        print("The classifier came back with {0}, the real answer is: {1}".
              format(classifier_result, dating_labels[i]))
        if classifier_result != dating_labels[i]:
            error_count += 1
    error_rate = error_count / num_test_vec
    print("The total error rate is {}".format(error_rate))
Пример #2
0
 def test_create_tree(self):
     """
     Build the lenses decision tree, print it, plot it and store it.

     The data is read from ``lenses.txt`` in the current working
     directory; the resulting tree is saved via ``self.store_tree``.

     :return: None
     """
     data_file = os.path.join(os.getcwd(), "lenses.txt")
     # Four features: age, prescription, astigmatism and tear rate.
     # Per the original note, the fifth column of each record is the
     # conclusion.
     feature_names = ['age', 'prescript', 'astigmatic', 'tearRate']
     with FTool.LD(data_file) as loader:
         records = loader.load_data_list()
         tree = create_tree(records, feature_names)
         # Show the tree as text first, then draw it.
         print(tree)
         plotter = PaintingTree()
         plotter.create_plot(tree)
         self.store_tree(tree, 'classifierStorage.txt')
Пример #3
0
def classify_person(file_name: str, k_number: int):
    """
    Interactive dating-site predictor.

    Asks for three figures describing a person, scales them with the
    minimum values and ranges obtained from the data in ``file_name``,
    classifies the scaled vector against that data and prints the verdict.

    :param file_name: data file handed to ``FTool.LD``
    :param k_number: number of neighbours passed to ``KnnLearning.classify0``
    :return: None
    """
    game_share = float(
        input("percentage of time spent playing video games?"))
    miles_per_year = float(input("frequent flier miles earned per year?"))
    ice_cream_litres = float(input("liters of ice cream consumed per year?"))

    with FTool.LD(file_name) as loader:
        samples, labels = loader.load_to_ndarray(3, True)
    scaled_samples, spans, minimums = FTool.HM.average(samples)

    # The query vector is ordered miles, game time, ice cream, which
    # differs from the order in which the values were prompted for.
    query = array([miles_per_year, game_share, ice_cream_litres])
    scaled_query = (query - minimums) / spans
    predicted = KnnLearning.classify0(
        scaled_query, scaled_samples, labels, k_number)

    # The predicted label is used as a 1-based index into the verdicts.
    verdicts = ["not at all", "in small doses", "in large doses"]
    print("You will probably like this person: ", verdicts[predicted - 1])
Пример #4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/9/13
# @Author  : Wenhao Shan
# Dsc      : Test of tree(include create, use) and painting tree

import os
from MachineLearingInAction import functionUtils as FTool
from MachineLearingInAction.Code.Chapter3_Random_Forest.trees import create_tree
from MachineLearingInAction.Code.Chapter3_Random_Forest.tree_plot import PaintingTree

if __name__ == '__main__':
    # Build the path with os.path.join so it works on every platform; a
    # hard-coded "\" separator is Windows-only and "\l" is not a valid
    # string escape sequence.
    file_path = os.path.join(os.getcwd(), "lenses.txt")
    with FTool.LD(file_path) as ld:
        lenses = ld.load_data_list()
        # Four features: age, prescription, astigmatism and tear rate.
        lenses_labels = ['age', 'prescript', 'astigmatic', 'tearRate']
        lenses_tree = create_tree(lenses, lenses_labels)
        # Show the tree as text, then draw it.
        print(lenses_tree)
        PaintingTree().create_plot(lenses_tree)