Пример #1
0
 def __init__(self, category_list_path, training_set_path):
     """Read both input files and keep their contents on the instance.

     Args:
         category_list_path: Path handed to ``system.get_content_list``
             to obtain the category list.
         training_set_path: Path handed to ``system.get_content_list``
             to obtain the training-set material.
     """
     load = system.get_content_list
     # Both files are read before anything is stored on the instance.
     categories, samples = load(category_list_path), load(training_set_path)
     self.category_list, self.training_set_material = categories, samples
Пример #2
0
 def __init__(self, category_list_path, training_set_path):
     """Populate the instance from the two content files.

     Args:
         category_list_path: Location of the category list; read with
             ``system.get_content_list``.
         training_set_path: Location of the training-set material; read
             with ``system.get_content_list``.
     """
     read_contents = system.get_content_list
     # Read both sources first, then publish them as attributes.
     loaded = (
         read_contents(category_list_path),
         read_contents(training_set_path),
     )
     self.category_list, self.training_set_material = loaded
Пример #3
0
# -*- coding:utf-8 -*-

from lib.baseClass import NB
from py_utility import system


# Evaluation script: classify every verification sample with the stored NB
# model, print each prediction, and set up the per-label counters.
if __name__ == "__main__":
    model_path = "model/nb_model.model"
    verification_samples_path = "data/verification_set.txt"
    verification_samples = system.get_content_list(verification_samples_path)

    n_verification_samples = len(verification_samples)

    global_counter = 0
    # Both dicts map a category name to a two-element list of counts.
    cate_counter = dict()
    precision_counter = dict()
    # e.g. {"Internet": [33, 87], "Sports": [14, 53], ...}
    # NOTE(review): the meaning of the two numbers is not visible in this
    # snippet -- presumably hit count and total count; confirm.

    for ele in verification_samples:
        # Each sample is split on a tab: field 0 is the text, field 1 the label.
        ele_list = ele.split("\t")
        text = ele_list[0]
        label = ele_list[1]
        # NOTE(review): the model is re-read and the classifier rebuilt on
        # every iteration although both depend only on model_path; they look
        # loop-invariant and could be hoisted above the loop.
        model = system.json_loads(model_path)
        cl = NB(model["prior_table"], model["posterior_table"])
        result = cl.predict(text)
        print(result)

        # First time this true label is seen: start its counter pair at zero.
        if label not in cate_counter.keys():
            cate_counter[label] = [0, 0]

        # NOTE(review): the snippet is cut off here; the body of this `if`
        # is not visible.
        if result not in precision_counter.keys():
Пример #4
0
# -*- coding:utf-8 -*-

from lib.baseClass import NB
from py_utility import system

# Evaluation script: classify every verification sample with the stored NB
# model, print each prediction, and set up the per-label counters.
if __name__ == "__main__":
    model_path = "model/nb_model.model"
    verification_samples_path = "data/verification_set.txt"
    verification_samples = system.get_content_list(verification_samples_path)

    n_verification_samples = len(verification_samples)

    global_counter = 0
    # Both dicts map a category name to a two-element list of counts,
    # e.g. {"Internet": [33, 87], "Sports": [14, 53], ...}
    # NOTE(review): the meaning of the two numbers is not visible in this
    # snippet -- presumably hit count and total count; confirm.
    cate_counter = dict()
    precision_counter = dict()

    # The model file and the classifier depend only on model_path, so load
    # them once here instead of re-reading the file for every sample.
    # NOTE(review): assumes NB.predict does not mutate the classifier in a
    # way that affects later predictions -- confirm against lib.baseClass.
    model = system.json_loads(model_path)
    cl = NB(model["prior_table"], model["posterior_table"])

    for ele in verification_samples:
        # Each sample is split on a tab: field 0 is the text, field 1 the label.
        ele_list = ele.split("\t")
        text = ele_list[0]
        label = ele_list[1]
        result = cl.predict(text)
        print(result)

        # First time a true label is seen: start its counter pair at zero.
        if label not in cate_counter:
            cate_counter[label] = [0, 0]

        # Same for a predicted label that has not been seen yet.
        if result not in precision_counter:
            precision_counter[result] = [0, 0]
Пример #5
0
__author__ = 'roy'

from py_utility.dataset.preprocess import dataset_split
from py_utility import system

# Split the full dataset into a training part and a verification part and
# write each part to its own file, one entry per line.
if __name__ == "__main__":
    dataset_path = r"data/open_test/no_meiti/dataset.txt"
    training_path = r"data/training_set.txt"
    verification_path = r"data/verification_set.txt"

    # Ratio handed to dataset_split together with the loaded dataset.
    ratio = 0.8
    training, verification = dataset_split(
        system.get_content_list(dataset_path), ratio
    )

    sep = "\n"
    # Serialize both parts first, then write them, training before
    # verification.
    training_str, verification_str = [
        system.to_string(part, sep) for part in (training, verification)
    ]
    for out_path, text in (
        (training_path, training_str),
        (verification_path, verification_str),
    ):
        system.write_content(out_path, text)