Python TextProcess.jieba_initialize примеры использования

Язык программирования: Python

Пространство имен/Пакет: Tools

Класс/Тип: TextProcess

Метод/Функция: jieba_initialize

Примеров на hotexamples.com: 4

Python TextProcess.jieba_initialize - 4 примера найдено. Это лучшие примеры Python кода для Tools.TextProcess.jieba_initialize, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

postag(8)

jieba_initialize(4)

wordSegment(4)

cut(3)

load_baikeattr_name(2)

load_synonyms_word_inattr(2)

Пример #1

Показать файл

def qa(question):

    #初始化jieba分词器
    T.jieba_initialize()

    #切换到语料库所在工作目录
    mybot_path = './'
    # os.chdir(mybot_path)

    mybot = aiml.Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/OrdinaryQuestion.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/Common conversation.aiml")
    # mybot.respond('Load Doc Snake')
    #载入百科属性列表

    input_message = question

    if len(input_message) > 60:
        print(mybot.respond("句子长度过长"))
    elif input_message.strip() == '':
        print(mybot.respond("无"))

    print(input_message)
    message = T.wordSegment(input_message)
    # 去标点
    print('word Seg:' + message)
    print('词性：')
    words = T.postag(input_message)
    if message == 'q':
        exit()
    else:
        response = mybot.respond(message)

        print("=======")
        print(response)
        print("=======")

        if response == "":
            ans = mybot.respond('找不到答案')
            # print(robot_id + ":" + ans)
            print("{0}:{1}".format(robot_id, ans))
        # 百科搜索
        elif response[0] == '#':
            # 匹配百科
            if response.__contains__("searchbaike"):
                print("search from baike")
                print(response)
                res = response.split(':')
                #实体
                entity = str(res[1]).replace(" ", "")
                #属性
                attr = str(res[2]).replace(" ", "")
                print(entity + '<---->' + attr)

                ans = baike.query(entity, attr)
                # 如果命中答案
                if type(ans) == list:
                    print("{0}:{1}".format(robot_id, QAT.ptranswer(ans,
                                                                   False)))
                elif ans.decode('utf-8').__contains__(u'::找不到'):
                    #百度摘要+Bing摘要
                    print("通用搜索")
                    log.info("通用搜索")
                    ans = search_summary.kwquery(input_message)

            # 匹配不到模版，通用查询
            elif response.__contains__("NoMatchingTemplate"):
                print("NoMatchingTemplate")
                ans = search_summary.kwquery(input_message)

            if len(ans) == 0:
                ans = mybot.respond('找不到答案')
                logs.info("{0}:{1}".format(robot_id, ans))
            elif len(ans) > 1:
                logs.info(sys.exc_info())
                logs.info("不确定候选答案")
                logs.info("[{0}][func:{1}][line:{2}]:不确定候选答案".format(
                    sys._getframe().f_code.co_filename,
                    sys._getframe().f_code.co_name,
                    sys._getframe().f_lineno))
                print(robot_id + ': ')
                for a in ans:
                    print(a)
                    # print(a.encode("utf8"))
            else:
                print('{0}:{1}'.format(robot_id, ans[0]))

        # 匹配模版
        else:
            print("{}: {}".format(robot_id, response))

Пример #2

Показать файл

Файл: main.py Проект: zyyyyy/MillionHeroAssistant

def main():
    args = parse_args()
    timeout = args.timeout

    # 初始化jb分词器
    T.jieba_initialize()

    # 切换到语料库所在工作目录
    mybot_path = './'
    os.chdir(mybot_path)

    mybot = Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/OrdinaryQuestion.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/Common conversation.aiml")

    if enable_chrome:
        question_obj = Value(ctypes.c_char_p, "".encode("utf-8"))
        browser_daemon = multiprocessing.Process(target=run_browser,
                                                 args=(question_obj, ))
        browser_daemon.daemon = True
        browser_daemon.start()

    def __inner_job():
        start = time.time()
        text_binary = analyze_current_screen_text(
            directory=data_directory, compress_level=image_compress_level[0])

        keywords = get_text_from_image(image_data=text_binary, )
        if not keywords:
            print("text not recognize")
            return

        true_flag, question, answers = parse_question_and_answer(keywords)
        #questions=question.decode('unicode-escape')
        #new_ans=[]
        #for ans in answers:
        # new_ans.append(ans.decode('unicode-escape'))

        print('-' * 72)
        print(question)
        print('-' * 72)
        print("\n".join(answers))

        # notice browser
        if enable_chrome:
            with question_obj.get_lock():
                question_obj.value = question
                keyboard.press("space")

        search_question = pre_process_question(question)
        summary = baidu_count(search_question, answers, timeout=timeout)
        summary_li = sorted(summary.items(),
                            key=operator.itemgetter(1),
                            reverse=True)
        data = [("选项", "同比")]
        for a, w in summary_li:
            data.append((a, w))
        table = AsciiTable(data)
        print(table.table)

        print("*" * 72)
        if true_flag:
            print("肯定回答(**)： ", summary_li[0][0])
            print("否定回答(  )： ", summary_li[-1][0])
        else:
            print("肯定回答(  )： ", summary_li[0][0])
            print("否定回答(**)： ", summary_li[-1][0])
        print("*" * 72)

        ##############################################################
        input_message = question

        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
        elif input_message.strip() == '':
            print(mybot.respond("无"))

        #print(input_message)
        message = T.wordSegment(input_message)
        # 去标点
        #print('word Seg:' + message)
        #print('词性：')
        words = T.postag(input_message)

        if message == 'q':
            exit()
        else:
            response = mybot.respond(message)

            #print("=======")
            #print(response)
            #print("=======")

            if response == "":
                ans = mybot.respond('找不到答案')
                print('Eric：' + ans)
            # 百科搜索
            elif response[0] == '#':
                # 匹配百科
                if response.__contains__("searchbaike"):
                    #print("searchbaike")
                    #print(response)
                    res = response.split(':')
                    # 实体
                    entity = str(res[1]).replace(" ", "")
                    # 属性
                    attr = str(res[2]).replace(" ", "")
                    #print(entity + '<---->' + attr)

                    ans = baike.query(entity, attr)
                    # 如果命中答案
                    if type(ans) == list:
                        print('Eric：' + QAT.ptranswer(ans, False))

                    elif ans.decode('utf-8').__contains__(u'::找不到'):
                        # 百度摘要+Bing摘要
                        print("通用搜索")
                        ans = search_summary.kwquery(input_message)

                # 匹配不到模版，通用查询
                elif response.__contains__("NoMatchingTemplate"):
                    #print("NoMatchingTemplate")
                    ans = search_summary.kwquery(input_message, answers)

                if len(ans) == 0:
                    print('Eric：' + '找不到答案')
                elif len(ans) > 1:
                    print("不确定候选答案")
                    print('Eric: ')
                    for a in ans:
                        print(a)
                else:
                    print('Eric：' + ans[0])

            # 匹配模版
            else:
                print('Eric：' + response)

        end = time.time()
        print("use {0} 秒".format(end - start))
        save_screen(directory=data_directory)

    while True:
        print("""
    请在答题开始前就运行程序，
    答题开始的时候按Enter预测答案
                """)

        enter = input("按Enter键开始，按ESC键退出...")
        print(enter)

        if enter == chr(27):
            break
        try:
            __inner_job()
        except Exception as e:
            print(str(e))

        print("欢迎下次使用")

Пример #3

Показать файл

def main():
    # 初始化jb分词器
    T.jieba_initialize()

    # 切换到语料库所在工作目录
    mybot_path = './'
    os.chdir(mybot_path)

    # 加载AIML的规则
    mybot = aiml.Kernel()
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/std-startup.xml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/abc.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/bot_profile.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/general.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/infor.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] + "/resources/main.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/new07281.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/salutations.aiml")
    mybot.learn(
        os.path.split(os.path.realpath(__file__))[0] +
        "/resources/virus0727.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/zextra_weibao.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/OrdinaryQuestion.aiml")
    # mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/Common conversation.aiml")

    # mybot.respond('Load Doc Snake')
    #载入百科属性列表

    #     print '''
    # .----------------.  .-----------------. .----------------.  .----------------.  .----------------.
    # | .--------------. || .--------------. || .--------------. || .--------------. || .--------------. |
    # | |    _______   | || | ____  _____  | || |      __      | || |  ___  ____   | || |  _________   | |
    # | |   /  ___  |  | || ||_   \|_   _| | || |     /  \     | || | |_  ||_  _|  | || | |_   ___  |  | |
    # | |  |  (__ \_|  | || |  |   \ | |   | || |    / /\ \    | || |   | |_/ /    | || |   | |_  \_|  | |
    # | |   '.___`-.   | || |  | |\ \| |   | || |   / /__\ \   | || |   |  __'.    | || |   |  _|  _   | |
    # | |  |`\____) |  | || | _| |_\   |_  | || | _/ /    \ \_ | || |  _| |  \ \_  | || |  _| |___/ |  | |
    # | |  |_______.'  | || ||_____|\____| | || ||____|  |____|| || | |____||____| | || | |_________|  | |
    # | |              | || |              | || |              | || |              | || |              | |
    # | '--------------' || '--------------' || '--------------' || '--------------' || '--------------' |
    #  '----------------'  '----------------'  '----------------'  '----------------'  '----------------'
    #  Eric：你好，我是Eric。╭(╯^╰)╮
    #     '''

    print('泰康小康：你好，我是小康。╭(╯^╰)╮')

    # 对问题的处理流程
    while True:
        # 输入这个问题
        input_message = raw_input("您想问什么 >> ")

        # 对输入问题进行简单的处理：
        # 忽略过长（超过60）的问题
        # 忽略空问题
        if len(input_message) > 60:
            print(mybot.respond("句子长度过长"))
            continue
        elif input_message.strip() == '':
            print(mybot.respond("无"))
            continue

        # 利用Tools工具对问题进行处理
        print(input_message)
        message = T.wordSegment(input_message)
        # 去标点
        print('word Seg:' + message)
        # print('词性：')
        words = T.postag(input_message)

        # 退出
        if message == 'q':
            exit()
        # 返回信息的优先级
        else:
            # 首先是AIML的模板匹配
            response = mybot.respond(message)

            print("=======")
            print(response)
            print(len(response.decode('utf8')))
            print("=======")

Пример #4

Показать файл

from Tools import TextProcess as T

from QuestionParser import aiml_parse
from AnswerGeneration import aiml_generate
import time

mybot = aiml.Kernel()
mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/std-startup.xml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/bye.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/tools.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/bad.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/funny.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/OrdinaryQuestion.aiml")
mybot.learn(os.path.split(os.path.realpath(__file__))[0] + "/resources/Common conversation.aiml")

T.jieba_initialize()


def qa(question, T, mybot, QAT):
    q_parsed = aiml_parse.aiml_question_parsing(question, T, mybot)
    ans = aiml_generate.aiml_answer_generate(q_parsed, mybot, QAT, question)
    return ans

def code_format(s):
    try:
        s = s.encode('utf8')
    except:
        s = s
    return s