Exemplo n.º 1
0
 def pinyin_2_hanzi(self, pinyinList):
     """Return every concatenation of per-syllable candidates for a pinyin list.

     For each pinyin in ``pinyinList`` the default DAG table is queried with
     ``get_phrase([pinyin], 10000)`` and the first element of every returned
     entry is kept. The candidates of consecutive syllables are then combined
     as a cartesian product of string concatenations.

     :param pinyinList: iterable of pinyin syllables
     :return: list of combined strings, or ``None`` when ``pinyinList`` is empty
     """
     from Pinyin2Hanzi import DefaultDagParams

     params = DefaultDagParams()
     combos = None
     for syllable in pinyinList:
         choices = [entry[0] for entry in params.get_phrase([syllable], 10000)]
         if combos is not None:
             # Extend every prefix built so far with every choice for this syllable.
             combos = [prefix + choice for prefix in combos for choice in choices]
         else:
             # First syllable: its candidates seed the result.
             combos = choices
     return combos
def genarate_word_error(sents):
    """Build (sentence, corrupted sentence) pairs by swapping a word for a homophone.

    For every sentence, one word longer than a single character is picked at
    random from the jieba segmentation, converted to pinyin, and decoded again
    with the DAG model. A decoded candidate that differs from the picked word
    replaces its first occurrence in the sentence.

    Sentences are skipped when they contain no multi-character word, when the
    DAG lookup raises ``KeyError``, or when no differing candidate is available.

    :param sents: iterable of sentences (strings)
    :return: list of ``(original_sentence, corrupted_sentence)`` tuples
    """
    ans = []
    dagparams = DefaultDagParams()
    for sent in sents:
        # Only words longer than one character are eligible. Filtering up front
        # avoids spinning forever on sentences that have no such word.
        candidates = [word for word in jieba.cut(sent) if len(word) > 1]
        if not candidates:
            continue
        select_word = random.choice(candidates)  # randomly chosen target word

        pinyin_list = lazy_pinyin(select_word)  # pinyin of the chosen word
        try:
            result2 = dag(dagparams, pinyin_list, path_num=5, log=True)
        except KeyError:
            continue

        error_word = select_word
        # As before, a replacement is only attempted when the decoder offers
        # more than one path.
        if len(result2) > 1:
            decoded = (''.join(item.path) for item in result2)
            alternatives = [text for text in decoded if text != select_word]
            # Choosing from the pre-filtered list cannot loop forever when every
            # path happens to join back to the original word.
            if alternatives:
                error_word = random.choice(alternatives)

        # Replace the first occurrence of the chosen word in the sentence.
        word_index = sent.find(select_word)
        err_sent = (sent[:word_index] + error_word
                    + sent[word_index + len(select_word):])
        if err_sent != sent:
            ans.append((sent, err_sent))
    return ans
Exemplo n.º 3
0
    def __init__(self, w, h, x, y, font=None, callback=None):
        """
        Set up the text box geometry, rendering resources and pinyin input state.

        :param w: width of the text box
        :param h: height of the text box
        :param x: x coordinate of the text box
        :param y: y coordinate of the text box
        :param font: font used inside the text box
        :param callback: callback invoked after Enter is pressed in the text box
        """
        # Geometry
        self.width, self.height = w, h
        self.x, self.y = x, y

        # Content and the Enter-key callback
        self.text = ""
        self.callback = callback

        # Background surface
        self.__surface = pygame.Surface((w, h))

        # Without an explicit font the result may not look great; passing one
        # in is recommended because it is easier to tune.
        self.font = (
            font
            if font is not None
            else pygame.font.SysFont('microsoftyaheimicrosoftyaheiui', 16)
        )

        # Pinyin input-method state
        self.dagparams = DefaultDagParams()
        self.state = 0  # 0 = initial state, 1 = entering pinyin
        self.page = 1  # current candidate page
        self.limit = 5  # number of Chinese characters shown
        self.pinyin = ''
        self.word_list = []  # candidate word list
        self.word_list_surf = None  # surface for the candidate words
        self.buffer_text = ''  # association buffer string
Exemplo n.º 4
0
def pinyin_2_hanzi(pinyinList):
    """Return the first path element of the first DAG candidate for a pinyin list.

    Up to 10 candidates are requested from ``dag``. Only the first candidate is
    used, and only element 0 of its ``path``.

    :param pinyinList: list of pinyin syllables
    :return: ``path[0]`` of the first candidate
    """
    from Pinyin2Hanzi import DefaultDagParams, dag

    candidates = dag(DefaultDagParams(), pinyinList, path_num=10, log=True)
    # Take the first value.
    first = candidates[0]
    return first.path[0]
Exemplo n.º 5
0
def pinyin_2_hanzi(pinyinList):
    """Print the score and joined path of each DAG candidate for a pinyin list.

    :param pinyinList: list of pinyin syllables
    :return: None
    """
    from Pinyin2Hanzi import DefaultDagParams, dag

    params = DefaultDagParams()
    # path_num=10 is the number of candidates requested.
    for candidate in dag(params, pinyinList, path_num=10, log=True):
        print(candidate.score, ''.join(candidate.path))
Exemplo n.º 6
0
    def pinyin_to_chinese(self, data):
        '''Print the candidate Chinese decodings of a pinyin sequence.

        Up to 10 candidates are requested from ``dag``; each one is printed as
        its score followed by ":" and then its path.

        :param data: pinyin data
        :return: None
        '''
        params = DefaultDagParams()
        for candidate in dag(params, data, path_num=10, log=True):
            label = str(candidate.score) + ":"
            print(label, candidate.path)
Exemplo n.º 7
0
def pinyin_2_hanzi(pinyin_str):
    """Decode a whitespace-separated pinyin string into a single string.

    :param pinyin_str: pinyin syllables separated by whitespace
    :return: the joined path of the single requested candidate, or ``None``
        (after logging) when ``dag`` yields no result
    """
    syllables = pinyin_str.split()
    # Request a single candidate.
    candidates = dag(DefaultDagParams(), syllables, path_num=1, log=True)
    if not candidates:
        logger.info("转化有误:" + pinyin_str)
        return None
    return ''.join(candidates[0].path)  # conversion result
Exemplo n.º 8
0
def pinyin_to_hanzi(pinyin, Topk=5):
    '''
    Convert pinyin to Chinese characters and print the candidates.

    Pinyin is ambiguous, so there is no one-to-one mapping; only the ``Topk``
    candidates requested from ``dag`` are printed, one per line, as
    ``<score> <text>``.

    :param pinyin: list of pinyin syllables
    :param Topk: number of candidates requested from ``dag``
    :return: None
    '''
    translator = DefaultDagParams()
    result = dag(translator, pinyin, path_num=Topk, log=True)
    for item in result:
        # Decode only byte strings: text items have no usable ``decode`` on
        # Python 3.
        pieces = [
            one.decode('utf-8') if isinstance(one, bytes) else one
            for one in item.path
        ]
        # A single pre-formatted argument prints identically whether ``print``
        # is the Python 2 statement or the Python 3 function.
        print('%s %s' % (item.score, ''.join(pieces)))
Exemplo n.º 9
0
def pinyin_2_hanzi(word):
    """Return same-pinyin alternatives for a Chinese word.

    The word is converted to pinyin with ``lazy_pinyin`` and decoded again with
    ``dag`` (3 candidates requested). The first path element of every
    candidate is collected.

    :param word: input word
    :return: list of first path elements, or the string ``"Null"`` when
        ``Pinyin2Hanzi.is_chinese(word)`` is false
    """
    if not Pinyin2Hanzi.is_chinese(word):
        return "Null"

    word_pinyin = lazy_pinyin(word)
    candidates = dag(DefaultDagParams(), word_pinyin, path_num=3, log=True)
    return [candidate.path[0] for candidate in candidates]
Exemplo n.º 10
0
def pinyin_2_hanzi(sentences):
    """Print the pinyin of the input and the DAG candidates decoded from it.

    The input is converted with ``lazy_pinyin``, the pinyin list is printed,
    and then the score and path of each candidate are printed.

    :param sentences: text passed to ``lazy_pinyin``
    :return: None
    """
    from Pinyin2Hanzi import DefaultDagParams, dag

    params = DefaultDagParams()
    pinyinList = lazy_pinyin(sentences)
    print(pinyinList)
    # path_num=3: three candidates are requested.
    for candidate in dag(params, pinyinList, path_num=3):
        print(candidate.score, candidate.path)
Exemplo n.º 11
0
 def pinyin_2_hanzi(self, pinyinList):
     """Return the path of the highest-scoring DAG candidate for a pinyin list.

     Ten candidates are requested from ``dag``. Any exception raised while
     collecting or ranking them (including an empty result) is printed together
     with a retry hint, and the method then returns ``None``.

     :param pinyinList: list of pinyin syllables
     :return: path of the best-scoring candidate, or ``None`` on failure
     """
     # path_num=10 is the number of candidates requested.
     candidates = dag(DefaultDagParams(), pinyinList, path_num=10, log=True)
     try:
         scored = [[candidate.score, candidate.path] for candidate in candidates]
         # Highest score first; the sort is stable, so ties keep their order.
         scored.sort(key=itemgetter(0), reverse=True)
         return scored[0][1]
     except Exception as e:
         print(e)
         print("输入异常,请重新输入拼音")
Exemplo n.º 12
0
def pinyin2hanzi(pinyin_list):
    '''
    Decode each pinyin sequence and keep only the short candidate paths.

    :param pinyin_list: list of pinyin sequences, each passed to ``dag`` as is
    :return: list of candidate paths; paths with more than one element are
        dropped, so every kept path has at most one element
    '''
    entities = []
    params = DefaultDagParams()
    for line in pinyin_list:
        candidates = dag(params, line, path_num=5, log=True)
        paths = (candidate.path for candidate in candidates)  # conversion results
        entities.extend(path for path in paths if len(path) <= 1)
    return entities
Exemplo n.º 13
0
def pinyin_to_hanzi(pinyin,Topk=5,Log=True):
    '''
    Convert pinyin to Chinese character candidates.

    Pinyin is ambiguous, so there is no one-to-one mapping; ``Topk`` candidates
    are requested from ``dag``. The input is printed before decoding.

    :param pinyin: list of pinyin syllables
    :param Topk: forwarded to ``dag`` as ``path_num``
    :param Log: forwarded to ``dag`` as ``log``
    :return: the value returned by ``dag`` (items expose ``score`` and ``path``)
    '''
    print(pinyin)
    return dag(DefaultDagParams(), pinyin, path_num=Topk, log=Log)
Exemplo n.º 14
0
def pinyin_2_hanzi(pinyin_str):
    '''
    Decode a whitespace-separated pinyin string into a single string.

    Example:
    zhao qing shi ding hu qu fang di chan xie hui --- 肇庆市鼎湖区房地产协会

    :param pinyin_str: pinyin syllables separated by whitespace
    :return: the joined path of the single requested candidate, or ``''``
        when ``dag`` yields no result (the failure is logged first)
    '''
    pinyin_list = pinyin_str.split()
    dagParams = DefaultDagParams()
    # Request a single candidate.
    result = dag(dagParams, pinyin_list, path_num=1, log=True)
    if result:
        return ''.join(result[0].path)  # conversion result
    # Log before returning: this call used to sit after the return statement
    # and could never run.
    logger.info("转化有误:" + pinyin_str)
    return ''
 def __init__(self):
     """Create the HMM/DAG parameter sets and the pinyin spelling tables.

     NOTE(review): judging by the names, ``shengmu`` holds the pinyin initials
     and each ``ym_<initial>`` list holds the finals that may follow that
     initial; ``yy`` looks like the syllables that have no initial. Confirm
     against the code that consumes these tables.
     """
     # Initialization
     self.hmmparams = DefaultHmmParams()
     self.dagparams = DefaultDagParams()
     self.result = ''
     # 23 entries; the per-entry tables below follow this same order.
     self.shengmu = ['b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'w', 'x', 'y',
                     'z', 'ch', 'sh', 'zh']
     self.yy = ['a', 'ai', 'an', 'ang', 'ao', 'e', 'en', 'eng', 'er', 'o', 'ou', 'ong']
     self.ym_b = ["a", "ai", "an", "ang", "ao", "ei", "en", "eng", "i", "ian", "iao", "ie", "in", "ing", "o", "u"]
     self.ym_c = ["a", "ai", "an", "ang", "ao", "e", "en", "eng", "i", "ong", "ou", "u", "uan", "ui", "un", "uo"]
     self.ym_d = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ia", "ian", "iao", "ie", "ing", "iu",
                  "ong", "ou", "u", "uan", "ui", "un", "uo"]
     self.ym_f = ["a", "an", "ang", "ei", "en", "eng", "iao", "o", "ou", "u"]
     self.ym_g = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "ong", "ou", "u", "uai", "uan", "uang", "ui",
                  "un", "uo"]
     self.ym_h = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "ong", "ou", "u", "ua", "uai", "uan", "uang",
                  "ui", "un", "uo"]
     self.ym_j = ["i", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "iu", "u", "uan", "ue", "un"]
     self.ym_k = ["a", "ai", "an", "ang", "ao", "e", "en", "eng", "ong", "ou", "u", "ui", "un", "uo"]
     self.ym_l = ["a", "ai", "an", "ang", "ao", "e", "ei", "eng", "i", "ia", "ian", "iao", "ie", "in", "ing", "iu",
                  "o", "ong", "ou", "u", "uan", "un", "uo", "v", "ve"]
     self.ym_m = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ian", "iao", "ie", "in", "ing", "iu",
                  "o", "ou", "u"]
     self.ym_n = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ian", "iang", "iao", "ie", "in", "ing",
                  "iu", "ong", "ou", "u", "uan", "un", "uo", "v", "ve"]
     self.ym_p = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ian", "iao", "ie", "in", "ing", "o",
                  "ou", "u"]
     self.ym_q = ["i", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "iu", "u", "uan", "ue", "un"]
     self.ym_r = ["an", "ang", "ao", "e", "en", "eng", "i", "ong", "ou", "u", "ua", "uan", "ui", "un", "uo"]
     self.ym_s = ["a", "ai", "an", "ang", "ao", "e", "en", "eng", "i", "ong", "ou", "u", "uan", "ui", "un", "uo"]
     self.ym_t = ["a", "ai", "an", "ang", "ao", "e", "ei", "eng", "i", "ian", "iao", "ie", "ing", "ong", "ou", "u",
                  "uan", "ui", "un", "uo"]
     self.ym_w = ["a", "ai", "an", "ang", "ei", "en", "eng", "o", "u"]
     self.ym_x = ["i", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "iu", "u", "uan", "ue", "un"]
     self.ym_y = ["a", "an", "ang", "ao", "e", "i", "in", "ing", "o", "ong", "ou", "u", "uan", "ue", "un"]
     self.ym_z = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ong", "ou", "u", "uan", "ui", "un",
                  "uo"]
     self.ym_ch = ["a", "ai", "an", "ang", "ao", "e", "en", "eng", "i", "ong", "ou", "u", "ua", "uai", "uan", "uang",
                   "ui", "un", "uo"]
     self.ym_sh = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ou", "u", "ua", "uai", "uan", "uang",
                   "ui", "un", "uo"]
     self.ym_zh = ["a", "ai", "an", "ang", "ao", "e", "ei", "en", "eng", "i", "ong", "ou", "u", "ua", "uai", "uan",
                   "uang", "ui", "un", "uo"]
     # Index 0 is ``yy``; index i + 1 is the table for ``self.shengmu[i]``
     # (24 entries in total versus 23 in ``shengmu``).
     self.ym = [self.yy, self.ym_b, self.ym_c, self.ym_d, self.ym_f, self.ym_g, self.ym_h, self.ym_j, self.ym_k,
                self.ym_l, self.ym_m, self.ym_n, self.ym_p, self.ym_q, self.ym_r, self.ym_s, self.ym_t, self.ym_w,
                self.ym_x, self.ym_y, self.ym_z, self.ym_ch, self.ym_sh, self.ym_zh
                ]
Exemplo n.º 16
0
def get_chengyu(word):
    """Return a random ``find_chengyu`` result for same-pinyin variants of a word.

    The word is converted to pinyin with ``lazy_pinyin`` and decoded again with
    ``dag`` (3 candidates requested). The first path element of each candidate
    is passed to ``find_chengyu``, all results are flattened into one list,
    and one entry is picked at random.

    :param word: input word
    :return: one randomly chosen entry, or the string ``"Null"`` when
        ``Pinyin2Hanzi.is_chinese(word)`` is false or nothing was found
    """
    if not Pinyin2Hanzi.is_chinese(word):
        return "Null"

    word_pinyin = lazy_pinyin(word)
    dagParams = DefaultDagParams()
    result = dag(dagParams, word_pinyin, path_num=3, log=True)
    word_list = [item.path[0] for item in result]

    candidates = list(
        chain.from_iterable(find_chengyu(avg_word) for avg_word in word_list)
    )
    if not candidates:
        # With nothing found, the previous randint(0, -1) raised ValueError;
        # fall back to the sentinel already used for non-Chinese input.
        return "Null"
    return random.choice(candidates)
Exemplo n.º 17
0
    def __init__(self, w, h, x, y, callback=None):
        """
        Set up the text box geometry, rendering resources and pinyin input state.

        :param w: width of the text box
        :param h: height of the text box
        :param x: x coordinate of the text box
        :param y: y coordinate of the text box
        :param callback: stored on the instance as ``self.callback``
        """
        self.font = pygame.font.SysFont('microsoftyaheimicrosoftyaheiui', 16)

        # Geometry
        self.width, self.height = w, h
        self.x, self.y = x, y

        # Content and callback
        self.text = ""
        self.callback = callback

        # Background surface, filled with a near-white colour
        background = pygame.Surface((w, h))
        self.__surface = background
        background.fill((250, 250, 250))

        # Pinyin input-method state
        self.dagparams = DefaultDagParams()
        self.state = 0  # 0 = initial state, 1 = entering pinyin
        self.page = 1  # current candidate page
        self.limit = 5  # number of Chinese characters shown
        self.pinyin = ''
        self.word_list = []  # candidate word list
        self.word_list_surf = None  # surface for the candidate words
        self.buffer_text = ''  # association buffer string
Exemplo n.º 18
0
# coding: utf-8
from __future__ import (print_function, unicode_literals)

import sys
sys.path.append('..')

from Pinyin2Hanzi import DefaultDagParams
from Pinyin2Hanzi import dag

dagparams = DefaultDagParams()

# Decode a few pinyin sequences with the default settings; print every
# candidate as "<score> <path joined by '/'>", then a separator line.
for pinyin_seq in (
    ['wo'],
    ['ni', 'hao'],
    ['ni', 'bu', 'zhi', 'dao', 'de', 'shi'],
):
    result = dag(dagparams, pinyin_seq)
    for item in result:
        print(item.score, '/'.join(item.path))

    print(20 * '*')

result = dag(dagparams, ['ni', 'bu', 'zhi', 'dao', 'de', 'shi'],
             path_num=2,
Exemplo n.º 19
0
 def __init__(self):
     """Create a ``DefaultDagParams`` instance and keep it as ``self._dagparams``."""
     self._dagparams = DefaultDagParams()
Exemplo n.º 20
0
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName  :input.py
# @Time      :2022/1/4 21:04
# @Author    :russionbear

import pygame
from Pinyin2Hanzi import is_pinyin, dag, DefaultDagParams
from .. import Pen
import ctypes

# DAG parameters, created once when the module is imported.
__param = DefaultDagParams()
# NOTE(review): presumably the number of candidate paths requested from dag();
# its use is outside this view, so confirm.
PATH_NUM = 20


def get_pinyin(s0):
    """
    将一段拼音,分解成一个个拼音
    :param s0: 匹配的字符串
    :return: 匹配到的拼音列表
    """
    for i1, i in enumerate(reversed(s0)):
        if i < 'a' or i > 'z':
            s0 = s0[-i1 + 1:]

    result = []

    if not s0:
        return result

    max_len = 6  # 拼音最长为6
Exemplo n.º 21
0
def pinyin_2_hanzi(pinyinList):
    """Return the joined path of every DAG candidate for a pinyin list.

    :param pinyinList: list of pinyin syllables
    :return: list of strings, one per candidate returned by ``dag``
    """
    from Pinyin2Hanzi import DefaultDagParams, dag

    params = DefaultDagParams()
    # path_num is the number of candidates requested; 500000 is effectively
    # "as many as the decoder can produce".
    candidates = dag(params, pinyinList, path_num=500000, log=True)
    texts = []
    for candidate in candidates:
        texts.append(''.join(candidate.path))
    return texts
Exemplo n.º 22
0
# coding: utf-8
from __future__ import (print_function, unicode_literals)

import sys
sys.path.append('..')

from Pinyin2Hanzi import all_pinyin
from Pinyin2Hanzi import DefaultDagParams

dagparams = DefaultDagParams()

# Print every pinyin syllable for which the default table has no entry.
for py in all_pinyin():
    if not dagparams.get_phrase([py]):
        print(py)


# Spot-check the entries stored for two specific syllables.
for probe in (['ju'], ['jve']):
    print(dagparams.get_phrase(probe))