def init():
    """Build every two-part pinyin phrase from the words in ``words.dic``.

    Each line of ``words.dic`` (read as UTF-8) is stripped and, when
    non-empty, converted with ``pinyin.get_pinyin``. The distinct results are
    then concatenated pairwise, including each one with itself.

    Returns:
        list: the distinct concatenations, in no particular order.
    """
    # The context manager closes the file even if reading or decoding fails.
    with codecs.open('words.dic', encoding='utf-8') as f:
        words = [line.strip() for line in f.read().splitlines()]

    # Blank lines are dropped before conversion.
    pinyins = {pinyin.get_pinyin(word) for word in words if word}

    # Building a set directly removes duplicate concatenations.
    return list({first + second for first in pinyins for second in pinyins})
def update_concept_pinyin():
    """Recompute the ``pinyin`` column for every row of the ``concept`` table.

    For each row, the second column is converted with ``get_pinyin``,
    lower-cased, and written back to the row whose ``id`` equals the first
    column.
    """
    from pinyin import get_pinyin

    db = Database()
    rows = db.query_db("SELECT * FROM concept")

    # NOTE(review): the statement is assembled by string formatting, so a
    # value containing a double quote would break it. Whether query_db
    # accepts bound parameters is not visible from here -- confirm and
    # switch if it does.
    update_template = 'UPDATE concept SET pinyin = "%s" WHERE id = %s'

    for row in rows:
        concept_id, concept_name = row[0], row[1]
        lowered = get_pinyin(concept_name).lower()
        db.query_db(update_template % (lowered, concept_id))
def lookuporinsert_tag(tag_name):
    """Return the tag found by ``lookup_tag``, creating and saving it if absent.

    Args:
        tag_name: name passed to ``lookup_tag`` and stored on a new tag.

    Returns:
        The object returned by ``lookup_tag`` when it is truthy; otherwise a
        freshly saved ``Tag`` initialised with default values.
    """
    found = lookup_tag(tag_name)
    if found:
        return found

    # Nothing found: create the tag with its defaults and persist it.
    created = Tag()
    created.tag_name = tag_name
    created.tag_bool_deleted = False
    created.pinyin = get_pinyin(tag_name)
    created.tag_popularity = 0
    created.tag_last_update = 0
    created.save()
    return created
def sample(self, sentence, ignore_unk=False, beamwidth=10):
    """Translate ``sentence`` and report which source words stayed unknown.

    The sentence is mapped to ids, passed to ``self.search_model.apply``, and
    the candidate with the lowest score is kept (scores are divided by
    candidate length first when ``self.normalize`` is set). When both
    ``self.replace_unk`` and ``self.with_attention`` are set, each unknown
    token in the output is replaced using the source word it aligns to most
    strongly: the ``self.unk_dict`` entry if there is one, otherwise
    ``get_pinyin`` of that word. If ``self.detokenizer_cmd`` is set, the text
    is piped through that command.

    Args:
        sentence: whitespace-separated source text.
        ignore_unk: accepted for interface compatibility; not used here.
        beamwidth: accepted for interface compatibility; not used here.

    Returns:
        tuple: ``(translation, unknown_words)``. ``unknown_words`` lists the
        source words whose id equals ``self.unk_id`` and that were not
        replaced through ``self.unk_dict``.
    """
    ids = self._word_to_idx(sentence, self.dict_src)
    results = self.search_model.apply(numpy.array([ids]).T)
    outputs, scores = results[:2]
    if self.with_attention:
        alignments = results[2]
    if self.normalize:
        lengths = numpy.array([len(s) for s in outputs])
        scores = scores / lengths
    sidx = numpy.argmin(scores)
    # The last id of the chosen candidate is dropped before conversion.
    res = self._idx_to_word(outputs[sidx][:-1], self.idict_trg)

    translated_unks = set()
    if self.replace_unk and self.with_attention:
        source_words = sentence.split() + [self.eos_token]
        alignment = numpy.array(alignments[sidx]).transpose()
        # Hard alignment: for each output position, the source word with
        # the largest alignment weight.
        aligned_source_words = [
            source_words[idx] for idx in numpy.argmax(alignment, axis=0)
        ]
        new_tran_words = []
        for position, word in enumerate(res.split()):
            if word != self.unk_token:
                new_tran_words.append(word)
                continue
            aligned_source_word = aligned_source_words[position]
            if aligned_source_word in self.unk_dict:
                new_tran_words.append(self.unk_dict[aligned_source_word])
                translated_unks.add(aligned_source_word)
            else:
                # Only fall back to pinyin when the dictionary has no entry;
                # the original comment notes get_pinyin only accepts Chinese
                # words in GBK encoding, so it is not called needlessly.
                new_tran_words.append(get_pinyin(aligned_source_word))
        logger.info('new_tran_words:%s', new_tran_words)
        res = " ".join(new_tran_words)

    if self.detokenizer_cmd:
        detokenizer = Popen(self.detokenizer_cmd, stdin=PIPE, stdout=PIPE)
        res, _ = detokenizer.communicate(res)

    unknown_words = [
        word for word, index in zip(sentence.split(), ids)
        if index == self.unk_id and word not in translated_unks
    ]
    return res, unknown_words
def replace_unk(self, source_words, output, alignment):
    """Replace unknown tokens in a translation using the attention alignment.

    Args:
        source_words: source tokens, indexable by the row indices of
            ``alignment``.
        output: target ids, converted with ``self._idx_to_word``.
        alignment: array whose per-column argmax (``axis=0``) selects the
            source word for each output position.

    Returns:
        str: the output tokens joined by single spaces. Every token equal to
        ``self.unk_token`` is replaced by the ``self.unk_dict`` entry for its
        aligned source word, or by ``get_pinyin`` of that word when the
        dictionary has no entry.
    """
    # assumes _idx_to_word returns a sequence of tokens here (not one joined
    # string) -- TODO confirm
    tran_words = self._idx_to_word(output, self.idict_trg)
    aligned_source_words = [
        source_words[idx] for idx in numpy.argmax(alignment, axis=0)
    ]
    new_tran_words = []
    for position, word in enumerate(tran_words):
        if word != self.unk_token:
            new_tran_words.append(word)
            continue
        aligned_source_word = aligned_source_words[position]
        if aligned_source_word in self.unk_dict:
            new_tran_words.append(self.unk_dict[aligned_source_word])
        else:
            # Only fall back to pinyin when the dictionary has no entry; the
            # original comment notes get_pinyin only accepts Chinese words
            # in GBK encoding, so it is not called needlessly.
            new_tran_words.append(get_pinyin(aligned_source_word))
    return " ".join(new_tran_words)
# CSV file names whose pinyin transliteration is printed below.
table_name = [
    u'基础表_工伤个人待遇支付明细.csv',
    u'基础表_工伤亡职工变更信息.csv',
    u'基础表_工伤保险个人参保信息.csv',
    u'基础表_工伤保险个人应缴实缴明细信息.csv',
    u'基础表_工伤保险个人缴费基数信息.csv',
    u'基础表_工伤保险个人补退信息.csv',
    u'基础表_工伤保险单位参保信息.csv',
    u'基础表_工伤保险单位应缴信息.csv',
    u'基础表_工伤保险单位欠费明细信息.csv',
    u'基础表_工伤保险单位缴费待转基金信息.csv',
    u'基础表_工伤保险单位补退信息.csv',
    u'基础表_工伤保险参保个人基本信息.csv',
    u'基础表_工伤保险参保单位基本信息.csv',
    u'基础表_工伤保险在职人员变更信息.csv',
    u'基础表_工伤保险征集通知明细信息.csv',
    u'基础表_工伤保险待遇支付信息.csv',
    u'基础表_工伤保险综合参数表.csv',
    u'基础表_工伤保险缴费比例信息.csv',
    u'基础表_工伤保险职工平均工资参数表.csv',
    u'基础表_工伤劳动能力鉴定信息.csv',
    u'基础表_工伤定期待遇参数.csv',
    u'基础表_工伤职工工伤亡信息.csv',
    u'基础表_工伤非定期待遇参数.csv',
    u'工伤个人待遇支付明细.csv',
    u'工伤亡职工变更信息.csv',
    u'工伤供养亲属变更信息.csv',
    u'工伤供养亲属基本信息.csv',
    u'工伤供养亲属待遇审批信息.csv',
    u'工伤保险个人参保信息.csv',
    u'工伤保险个人应缴实缴明细信息.csv',
    u'工伤保险个人待转基金信息.csv',
    u'工伤保险个人缴费到账信息.csv',
    u'工伤保险个人缴费到账明细信息.csv',
    u'工伤保险个人缴费基数信息.csv',
    u'工伤保险个人补退信息.csv',
    u'工伤保险人员转移信息.csv',
    u'工伤保险代扣代缴明细信息.csv',
    u'工伤保险单位参保信息.csv',
    u'工伤保险单位变更信息.csv',
    u'工伤保险单位变更登记信息.csv',
    u'工伤保险单位实缴信息.csv',
    u'工伤保险单位应缴信息.csv',
    u'工伤保险单位欠费明细信息.csv',
    u'工伤保险单位缴费到账信息.csv',
    u'工伤保险单位缴费到账明细信息.csv',
    u'工伤保险单位缴费待转基金信息.csv',
    u'工伤保险单位缴费申报.csv',
    u'工伤保险单位补退信息.csv',
    u'工伤保险参保个人基本信息.csv',
    u'工伤保险参保单位基本信息.csv',
    u'工伤保险在职人员变更信息.csv',
    u'工伤保险征集通知明细信息.csv',
    u'工伤保险待遇支付信息.csv',
    u'工伤保险待遇类别与支付项目对照.csv',
    u'工伤保险欠款核销信息.csv',
    u'工伤保险经办机构.csv',
    u'工伤保险综合参数表.csv',
    u'工伤保险缴费比例信息.csv',
    u'工伤保险职工平均工资参数表.csv',
    u'工伤劳动能力鉴定信息.csv',
    u'工伤单位实付信息.csv',
    u'工伤单位应付信息.csv',
    u'工伤定期待遇参数.csv',
    u'工伤定期待遇审批信息.csv',
    u'工伤职工工伤亡信息.csv',
    u'工伤补发退发信息.csv',
    u'工伤非定期待遇参数.csv',
    u'工伤非定期待遇审批信息.csv',
]

# Print one transliterated name per line. The single-argument call form of
# print produces the same output on Python 2 and also parses on Python 3.
for name in table_name:
    print(pinyin.get_pinyin(name))
#!c:\python27\python
# -*- coding: UTF-8 -*-
"""CGI script: echo the pinyin of the submitted ``textcontent`` field as HTML."""

# Import the CGI handling modules.
import cgi
import cgitb

import pinyin

# HTML-escaping helper: html.escape where available, cgi.escape on
# interpreters that predate the html module.
try:
    from html import escape as _escape_html
except ImportError:
    from cgi import escape as _escape_html

# Create the FieldStorage instance.
form = cgi.FieldStorage()

# Read the submitted field.
if form.getvalue('textcontent'):
    text_content = pinyin.get_pinyin(form.getvalue('textcontent'))
    # The value originates from the request, so escape it before it is
    # interpolated into the page; otherwise submitted markup would be
    # emitted verbatim.
    text_content = _escape_html(text_content, True)
else:
    text_content = "没有内容"

# Single-argument print calls behave the same on Python 2 and Python 3.
print("Content-type:text/html")
print("")  # blank line that terminates the HTTP headers
print("<html>")
print("<head>")
print("<meta charset=\"utf-8\">")
print("<title>菜鸟教程 CGI 测试实例</title>")
print("</head>")
print("<body>")
print("<h2> 输入的内容是:%s</h2>" % text_content)
print("</body>")
print("</html>")
u'工伤保险单位缴费到账明细信息.csv', u'工伤保险单位缴费待转基金信息.csv', u'工伤保险单位缴费申报.csv', u'工伤保险单位补退信息.csv', u'工伤保险参保个人基本信息.csv', u'工伤保险参保单位基本信息.csv', u'工伤保险在职人员变更信息.csv', u'工伤保险征集通知明细信息.csv', u'工伤保险待遇支付信息.csv', u'工伤保险待遇类别与支付项目对照.csv', u'工伤保险欠款核销信息.csv', u'工伤保险经办机构.csv', u'工伤保险综合参数表.csv', u'工伤保险缴费比例信息.csv', u'工伤保险职工平均工资参数表.csv', u'工伤劳动能力鉴定信息.csv', u'工伤单位实付信息.csv', u'工伤单位应付信息.csv', u'工伤定期待遇参数.csv', u'工伤定期待遇审批信息.csv', u'工伤职工工伤亡信息.csv', u'工伤补发退发信息.csv', u'工伤非定期待遇参数.csv', u'工伤非定期待遇审批信息.csv' ] for name in table_name: # print name.encode('utf-8') print pinyin.get_pinyin(name) # print pinyin.get_initial(name).replace(' ', '')
print(client_pinyins) for item in client_pinyins: client_pinyin = item[0].split('&') content = item[1] evaluators.append(Evaluator(client_pinyin, server_pinyin, content)) for e in evaluators: print(e) evaluators.sort() result_content = evaluators[-1].content addToClipBoard(result_content) sg.PopupOK('成功输出到剪贴板') else: print('You entered ', values) character = values[0] pinyin_list = get_pinyin(character) pinyin = '' for pinyin_item in pinyin_list: pinyin += (pinyin_item[0] + '&') content = values[1] record = (character, pinyin, content) print(record) if event in ('Delete'): try: record_is_choose = values[2][0][0] except IndexError: continue print(record_is_choose) c.execute('DELETE FROM CORRES where CHARACTER = \'{}\''.format( record_is_choose)) else:
def dummy_test():
    """Smoke-check that ``get_pinyin`` returns a truthy value for '你好'."""
    # NOTE(review): 'ni3 hao3' is passed as the second positional argument,
    # so this assertion only checks truthiness of the return value. If it was
    # meant to be the expected result, this should be a comparison -- confirm.
    result = get_pinyin('你好', 'ni3 hao3')
    assert result
from pinyin import get_pinyin


def dummy_test():
    """Smoke-check that ``get_pinyin`` returns a truthy value for '你好'."""
    # NOTE(review): 'ni3 hao3' is passed as the second positional argument,
    # so this assertion only checks truthiness of the return value. If it was
    # meant to be the expected result, this should be a comparison -- confirm.
    result = get_pinyin('你好', 'ni3 hao3')
    assert result


if __name__ == "__main__":
    # Manual demo: text mixing Chinese characters with full-width punctuation.
    sample_text = "你好?中文!中文的,符号"
    print(get_pinyin(sample_text))