def cpinyin(city, zw, a):
    """Convert ``city`` and ``zw`` to pinyin.

    Args:
        city: Text to convert according to the mode ``a``.
        zw: Text that is always converted to full pinyin with no separator.
        a: Mode flag. ``0`` converts ``city`` to full pinyin; ``1`` keeps only
            the first letter of each character's pinyin.

    Returns:
        A ``(city_pinyin, zw_pinyin)`` tuple for modes 0 and 1; ``None`` for
        any other mode.
    """
    if a not in (0, 1):
        return None
    converter = xpinyin.Pinyin()
    if a == 0:
        city_part = converter.get_pinyin(city, "")
    else:
        # Mode 1: first letter of every character's pinyin.
        city_part = "".join(converter.get_pinyin(ch, "")[0] for ch in city)
    return (city_part, converter.get_pinyin(zw, ""))
def write_features(features, wpath):
    """Write the frequent features to a tab-separated file.

    Each item whose second element is greater than 50 produces one line:
    ``text<TAB>lower-cased pinyin initials<TAB>0<TAB>count``.

    Args:
        features: Iterable of indexable items; ``item[0]`` is the text and
            ``item[1]`` the value compared against 50.
        wpath: Path of the file to (over)write.
    """
    converter = xpinyin.Pinyin()
    # The context manager guarantees the file is flushed and closed even if
    # a write fails (the original never closed it). The platform default
    # encoding is kept so existing output stays unchanged.
    with open(wpath, 'w') as out:
        for feature in features:
            if feature[1] > 50:
                initials = converter.get_initials(feature[0], u'').lower()
                out.write(
                    '\t'.join([feature[0], initials, '0', str(feature[1])])
                    + '\n')
def one_game():
    """Play one round of three pinyin questions and update the score board.

    Questions are drawn at random from ``question_list``. The player's answer
    is compared with the tone-numbered pinyin of the question. When all three
    answers are right, the elapsed seconds are appended to ``score_list`` and
    a new-record message is shown if that time is the minimum.
    """
    started_at = datetime.datetime.now()  # start of the timed round
    converter = xpinyin.Pinyin()
    correct = 0  # number of right answers
    for _ in range(3):
        # Pick a random question from the list.
        question = random.choice(question_list)
        print(question)
        reply = input('请输入你的答案:')
        expected = converter.get_pinyin(question, ' ', tone_marks='numbers')
        if reply == expected:
            print('答对了!')
            correct += 1
        else:
            # Show the right answer.
            print('答错了,正确答案是' + expected)
    elapsed = datetime.datetime.now() - started_at  # time spent on the round
    print('本轮你答对了' + str(correct) + '次!')
    if correct == 3:
        print('恭喜你全部答对!')
        print('本轮你花了', elapsed.seconds, '秒')
        score_list.append(elapsed.seconds)  # record on the score board
        if elapsed.seconds == min(score_list):
            print('恭喜创造新纪录!')
    print('******** 成绩榜 ********')
    # Print every recorded score in order.
    for score in score_list:
        print(score, end='秒 ')
def find_photo(target):
    """Collect cropped screenshots of Baidu image results for ``target``.

    The directory name is ``target`` itself when it contains a space or is
    made only of ASCII letters; otherwise it is the pinyin of ``target`` with
    syllables separated by spaces. Screenshots for result indexes 1..19 are
    stored as ``test_data/<name>/<i>.png``.

    Args:
        target: Search keyword typed into the search box.
    """
    if ' ' in target or target.encode('UTF-8').isalpha():
        name = target
    else:
        name = ' '.join(xpinyin.Pinyin().get_pinyin(target).split('-'))
    path = 'test_data/' + name + '/'
    os.makedirs(path, exist_ok=True)  # make sure the save directory exists

    web = webdriver.Chrome()
    try:
        # Fixed host name: the original pointed at the misspelled
        # "iamge.baidu.com".
        web.get('http://image.baidu.com')
        web.find_element_by_id('kw').send_keys(target)
        web.find_element_by_class_name('s_search').click()
        for i in range(1, 20):
            shot = str(i) + '.png'
            web.find_element_by_xpath(
                '//*[@id="imgid"]/div/ul/li[' + str(i) + ']/div/a/img').click()
            # The click opens a new window; work in the newest one.
            web.switch_to.window(web.window_handles[-1])
            web.save_screenshot(shot)
            img = cv2.imread(shot, cv2.IMREAD_COLOR)
            cv2.imwrite(shot, img[64:400, 0:700])  # crop the screenshot
            shutil.move(shot, path + shot)
            web.close()
            web.switch_to.window(web.window_handles[-1])
    finally:
        # Always release the browser, even when a lookup or click fails.
        web.quit()
def __init__(self):
    """Open the MongoDB database named in ``settings`` and build a converter.

    Sets ``self.db`` to the database selected by ``MONGODB_DBNAME`` and
    ``self.p`` to a fresh ``xpinyin.Pinyin`` instance.
    """
    host = settings['MONGODB_HOST']
    port = settings['MONGODB_PORT']
    mongo_client = pymongo.MongoClient(host, port)
    database_name = settings['MONGODB_DBNAME']
    self.db = mongo_client[database_name]
    self.p = xpinyin.Pinyin()
class xpinyinUtil(object):
    """Helpers for converting tab-separated text files to pinyin."""

    # Converter shared by all calls.
    p = xpinyin.Pinyin()

    @staticmethod
    def pingyinFile(filename, filewrite):
        """Convert the first column of a TSV file to pinyin.

        For every input line ``<text>\\t<label>`` one output line
        ``<label>\\t<lower-cased, space-separated pinyin of text>`` is
        written. A histogram of pinyin string lengths is printed once the
        whole input has been read.

        Args:
            filename: UTF-8 input file path.
            filewrite: UTF-8 output file path (overwritten).
        """
        sentences = []
        length = {}  # pinyin string length -> number of lines
        with open(filename, 'r', encoding='utf-8') as src:
            for line in src:
                text = line.strip().split('\t')
                try:
                    # The converter is read through the class: a bare ``p``
                    # is not visible from inside a method body.
                    shang0hai = xpinyinUtil.p.get_pinyin(text[0].strip(), ' ')
                    lenpinyin = len(shang0hai)
                    length[lenpinyin] = length.get(lenpinyin, 0) + 1
                    sentences.append(
                        text[1] + '\t' + shang0hai.lower().strip() + '\n')
                except Exception:
                    # Best effort: report the malformed line (for example one
                    # with no tab) and carry on.
                    print('错误: ', line)
        print(length)
        with open(filewrite, 'w', encoding='utf-8') as dst:
            dst.writelines(sentences)
def suit(target):
    """Return the share of test images for ``target`` that match stored vectors.

    The name used for both the database query and the ``test_data/<name>``
    directory is ``target`` itself when it contains a space or only ASCII
    letters, otherwise its space-separated pinyin. Images in which the
    detector does not find exactly one face are deleted from disk.

    Args:
        target: Name to evaluate.

    Returns:
        Fraction of computed distances below 0.6, or ``0`` when the database
        has no record for the name or no usable image is left.
    """
    if ' ' in target or target.encode('UTF-8').isalpha():
        name = target
    else:
        name = ' '.join(xpinyin.Pinyin().get_pinyin(target).split('-'))

    # Query once and reuse the result (the original ran the same query twice).
    standard = [
        numpy.array(doc['vector']) for doc in main_sheet.find({'name': name})
    ]
    if not standard:
        # Nothing stored for this name: just report it.
        print('no data!!!' + target)
        return 0

    folder = 'test_data/' + name
    result = []
    for file_name in os.listdir(folder):
        file_path = folder + '/' + file_name
        img = cv2.imread(file_path, cv2.IMREAD_COLOR)
        dets = detector(img, 1)
        if len(dets) != 1:
            os.remove(file_path)
            continue
        for face in dets:
            vector = list(
                face_rec_model.compute_face_descriptor(
                    img, shape_predictor(img, face)))
            # NOTE(review): ``standard`` is a list of vectors, so with more
            # than one stored vector this is a norm over the whole stacked
            # difference, not a per-vector distance -- confirm the intent.
            result.append(
                numpy.linalg.norm(numpy.array(vector) - standard))

    if not result:
        # Every image was rejected or the folder is empty; avoid dividing by 0.
        return 0
    return sum(1 for dist in result if dist < 0.6) / len(result)  # success rate
def query_word(word):
    """Look up rhyming candidates for ``word``.

    Each pinyin syllable of ``word`` is mapped through ``RhymeDct``: leading
    characters are dropped one at a time until a truthy entry is found, and
    ``'none'`` is used when the syllable is exhausted. Every suffix of the
    resulting sequence (except the one made of the last element alone) is
    passed to ``get_words`` and the hits are merged.

    Returns:
        The merged hits sorted by their second element, largest first.
    """
    syllables = xpinyin.Pinyin().get_pinyin(word).split('-')

    rhymes = []
    for syllable in syllables:
        tail = syllable
        while tail != '':
            hit = RhymeDct.get(tail, None)
            if hit:
                break
            tail = tail[1:]
        else:
            # Ran out of characters without a match.
            hit = 'none'
        rhymes.append(hit)

    matches = []
    for start in range(len(rhymes) - 1):
        found = get_words('-'.join(rhymes[start:]))
        if found:
            matches += found
    matches.sort(key=lambda item: item[1], reverse=True)
    return matches
def insert_game(request):
    """Create a ``Game`` record from the JSON body of a POST request.

    Args:
        request: Incoming HTTP request whose body is a JSON object. ``name``
            is required; the other fields fall back to empty defaults.

    Returns:
        ``HttpResponse`` with status 405 for non-POST requests, 400 when
        ``name`` is missing or not a string, otherwise the success page.
    """
    # The original guard (`if ...: True`) had no effect. Only the method is
    # checked here: `request.POST` is not a usable signal because the payload
    # is read from the raw body as JSON.
    if request.method != "POST":
        return HttpResponse(status=405)

    payload = json.loads(request.body)
    name = payload.get("name")
    if not isinstance(name, str):
        # Without a name the pinyin conversion below cannot run.
        return HttpResponse(status=400)

    curtime = int(time.time())
    pinyin = xpinyin.Pinyin()
    full_pinyin = pinyin.get_pinyin(name, "")
    test1 = game.models.Game(
        name=name,
        pinyin_name=full_pinyin,
        pinyin_first=pinyin.get_initials(name, ""),
        en_name=payload.get("enname", full_pinyin),
        other_name=payload.get("othername", ""),
        title_img=payload.get("special", ""),
        icon=payload.get("icon", ""),
        introduce=payload.get("introduce", ""),
        uid=0,
        platform=",".join(str(i) for i in payload.get("gameplatform", [])),
        wiki=payload.get("wiki", ""),
        state=0,
        create_time=curtime,
        update_time=curtime,
        publish_date=payload.get("publishdate", 0),
        other_info=json.dumps(payload.get("otherInfo", "")),
        game_shot=json.dumps(payload.get("gameShot", [])),
        game_video=json.dumps(payload.get("gameVideo", [])),
    )
    test1.save()
    return HttpResponse("<p>数据添加成功!</p>")
def __init__(self):
    """Initialise the base class, load stored data and read credentials.

    ``self.current`` and ``self.courses`` come from ``load_file`` applied to
    ``self.CURRENT_PATH`` and ``self.COURSES_PATH``. The user name and
    password are the first and second command-line arguments.
    """
    super().__init__()
    self.py = xpinyin.Pinyin()
    self.current = load_file(self.CURRENT_PATH)
    self.courses = load_file(self.COURSES_PATH)
    cli_args = sys.argv
    self.user = cli_args[1]
    self.passwd = cli_args[2]
def get_pinyin(word):
    """Transform Chinese words into plain pinyin.

    Args:
        word: Text to convert.

    Returns:
        The pinyin syllables of ``word`` concatenated with no separator. Any
        ``-`` in the converted text is removed as well, because the syllables
        are obtained by splitting on ``-``.
    """
    syllables = xpinyin.Pinyin().get_pinyin(word).split('-')
    # str.join replaces the original += loop, whose loop variable also
    # shadowed the ``word`` parameter.
    return ''.join(syllables)
def getPinyin(charStr):
    """Return the pinyin of a Chinese string.

    Args:
        charStr: Text to convert; must support ``len()``.

    Returns:
        The pinyin with no separator, or ``""`` when ``charStr`` is empty.
    """
    if len(charStr) == 0:
        return ""
    return xpinyin.Pinyin().get_pinyin(charStr, "")
def get_pinyin_name(self, chars=u'王大锤', splitter=u'-', tone_marks=None, convert='lower'):
    """Convert a name to pinyin, giving ``self.convert_name`` first refusal.

    ``self.convert_name`` is tried on the first two characters and then on
    the first character; ``-1`` means "no special reading". (Presumably a
    surname lookup -- confirm against ``convert_name``.) The rest of the
    string is converted with tone numbers, then each syllable is adjusted to
    the requested tone style and letter case.

    Args:
        chars: Name to convert.
        splitter: String placed between syllables in the result.
        tone_marks: ``'marks'`` decodes tone numbers to tone marks,
            ``'numbers'`` keeps them, anything else drops the trailing digit.
        convert: Case conversion passed to ``Pinyin.convert_pinyin``.

    Returns:
        The syllables joined by ``splitter``.
    """
    converter = xpinyin.Pinyin()  # one instance instead of one per syllable
    result = []
    nameLen = 0

    two_char = self.convert_name(chars[:2])
    if two_char != -1:
        # strip() removes surrounding whitespace before splitting.
        result.extend(two_char.strip().split(u'-'))
        nameLen = 2
    else:
        one_char = self.convert_name(chars[:1])
        if one_char != -1:
            result.append(one_char.strip())
            nameLen = 1

    remainder = chars[nameLen:]
    if remainder:
        # Skipping an empty remainder avoids the empty syllable (and so the
        # trailing splitter) that ''.split('-') would add.
        # Tone numbers are taken first and post-processed below.
        result.extend(
            converter.get_pinyin(remainder, u'-', 'numbers', 'lower').split(u'-'))

    converted = []
    for syllable in result:
        if tone_marks == 'marks':
            syllable = converter.decode_pinyin(syllable)
        elif tone_marks != 'numbers' and syllable[-1:].isdigit():
            # Drop the tone digit only when there is one, so pass-through
            # text without a digit keeps its last character.
            syllable = syllable[:-1]
        converted.append(converter.convert_pinyin(syllable, convert))
    return splitter.join(converted)
def __init__(self, lyric=None):
    """Create an empty lyric container and optionally parse ``lyric``.

    Args:
        lyric: Lyric data handed to ``self.parse_lyric``; nothing is parsed
            when it is ``None``.
    """
    self.p = xpinyin.Pinyin()
    self.dry_lyric = []  # lyric lines without whitespace and timestamps
    self.header = []     # header lines of the lyric
    self.writer = ''     # lyric writer, may stay empty
    if lyric is None:
        return
    self.parse_lyric(lyric)
def deal_with_kv_separate(data):
    """Rename the keys of every row to their lower-cased pinyin initials.

    The key mapping is built from the keys of the first row only.

    Args:
        data: Sequence of dicts sharing the keys of ``data[0]``.

    Returns:
        A new list of dicts with renamed keys, or ``[]`` for empty input.

    Raises:
        KeyError: If a later row has a key that the first row lacks.
    """
    if not data:
        # Empty input has no first row to read keys from; like
        # deal_with_kv_separate2, return early instead of raising IndexError.
        return []
    xp = xpinyin.Pinyin()
    key_map = {key: xp.get_initials(key, u'').lower() for key in data[0]}
    return [
        {key_map[key]: value for key, value in row.items()} for row in data
    ]
def call_fun(data, conversion_type=EnumConversion.NO_processing):
    """Convert ``data`` as requested, then forward it to ``func``.

    Args:
        data: Text to convert.
        conversion_type: ``EnumConversion`` member selecting full pinyin,
            pinyin initials, English translation or upper-cased English
            translation. Any other value leaves ``data`` untouched.

    Returns:
        Whatever ``func(converted, conversion_type)`` returns.
    """
    p = xpinyin.Pinyin()

    def to_english(text):
        # Chinese -> English through the Translator service.
        return Translator(from_lang='chinese', to_lang='english').translate(text)

    if conversion_type == EnumConversion.PINYIN_Processing:
        converted = p.get_pinyin(data, u'')
    elif conversion_type == EnumConversion.ENGLISH_Processing:
        converted = to_english(data)
    elif conversion_type == EnumConversion.PINYIN_1_Processing:
        converted = p.get_initials(data, u'')
    elif conversion_type == EnumConversion.ENGLISH_1_Processing:
        converted = to_english(data).upper()
    else:
        converted = data
    return func(converted, conversion_type)
def preprocess(transcript, output):
    """Convert a ``<key> <text>`` transcript into ``<key> <pinyin>`` lines.

    Args:
        transcript: Input file; each non-blank line is a key, one space, then
            the text to convert.
        output: Output file path (overwritten).

    Raises:
        ValueError: If a non-blank line contains no space.
    """
    translator = xpinyin.Pinyin()
    with open(transcript) as f:
        lines = f.readlines()

    data = {}
    for line in tqdm(lines):
        if not line.strip():
            # A blank line (such as a trailing newline) has no key/text pair.
            continue
        key, chars = line.split(" ", 1)
        pinyin = translator.get_pinyin(
            chars.replace("\n", ""), " ", show_tone_marks=True)
        # Raw string: "\s" in a normal literal is an invalid escape sequence.
        data[key] = re.sub(r"\s+", " ", pinyin.strip())

    with open(output, "w") as f:
        for k, v in data.items():
            f.write("{} {}\n".format(k, v))
def readContacts2(path):
    """Index every ``.txt`` and ``.csv`` file under ``path``.

    Creates ``path`` and ``path/tmp`` if needed, runs ``makeIndex`` on each
    matching file (writing to ``path/tmp/<file>.pinyin``), stores the merged
    result in the global ``ss``, calls ``top.onKeyRelease(-1)`` and prints
    the elapsed time.

    Returns:
        The merged list of index entries.
    """
    global ss
    started = time.time()
    tmp_dir = path + '/tmp'
    os.makedirs(path, exist_ok=True)
    os.makedirs(tmp_dir, exist_ok=True)
    p = xpinyin.Pinyin()
    ss2 = []
    for entry in os.listdir(path):
        if entry.endswith(('.txt', '.csv')):
            ss2 += makeIndex(
                p, path + '/' + entry, tmp_dir + '/' + entry + '.pinyin')
    ss = ss2
    top.onKeyRelease(-1)
    print('makeIndex:', time.time() - started)
    return ss2
def deal_with_kv_separate2(data):
    """Rename row keys to lower-cased full pinyin, keeping the original keys.

    Args:
        data: Sequence of dicts sharing the keys of ``data[0]``.

    Returns:
        ``data`` itself when it is falsy. Otherwise a list whose first
        element is ``{'coordinate': <original keys>}`` followed by one
        renamed dict per input row.
    """
    if not data:
        return data
    xp = xpinyin.Pinyin()
    original_keys = list(data[0].keys())
    key_map = {key: xp.get_pinyin(key, u'').lower() for key in original_keys}
    pre_data = [{'coordinate': original_keys}]
    for row in data:
        pre_data.append({key_map[key]: row[key] for key in row})
    return pre_data
def generate(folder):
    """Build the search index text for ``folder`` and hand it to ``makejs``.

    Each line read by ``readTXT`` is normalised and stored as
    ``<line>;<full pinyin>,<pinyin initials>`` (both lower-cased).
    """
    opendata = readTXT(folder)
    total = len(opendata)
    p = xpinyin.Pinyin()
    savedata = []
    next_report = 0
    for position, raw in enumerate(opendata):
        # Remove spaces, place spaces where they make copying easier, and
        # trim the space left by a trailing CSV comma.
        line = raw.strip().replace(' ', '-').replace(',', ' ').strip()
        if position > next_report:
            print('progress: %s/%s'%(next_report, total))
            next_report += 2000
        # Lower-casing the full pinyin also normalises upper-case Latin
        # letters contained in the text, e.g. 'Q太郎'.
        full = p.get_pinyin(line, '').lower()
        initials = p.get_initials(line, '').lower()
        savedata.append(line + ';' + full + ',' + initials)
    makejs('\n'.join(savedata))
class char_CNN:
    """Holds a restored TensorFlow text classifier as class-level state.

    Everything in the class body runs once, when the class is defined: the
    sample data set is loaded, the latest checkpoint under
    ``./runs/1530261778/checkpoints`` is restored into a dedicated graph and
    session, and the tensors needed for prediction are looked up by name.
    """
    with tf.device('/cpu:0'):
        pin = xpinyin.Pinyin()
        # Load data
        print("正在载入数据、模型...")
        # Mainly used for building the one-hot dictionary
        sample_data_source = Dataset(config.sample_data_source)
        # test_data = Dataset(config.test_data_source)
        # Picks up the latest checkpoint; the path can be changed
        checkpoint_file = tf.train.latest_checkpoint('./runs/1530261778/checkpoints')
        graph = tf.Graph()
        with graph.as_default():
            sess = tf.Session()
            with sess.as_default():
                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)
                print("载入模型成功1...")
                # Get the placeholders from the graph by name
                input_x = graph.get_operation_by_name("input_x").outputs[0]
                # input_y = graph.get_operation_by_name("input_y").outputs[0]
                dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
                # Tensors we want to evaluate
                predictions = graph.get_operation_by_name("output_layer/predictions").outputs[0]
        # Embedding table / dictionary, and the index -> label mapping
        embedding_w, embedding_dic = sample_data_source.onehot_dic_build()
        label_dict = {0: 'VIDEO', 1: 'TV', 2: 'APP', 3: 'CONTROL', 4: 'WEATHER', 5: 'MUSIC'}
        print("载入模型成功2...")

    @staticmethod
    def rec(text, sentencepinyin):
        """Classify one sentence and return its label.

        Args:
            text: Original text; used only in the printed output.
            sentencepinyin: Input passed to ``doc_process`` together with the
                embedding dictionary.

        Returns:
            The predicted label from ``label_dict``, or ``None`` when any
            step raises (the error is printed, not re-raised).
        """
        try:
            doc_image = []
            doc_vec = char_CNN.sample_data_source.doc_process(sentencepinyin, char_CNN.embedding_dic)
            doc_image.append(doc_vec)
            batch_xx = np.array(doc_image, dtype='int64')
            # Dropout keep probability is fed as 1.0 for this run
            prediction = char_CNN.sess.run(char_CNN.predictions, {char_CNN.input_x: batch_xx, char_CNN.dropout_keep_prob: 1.0})
            ppred = str(prediction[0]).replace('[', '').replace(']', '')
            label_pred = char_CNN.label_dict[int(ppred)]  # str -> int -> label
            print(text, '\t', label_pred)
            return label_pred
        except:
            # NOTE(review): the bare except hides every failure, including
            # KeyboardInterrupt; consider narrowing it.
            print(text, "rec text wrong!")
def send_msg_to_wx(msg, wx_url=None, groups=None, users=None):
    """Post a text message to WeCom (企业微信) robot webhooks.

    Args:
        msg: Message text.
        wx_url: Optional iterable of extra webhook URLs; empty strings are
            ignored.
        groups: Optional comma-separated group names looked up in
            ``tag_contact``; their members are mentioned and their robot
            address, if any, also receives the message.
        users: Optional user name; its pinyin (no separators) is looked up
            in ``user_info`` to find the mobile number to mention.

    Raises:
        KeyError: If a group, or a member of a group, is unknown.
    """
    headers = {"Content-Type": "application/json; charset=UTF-8"}
    wx_urls = [url for url in (wx_url or []) if url != '']
    # Always notify the automation notification group.
    # NOTE(review): this webhook key is hard-coded in the source; consider
    # moving it to configuration.
    wx_urls.append(
        'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=da4273ef-9e0d-4971-a9ed-1938cd932123'
    )

    mentions = []
    if users is not None:
        user = xpinyin.Pinyin().get_pinyin(users).replace('-', '')
        if user in user_info:
            mentions.append(user_info[user]['mobile'])
    if groups is not None:
        for group in groups.replace(' ', '').split(','):
            group_users = tag_contact[group]['users']
            group_robot_address = tag_contact[group]['wx_robot_address']
            if group_users == 'all':
                mentions.append('@all')
            else:
                for member in group_users.replace(' ', '').split(','):
                    mentions.append(user_info[member]['mobile'])
            if group_robot_address != '':
                wx_urls.append(group_robot_address)

    # Entries may themselves hold several comma-separated numbers, so they
    # are flattened the way the original did. Empty strings are dropped (the
    # original always sent one), and dict.fromkeys de-duplicates while
    # keeping a stable order.
    flattened = ','.join(mentions).replace(' ', '').split(',')
    send_mobile_list = list(dict.fromkeys(m for m in flattened if m))

    array = {
        "msgtype": "text",
        "text": {
            "content": msg,
            "mentioned_mobile_list": send_mobile_list,
        }
    }
    print('消息通知: ', wx_urls)
    for target_url in wx_urls:
        requests.post(target_url, json=array, headers=headers)
def get_weather(name):
    """Fetch the current weather for a city name.

    Args:
        name: City name; a trailing '市' is removed before the name is
            converted to pinyin and appended to ``base_url``.

    Returns:
        A dict with keys ``name``, ``temperature``, ``wind_direction``,
        ``wind_strength``, ``humidity`` and ``time``. When ``name`` is not a
        string or the HTTP status is not 200, the placeholder dict (error
        text in ``name``, ``None`` elsewhere) is returned.
    """
    result = {
        'name': "城市输入有误",
        'temperature': None,
        'wind_direction': None,
        'wind_strength': None,
        'humidity': None,
        'time': None,
    }
    if not isinstance(name, str):
        return result

    user_agent = {
        'User-agent':
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134"
    }
    city = name.strip()
    if city.endswith('市'):
        city = city[:-1]
    url = base_url + xpinyin.Pinyin().get_pinyin(city, "")
    response = requests.get(url, headers=user_agent)
    if response.status_code != 200:
        return result

    weather_node = pq(response.text)("dl.weather_info dd")
    result['name'] = weather_node('.name h2').text()
    result['temperature'] = weather_node('.weather p').text()
    # The second <b> holds direction and strength separated by one space.
    wind_parts = weather_node('.shidu b:nth-child(2)').text()[3:].split(" ")
    result['wind_direction'] = wind_parts[0]
    result['wind_strength'] = wind_parts[1]
    result['humidity'] = weather_node('.shidu b:nth-child(1)').text()[3:]
    result['time'] = weather_node('.week').text().split('\u3000')[0]
    print(result)
    return result
def __button_confirm_callback(self, screen_id):
    """Handle the confirm button on the goods stock-in screen.

    Reads name, quantity and money from the entry widgets, resolves the
    product id and pinyin from ``global_variable.goods_list`` (an unknown
    name is treated as a new product), writes one record through
    ``self.db.update`` and puts ``'update'`` on ``self.queue``.

    Args:
        screen_id: Screen identifier passed on to the message box.
    """
    self.logger.info("入库货物")
    name = self.input_name_entry.get()
    number_str = self.input_number_entry.get()
    money_str = self.input_monery_entry.get()
    if not name or not number_str or not money_str:
        self.msgbox.show(screen_id, 240, 80, '请输入有效数')
        return

    # Look the product up by name in the known goods list.
    for display_item in global_variable.goods_list:
        if name == display_item[1]:
            product_id = display_item[0]
            name_pinyin = display_item[2]
            break
    else:
        # Not in the list: treat it as a new product.
        unicode_name = u'%s' % name
        name_pinyin = xpinyin.Pinyin().get_pinyin(unicode_name)
        product_id = None
    self.logger.info("id={} name={} py={}".format(product_id, name, name_pinyin))

    try:
        input_number = int(number_str, 10)
        input_money = float(money_str)
    except ValueError:
        # Non-numeric text used to raise out of the callback; show the same
        # prompt as for empty fields instead.
        self.msgbox.show(screen_id, 240, 80, '请输入有效数')
        return

    if self.sell_mode == True:
        # The entered money is the total price.
        money = input_money
    else:
        # The entered money is the unit price.
        money = input_number * input_money
    record_list = [[product_id, name, name_pinyin, 0, input_number, money],]
    self.logger.info(record_list)
    self.db.update(record_list)
    self.queue.put('update')
def cut_td_idf_pinyin(sources_path, target_path):
    """Convert every line of a corpus file to space-separated pinyin.

    Each line read by ``txtRead`` has its spaces removed, is passed through
    ``strQ2B`` and ``get_syboml``, and is then converted to pinyin. The
    results, one per line, are written with ``txtWrite``.

    Args:
        sources_path: Path of the corpus to read.
        target_path: Path the pinyin lines are written to.
    """
    pin = xpinyin.Pinyin()
    converted = []
    for raw in txtRead(sources_path):
        cleaned = raw.replace(' ', '').strip()
        normalised = get_syboml(strQ2B(cleaned.strip()))
        squeezed = normalised.replace(' ', '').replace(' ', '').strip()
        converted.append(pin.get_pinyin(squeezed, ' ') + '\n')
    txtWrite(converted, target_path)
    print("cut_td_idf_pinyin ok! " + sources_path)
class test:
    """Script-style class: its body converts a transcript file to pinyin.

    When the class is defined, the transcript at ``data_path`` is read and
    one ``<id> <tone-numbered pinyin>`` line per entry is written to
    ``syllable.txt`` in the same directory.
    """

    def get_wav_list(filename: str):
        '''
        Read a wav list file and return its content.

        Every non-empty line is split on single spaces; the first field is
        the wav id and the second field its text.

        Returns:
            ``(dic_filelist, list_wavmark)``: a dict mapping id to text and
            the list of ids in file order.
        '''
        # The context manager closes the file even if reading fails.
        with open(filename, 'r', encoding='utf-8') as txt_obj:
            txt_lines = txt_obj.read().split('\n')
        dic_filelist = {}
        list_wavmark = []
        for i in txt_lines:
            print(i)
            if i != '':
                txt_l = i.split(' ')
                dic_filelist[txt_l[0]] = txt_l[1]
                list_wavmark.append(txt_l[0])
        return dic_filelist, list_wavmark

    s = xpinyin.Pinyin()
    data_path = "D:\\AISHELL-2-sample\\iOS\\data\\trans.txt"
    dict_wav, list_wav = get_wav_list(data_path)
    dict_new_wav = dict()
    # Collect the pieces and join once instead of rebuilding the whole
    # string on every iteration. Each entry starts with a newline, so the
    # output keeps its leading blank line.
    _entries = []
    for i in list_wav:
        pinyin_content = s.get_pinyin(dict_wav[i], " ", tone_marks="numbers")
        _entries.append(f"\n{i} {pinyin_content}")
    content = "".join(_entries)
    print(content)
    # Closed on exit, so the data is flushed (the original never closed it).
    with open("D:\\AISHELL-2-sample\\iOS\\data\\syllable.txt", 'w') as f:
        f.write(content)
def generate(demofile, openfile, savefile):
    """Fill an HTML template with pinyin-indexed lines and save it.

    The template is cut into a head (everything before the first ``...``,
    followed by ``...`` and a newline) and a tail (``</p>`` plus everything
    after the last ``</p>``). Each data line is stored between them as
    ``<line>;<full pinyin>,<pinyin initials>`` (both lower-cased).

    Args:
        demofile: UTF-8 template file.
        openfile: Data source handed to ``readTXT``.
        savefile: UTF-8 output file (overwritten).
    """
    # Split the template.
    with open(demofile, 'r', encoding='utf-8') as f:
        s = f.read()
    head = s.partition('...')[0] + '...\n'
    tail = '</p>' + s.rpartition('</p>')[2]

    # Load the data.
    opendata = readTXT(openfile)
    total = len(opendata)

    # Build the HTML body.
    p = xpinyin.Pinyin()
    savedata = [head]
    next_report = 0
    for position, line in enumerate(opendata):
        if position > next_report:
            print('progress: %s/%s' % (next_report, total))
            next_report += 1000
        # Lower-casing the full pinyin also normalises upper-case Latin
        # letters contained in the text, e.g. 'Q太郎'.
        full = p.get_pinyin(line, '').lower()
        initials = p.get_initials(line, '').lower()
        savedata.append(line + ';' + full + ',' + initials)
    savedata.append(tail)

    # UTF-8 so characters that cannot be converted to pinyin still write.
    with open(savefile, 'w', encoding='utf-8') as f:
        f.write('\n'.join(savedata))
import os import pickle import numpy as np import pandas as pd import pkg_resources import xpinyin py = xpinyin.Pinyin() from tqdm import tqdm from scipy import stats from hpack.huffman import HuffmanEncoder from hpack.huffman_constants import REQUEST_CODES, REQUEST_CODES_LENGTH try: from websites import Website except: from .websites import Website MODEL_PATH = pkg_resources.resource_filename('qris', 'models/') def_dic_en = os.path.join(MODEL_PATH, 'queries_AOL.csv') def_dic_zh = os.path.join(MODEL_PATH, 'queries_THU.csv') def_bigrams = os.path.join(MODEL_PATH, 'bigrams.csv') class Queries: ''' Queries of prediction target set. ''' def __init__(self, website,
} return {'json': json, 'headers': self.headers} {% endfor -%} '''#创建pytest接口 #{%for %}{%endfor-%}for循环语句 #{{}}传递参数 content1=Template(template1).render(items=items)#初始化template,并传递参数items #print(content1)#输出结果 #print("总共生成{}个接口".format(len(items))) filename="ApiTest"+".py" dirname1="name" save_file(dirname1,filename,content1) #****************************************************************# modules=getModuleName(projectId)#获取所有大模块id for module in modules: pinyin=xpinyin.Pinyin() dirname="test_"+pinyin.get_pinyin(module["name"],"")#?? actions=[] #print("nihao") #print(module["_id"]) if getTid(module["_id"])!=[]:#获取每个模块下的接口id for api in getTid(module["_id"]): actions.append(getApiInfo(api))#获取每个接口的接口详情 #print("action") #print(actions) else: pass for item in actions: name=[]
def main():
    """Parse the monster files under ``DATA_DIR`` and write ``data.json``.

    Every file in ``DATA_DIR/monsters`` describes one monster as CSV lines
    with four columns (place type, gq, place and amount, cost). The parsed
    entries are collected per monster, monsters are sorted by the pinyin of
    their name, and a place-by-monster amount matrix plus per-place type ids
    and levels are dumped as JSON.

    Raises:
        AssertionError: If a line does not have four columns or its totals
            do not match the parsed entries.
    """
    monsters = []
    monsters_dir = os.path.join(DATA_DIR, "monsters")
    for fn in os.listdir(monsters_dir):
        monster_name = os.path.splitext(fn)[0]
        monster = Monster(monster_name)
        with open(os.path.join(monsters_dir, fn)) as fp:
            print("Parsing %s" % fn)
            for line in fp:
                cols = line.replace("\n", "").split(",")
                assert len(cols) == 4
                place_type, gq, place_and_amount, cost = (
                    col.strip() for col in cols)
                places_amounts_typeid_level = extract_places_amounts_typeid_level(
                    place_type, gq, place_and_amount)
                # The cost column is "<total amount>/<total strength cost>".
                costs = cost.split("/")
                total_amount = int(costs[0])
                total_cost_strength = int(costs[1])
                assert total_amount == sum(
                    am for _, am, _, _, _, _ in places_amounts_typeid_level)
                assert total_cost_strength == sum(
                    cs for _, _, _, cs, _, _ in places_amounts_typeid_level)
                for place, amount, cost_challenge, cost_strength, typeid, level in places_amounts_typeid_level:
                    monster.xs_list.append(
                        XS(place, amount, cost_challenge, cost_strength,
                           typeid, level))
        monsters.append(monster)
        print("\nBEGIN " + ">" * 74)
        print(monster)
        print("END " + "<" * 76 + "\n")

    # One converter for the whole sort instead of one per key.
    pin = xpinyin.Pinyin()
    monsters = sorted(monsters, key=lambda m: pin.get_pinyin(m.name))

    places = [xs.place for monster in monsters for xs in monster.xs_list]
    print("len(all_places) = %d" % len(places))
    places = sorted(set(places))
    print("len(all_places) = %d" % len(places))
    print("len(monsters) = %d" % len(monsters))

    n_places = len(places)
    n_monsters = len(monsters)
    # Built-in int: the np.int alias was removed from NumPy.
    mat_amount = np.zeros((n_places, n_monsters), dtype=int)
    # The two cost vectors are filled but not exported at the moment.
    mat_cost_challenge = np.zeros((n_places,), dtype=int)
    mat_cost_strength = np.zeros((n_places,), dtype=int)
    mat_typeid = np.zeros((n_places,), dtype=int)
    mat_level = np.zeros((n_places,), dtype=int)

    # ``places`` holds unique values, so a dict gives the single matching
    # row directly instead of an np.where scan per entry.
    place_index = {place: idx for idx, place in enumerate(places)}
    for m_ind, monster in enumerate(monsters):
        for xs in monster.xs_list:
            p_ind = place_index[xs.place]
            mat_amount[p_ind, m_ind] = xs.amount
            mat_cost_challenge[p_ind] = xs.cost_challenge
            mat_cost_strength[p_ind] = xs.cost_strength
            mat_typeid[p_ind] = xs.typeid
            mat_level[p_ind] = xs.level

    data = {
        "monsters": [m.name for m in monsters],
        "places": places,
        "amounts": mat_amount.tolist(),
        "typeids": mat_typeid.tolist(),
        "levels": mat_level.tolist(),
        "num_monsters": str(len(monsters)),
        "num_places": str(len(places)),
    }
    with open(os.path.join(DATA_DIR, "data.json"), "w") as fp:
        json.dump(data, fp)