def change_pinyin(name):
    """Build several pinyin spellings of a name.

    Characters without a pinyin reading are dropped from both the full and
    the abbreviated reading so the two lists stay aligned index by index.

    Returns a 5-tuple:
        1. all syllables concatenated;
        2. all syllables joined by ``_``;
        3. the first two syllables joined by ``_``, the rest appended;
        4. the first syllable in full followed by the initials of the rest;
        5. like (4), with ``_`` between the first two parts.
    """
    full = lazy_pinyin(name, errors='ignore')
    # Same error policy as `full`: otherwise non-Chinese characters survive
    # only in the initials list and shift every later index.
    initials = lazy_pinyin(name, style=Style.FIRST_LETTER, errors='ignore')
    mixed = full[:1] + initials[1:]
    return (
        ''.join(full),
        '_'.join(full),
        '_'.join(full[:2]) + ''.join(full[2:]),
        ''.join(mixed),
        '_'.join(mixed[:2]) + ''.join(mixed[2:]),
    )
def test_custom_pinyin_dict():
    """A single-character dictionary override decides the reading of '桔'."""
    char = '桔'
    wanted = ['ju2']
    # Before the override is loaded either outcome is acceptable.
    try:
        assert lazy_pinyin(char, style=TONE2) == wanted
    except AssertionError:
        pass

    load_single_dict({ord('桔'): 'jú,jié'})
    assert lazy_pinyin(char, style=TONE2) == wanted
def test_custom_pinyin_dict2():
    """A phrase dictionary override decides the reading of '同行'."""
    phrase = ['同行']
    # Before the override is loaded either outcome is acceptable.
    try:
        assert lazy_pinyin(phrase, style=TONE2) == ['to2ng', 'ha2ng']
    except AssertionError:
        pass

    load_phrases_dict({'同行': [['tóng'], ['xíng']]})
    assert lazy_pinyin(phrase, style=TONE2) == ['to2ng', 'xi2ng']
def get_confusion_word_set(word):
    """Return the candidate words whose pinyin equals that of *word*.

    Candidates are whatever ``known(edit_distance_word(word, cn_char_set))``
    yields; the result is a set and may be empty.
    """
    target_pinyin = lazy_pinyin(word)  # loop-invariant, so computed once
    return {
        candidate
        for candidate in known(edit_distance_word(word, cn_char_set))
        if lazy_pinyin(candidate) == target_pinyin
    }
def create_device_user(redis, request):
    """Create a DeviceUser row from *request* and register it in Redis.

    request -- dict-like; ``uuid`` is required.  A missing ``user_email`` is
               replaced by a random address and a missing ``user_icon`` by
               ``random_identicon`` of that e-mail.

    Returns the dict of values handed to DeviceUser, or None when no uuid
    was supplied.
    """
    _uuid = request.get("uuid")
    if not _uuid:
        logging.error("no uuid provided. %s" % request)
        return None

    _is_service_user = bool(request.get("is_service_user"))
    _is_anonymous_user = bool(request.get("is_anonymous_user"))
    _is_owner_user = bool(request.get("is_owner_user"))
    # Read like the other flags; the name was used below without being bound.
    _is_ppmessage_user = bool(request.get("is_ppmessage_user"))

    _user_email = request.get("user_email")
    if not _user_email:
        import strgen
        _user_email = (strgen.StringGenerator(r"[\d\w]{10}").render()
                       + "@"
                       + strgen.StringGenerator(r"[\d\w]{10}").render())

    _user_icon = request.get("user_icon")
    if not _user_icon:
        _user_icon = random_identicon(_user_email)

    # NOTE(review): the request's user_language is not stored by this function.
    _user_fullname = request.get("user_fullname")

    import pypinyin
    if _user_fullname is None:
        # No full name supplied: nothing to transliterate.
        _user_pinyin = None
        _user_py = None
    else:
        if not isinstance(_user_fullname, unicode):
            _user_fullname = _user_fullname.decode("utf-8")
        _user_pinyin = "".join(pypinyin.lazy_pinyin(_user_fullname))
        _user_py = "".join(
            pypinyin.lazy_pinyin(_user_fullname, style=pypinyin.FIRST_LETTER))

    _values = {
        "uuid": _uuid,
        "is_service_user": _is_service_user,
        "is_owner_user": _is_owner_user,
        "is_ppmessage_user": _is_ppmessage_user,
        "is_anonymous_user": _is_anonymous_user,
        "user_name": request.get("user_name"),
        "user_mobile": request.get("user_mobile"),
        "user_email": _user_email,
        "user_icon": _user_icon,
        "user_fullname": _user_fullname,
        "user_password": request.get("user_password"),
        "user_pinyin": _user_pinyin,
        "user_py": _user_py,
        "ent_user_uuid": request.get("ent_user_uuid"),
        "ent_user_createtime": request.get("ent_user_createtime"),
    }

    _row = DeviceUser(**_values)
    _row.async_add(redis)
    _row.create_redis_keys(redis)
    return _values
def test_errors_callable():
    """``errors`` accepts both a plain function and a callable object."""
    class Foobar(object):
        def __call__(self, chars):
            return 'a' * len(chars)

    def foobar(chars):
        return 'a' * len(chars)

    n = 5
    text = 'あ' * n
    expected = ['a' * n]
    assert lazy_pinyin(text, errors=foobar) == expected
    assert lazy_pinyin(text, errors=Foobar()) == expected
def post_or_put(pk=None, dic=None):
    """Save a city through the ``ers`` service and return the service result.

    *dic* is converted to a ``TCity``; empty ``pinyin``, ``abbr`` and ``sort``
    fields are filled from the city name (full pinyin, upper-case initials)
    and the default sort weight 2000 respectively.
    """
    city = dic_to_tobj(dic, thirdparty_svc.ers.TCity, True)

    if not city.pinyin:
        syllables = lazy_pinyin(city.name, errors='ignore')
        city.pinyin = ''.join(syllables)

    if not city.abbr:
        initials = lazy_pinyin(city.name, style=FIRST_LETTER, errors='ignore')
        city.abbr = ''.join(initials).upper()

    if not city.sort:
        city.sort = 2000

    with thrift_client('ers') as ers:
        return ers.save_city(pk, city)
def doTarget(isp, ipList):
    """Append one ISP section, with a sub-entry per host, to the ``target`` file.

    isp    -- 'tel', 'uni' or 'mob'; any other value is written as the EDU group.
    ipList -- mapping of IP address -> display name.
    """
    groups = {
        'tel': ('telcom', '中国电信'),
        'uni': ('unicom', '中国联通'),
        'mob': ('CMCC', '中国移动'),
    }
    title, menu = groups.get(isp, ('EDU', '中国教育'))

    # `with` closes the file even when formatting or writing raises.
    with open(target, 'a') as fd:
        fd.write("+%s\nmenu = %s\ntitle = %s\n\n" % (title, menu, title))
        for ip, name in ipList.items():
            # Section id: pinyin of the display name plus the first IP octet.
            subTitle = ''.join(lazy_pinyin(name)) + "-" + ip.split('.')[0]
            fd.write('++%s\nmenu = %s\ntitle = %s\nhost = %s\n\n'
                     % (subTitle, name.encode('utf8'), ip, ip))
def CJKFirstLetters(str):
    """Return the first character of each element ``lazy_pinyin`` yields, concatenated."""
    # p[:1] rather than p[0] so an empty element cannot raise IndexError.
    return ''.join(p[:1] for p in lazy_pinyin(str))
def parse_profile(self,response):
    """Fill the item carried in ``response.meta['item']`` from a profile page.

    Yields the completed item, including ``pinyin`` (the romanized nickname)
    and ``image_urls`` (face, big and life images).
    """
    hxs = Selector(response)
    item = response.meta['item']
    base = hxs.xpath('//div[@class="mm-p-info mm-p-base-info"]')
    # List comprehensions instead of map() so the results stay indexable on
    # Python 3 as well.
    lists = [filterHtml(node) for node in base.xpath('ul/li/span').extract()]
    profiles = [filterHtml(node) for node in base.xpath('ul/li/p').extract()]
    exprince = hxs.xpath('//div[@class="mm-p-info mm-p-experience-info"]/p').extract()

    item['nicename'] = lists[0].strip()
    item['borthday'] = lists[1].replace(u'\xa0', '')
    item['job'] = lists[3].strip(u'型')
    item['blood'] = lists[4].strip(u'型')
    item['school'] = ''
    item['specialty'] = ''
    if lists[5] != '':
        # School and specialty share one field, separated by a run of two or
        # more non-breaking spaces.
        parts = re.split(u'\xa0{2,}', lists[5])
        if len(parts) > 1:
            item['school'] = parts[0]
            item['specialty'] = parts[1]
    item['style'] = lists[6].strip()

    item['height'] = profiles[0].strip('CM')
    item['weight'] = profiles[1].strip('KG')
    item['solid'] = profiles[2].strip()
    item['bar'] = bar(profiles[3])
    item['shoes'] = profiles[4].strip(u'码')
    item['exprince'] = filterHtml(exprince[0])

    left_img = hxs.xpath('//div[@class="mm-p-modelCard"]/a/img/@src').extract()
    item['life_img'] = 'https:' + left_img[0] if left_img else ''
    item['image_urls'] = [item['faceimg'], item['big_img'], item['life_img']]

    item['pinyin'] = ''.join(lazy_pinyin(item['nicename']))
    yield item
def _du(self):
    """Fill the handler's return data with one device user's details.

    Reads ``user_uuid`` (required) and ``return_password`` (optional) from the
    JSON request body.  Sets API_ERR.NO_PARA when the uuid is missing and
    API_ERR.NO_OBJECT when no such user is stored.  On success the return data
    receives the user's fields, ``pinyinname0`` / ``pinyinname1`` derived from
    the full name, and the configured team's AppInfo under ``team``.
    """
    _request = json.loads(self.request.body)
    _user_uuid = _request.get("user_uuid")
    if not _user_uuid:
        self.setErrorCode(API_ERR.NO_PARA)
        return

    _o = redis_hash_to_dict(self.application.redis, DeviceUser, _user_uuid)
    if not _o:
        self.setErrorCode(API_ERR.NO_OBJECT)
        return

    # The password is stripped unless the caller explicitly asks for it.
    if not _request.get("return_password", False):
        # pop() rather than del: a record without the field must not raise.
        _o.pop("user_password", None)

    _fn = _o.get("user_fullname")
    if _fn is not None and not isinstance(_fn, unicode):
        _fn = _fn.decode("utf-8")

    _rdata = self.getReturnData()
    _rdata.update(_o)
    if _fn is None:
        # No full name stored: nothing to transliterate.
        _rdata["pinyinname0"] = ""
        _rdata["pinyinname1"] = ""
    else:
        _rdata["pinyinname0"] = "".join(lazy_pinyin(_fn))
        _rdata["pinyinname1"] = "".join(
            itertools.chain.from_iterable(pinyin(_fn, style=pypinyin.INITIALS)))

    _app_uuid = _get_config().get("team").get("app_uuid")
    _team = redis_hash_to_dict(self.application.redis, AppInfo, _app_uuid)
    _rdata.update({"team": _team})
    return
def parse(self,response):
    """Collect the province link and its city links, then request each one.

    Every yielded Request targets ``self.parse2`` and carries the entry's
    numeric code in ``meta['code']``.
    """
    hxs = Selector(response)
    entries = []

    province_links = hxs.xpath('//ul/li[1]/dl/dt/a')
    if len(province_links) > 0:
        # The province code is looked up by the last path segment of the URL.
        province = os.path.basename(response.url)
        head = ProvinceItem()
        head['link'] = province_links.xpath('@href').extract()[0]
        head['code'] = self.city_codes.get(province)
        entries.append(head)

    for anchor in hxs.xpath('//ul/li[1]/dl/dd/a'):
        city = ProvinceItem()
        city['name'] = anchor.xpath('text()').extract()[0]
        city['link'] = anchor.xpath('@href').extract()[0]
        # The link's file name starts with the numeric code, e.g. "<code>-...".
        city['code'] = int(os.path.basename(city['link']).split('-')[0])
        # Parent code: the city code with its last two digits zeroed.
        city['parent'] = city['code'] - city['code'] % 100
        city['pinyin'] = ''.join(lazy_pinyin(city['name']))
        entries.append(city)

    for entry in entries:
        yield Request(entry['link'], meta={'code': entry['code']}, callback=self.parse2)
def process_item(self, item, spider):
    """Store a scraped poem and, if new, its author; always return *item*.

    Only items from the ``songs`` spider are written.  Known authors are
    looked up in the Redis hash ``author`` (by the item's author id, then by
    the pinyin of "<author>_<dynasty>"); unknown authors are inserted first.
    Failures are logged and never propagate to the caller.
    """
    if spider.name != 'songs':
        return item

    cursor1 = None
    try:
        cursor1 = self.db.cursor()
        key = ''.join(lazy_pinyin(item['author'] + '_' + item['dynasty']))

        if self.redis_conn.hexists('author', item['author_id']):
            kwd = item['author_id']
        elif self.redis_conn.hexists('author', key):
            kwd = key
        else:
            kwd = ''

        if kwd != '':
            author_id = self.redis_conn.hget('author', kwd)
        else:
            sql = "insert into `author` (`name`,`dynasty`,`pinyin`) values(%s,%s,%s)"
            cursor1.execute(sql, [item['author'], item['dynasty'], item['pinyin']])
            author_id = str(cursor1.lastrowid)
            self.redis_conn.hsetnx('author', key, author_id)

        created = int(time.time())
        sql1 = "insert into `content` (`author_id`,`title`,`created`,`view_url`,`comment_num`,`point`,`content`) values(%s,%s,%s,%s,%s,%s,%s)"
        cursor1.execute(sql1, [author_id, item['title'], created, item['view_url'],
                               item['comment_nums'], item['point'], item['content']])
    except mysql.connector.Error as e:
        msg = u'view_url:%s 写入数据失败:%s' % (item['view_url'], e)
        logger.error(msg)
    except Exception:
        # Best effort: any other failure is logged with its traceback instead
        # of being dropped silently.
        logger.exception('process_item failed')
    finally:
        # Close exactly once, and only if the cursor was actually created.
        if cursor1 is not None:
            cursor1.close()
    return item
def correctOneWord(oldWord): word = oldWord.decode('utf-8') pyList = lazy_pinyin(word) pyStr = "" maxSame = 0 resultWord = "" same = 0 count = 0 for py in pyList: pyStr+=py.encode('utf-8') print pyStr result = ChineseWordModel.objects.filter(pinyin=pyStr).order_by('idf') if len(result) == 0: print "pinyin do not exist" return oldWord for r in result: print r['word'] print r['idf'] same = findSameChar(word,r['word']) if(same>maxSame): maxSame = same resultWord = r['word'] print "maxSame",maxSame if maxSame == 0: resultWord = result[0]['word'] print "no similar word" return resultWord
def txt_to_voice(text, name='test', export_path=EXPORT_PATH):
    """
    Convert text to audio and export it as an MP3 file.
    :param text: the text to convert
    :param name: file name (without extension) of the generated audio
    :param export_path: directory the MP3 file is written to
    :return: None
    """
    track = AudioSegment.empty()
    for syllable in lazy_pinyin(text, style=TONE3):
        clip = VOICE_DICT.get(syllable)
        if clip is None and syllable and syllable[-1] not in '0123456789':
            # No tone digit: retry with '5' appended, falling back to `silent`.
            clip = VOICE_DICT.get(syllable + '5', silent)
        if clip:
            track += clip
    target = os.path.join(export_path, "{}.mp3".format(name))
    track.export(target, format='mp3')
def initial(folderName, type): documents = os.listdir('./'+folderName+'/') index = 1 # the same sequence with model in models.py model = {} documentModel = {} wordModel = {} ld = len(documents) if type == 0: hrefList = open('./html_sohu.txt', 'r').readlines() titleList = open('./title_sohu.txt', 'r').readlines() for document in documents: if index % 50 == 0: print str(index) + ' / ' + str(ld) documentName = document[0:4] documentModel[documentName] = {'length': 0, 'href': hrefList[int(documentName)-1].split('\n')[0].split('\t')[1], 'title':titleList[int(documentName)-1].split('\n')[0].split('\t')[1]} words = open('./'+folderName+'/'+document, 'r').readlines() for word in words: singleWord = word.split('\n')[0] if len(singleWord) < 3: continue singleWordUnicode = singleWord.decode('utf-8') pinyins = lazy_pinyin(singleWordUnicode) pinyinStr = '' for pinyin in pinyins: pinyinStr = pinyinStr + pinyin.encode('utf-8') if len(pinyinStr) < 2: continue if (singleWord in wordModel) == False: wordModel[singleWord] = {'length': len(pinyins), 'idf': 0, 'pinyin': pinyinStr} if ((singleWord, documentName) in model) == False: model[(singleWord, documentName)] = {'tfIdf': 0, 'times': 1, 'tf': 0} else: times = model[(singleWord, documentName)]['times'] + 1 model[(singleWord, documentName)]['times'] = times index = index + 1 else: hrefList = open('./html_wiki.txt', 'r').readlines() titleList = open('./title_wiki.txt', 'r').readlines() for document in documents: if index % 50 == 0: print str(index) + ' / ' + str(ld) documentName = document[0:4] documentModel[documentName] = {'length': 0, 'href': hrefList[int(documentName)-1].split('\n')[0].split('\t')[1], 'title':titleList[int(documentName)-1].split('\n')[0].split('\t')[1]} words = open('./'+folderName+'/'+document, 'r').readlines() for word in words: singleWord = word.split('\n')[0] l = len(singleWord) if l < 3 or l > 15: continue if (singleWord in wordModel) == False: wordModel[singleWord] = {'length': l, 'idf': 0} if ((singleWord, 
documentName) in model) == False: model[(singleWord, documentName)] = {'tfIdf': 0, 'times': 1, 'tf': 0} else: times = model[(singleWord, documentName)]['times'] + 1 model[(singleWord, documentName)]['times'] = times index = index + 1 return model, documentModel, wordModel
def _du(self, _request, _rdata):
    """Copy one device user's details into *_rdata*.

    _request -- dict with ``user_uuid`` (required) and ``return_password``
                (optional; the password is removed unless it is truthy).
    _rdata   -- dict updated in place with the user's fields plus
                ``pinyinname0`` / ``pinyinname1`` derived from the full name.

    Sets API_ERR.NO_PARA when the uuid is missing and API_ERR.NO_OBJECT when
    no such user is stored; *_rdata* is left untouched in both cases.
    """
    if "user_uuid" not in _request:
        self.setErrorCode(API_ERR.NO_PARA)
        logging.error("Error for no para: %s.", (str(_request)))
        return

    _o = redis_hash_to_dict(self.application.redis, DeviceUser, _request["user_uuid"])
    logging.info(_o)
    if _o is None:
        self.setErrorCode(API_ERR.NO_OBJECT)
        logging.error("Error for no user uuid: %s." % (_request["user_uuid"]))
        return

    # The password is stripped unless the caller explicitly asks for it.
    if not _request.get("return_password", False):
        # pop() rather than del: a record without the field must not raise.
        _o.pop("user_password", None)

    _fn = _o.get("user_fullname")
    if _fn is not None and not isinstance(_fn, unicode):
        _fn = _fn.decode("utf-8")

    _rdata.update(_o)
    if _fn is None:
        # No full name stored: nothing to transliterate.
        _rdata["pinyinname0"] = ""
        _rdata["pinyinname1"] = ""
    else:
        _rdata["pinyinname0"] = "".join(lazy_pinyin(_fn))
        _rdata["pinyinname1"] = "".join(
            itertools.chain.from_iterable(pinyin(_fn, style=pypinyin.INITIALS)))
    return
def save(self, *args, **kwargs):
    """Fill an empty ``simple_name`` before delegating to the parent ``save``.

    First choice is the hyphen-joined pinyin of ``name``; if that is still
    empty, ``name`` itself is used.
    """
    def is_blank(value):
        return value == '' or value == None

    # Chinese input
    if is_blank(self.simple_name):
        self.simple_name = '-'.join(lazy_pinyin(self.name))
    # English input
    if is_blank(self.simple_name):
        self.simple_name = self.name

    super(Category, self).save(*args, **kwargs)
def delete(self, item, pinyin=False, seg=False):
    """Remove *item* from every prefix it is indexed under.

    item   -- dict with a ``term`` key (checked by ``item_check``).
    pinyin -- also remove the pinyin form of each prefix.
    seg    -- forwarded to ``prefixs_for_term``.
    """
    self.item_check(item)
    term = item['term']
    # The item id is the MD5 hex digest of its UTF-8 encoded term.
    uid = hashlib.md5(term.encode('utf8')).hexdigest()
    for prefix in self.prefixs_for_term(term, seg=seg):
        self._delete_prefix(prefix, uid)
        if not pinyin:
            continue
        self._delete_prefix(''.join(lazy_pinyin(prefix)), uid)
def pinyinFirst(name): print name pinyin = lazy_pinyin(name) retPinyin = [] for element in pinyin: retPinyin =retPinyin + [element[0]] return retPinyin
def weather_crawler(city):
    """Fetch the lishi.tianqi.com page for *city* and print its weather and wind summary."""
    slug = ''.join(lazy_pinyin(city))
    page = requests.get('http://lishi.tianqi.com/{}/index.html'.format(slug))
    soup = BeautifulSoup(page.text, 'lxml')

    # First summary paragraph, minus its last 15 characters.
    summary = soup.select(' div.tqtongji > p')[0].get_text()
    weather = summary[0:-15]
    # Second list in the summary block, flattened onto one line.
    wind = soup.select(' div.tqtongji > ul')[1].get_text().replace('\n', ' ')

    print(weather,'\n\n'+'风力情况为:\n',wind)
def to_dict(self):
    """Return this object's fields as a plain dict, plus the pinyin initials of its name."""
    initials = lazy_pinyin(self.name, Style.FIRST_LETTER)
    data = {}
    data['id'] = self.id
    data['name'] = self.name
    data['cur_epi'] = self.cur_epi
    data['on_air_epi'] = self.on_air_epi
    data['on_air_day'] = self.on_air_day
    data['seeker'] = self.seeker
    data['name_pinyin'] = ''.join(initials)
    return data
def __translate_title(self,title):
    """Register pinyin keys for every title entry not seen before.

    Each entry of *title* is either a string or a list whose first two
    elements are the title and a qualifier.  The underscore-joined pinyin of
    the title is stored in both lookup tables; for list entries the reverse
    table holds "title(qualifier)".  Entries of any other type are ignored.
    """
    for item in title:
        if isinstance(item, str):
            zhongwen = item
            label = item
        elif isinstance(item, list):
            zhongwen = item[0]
            label = zhongwen + "(" + item[1] + ")"
        else:
            continue

        if zhongwen in self.__zhongwen_2_pinyin:
            continue

        pinyin = "_".join(lazy_pinyin(zhongwen, errors='ignore'))
        self.__pinyin_2_zhongwen[pinyin] = label
        self.__zhongwen_2_pinyin[zhongwen] = pinyin
def get_pinyin_first(org_str):
    """Return the first pinyin letter of a unicode string.

    Empty input and non-unicode input (no decoding is attempted) both give ''.
    """
    if not org_str or not isinstance(org_str, unicode):
        return ''
    first_syllable = lazy_pinyin(org_str)[0]
    return first_syllable[0]
def parse(self,response):
    """Parse one poem page into a SongsItem.

    Yields one Request per related URL (carrying the item in ``meta``) or,
    when there are none, the item itself.  Pages without the ``div.shileft``
    container yield nothing; extraction errors are logged, not raised.
    """
    curr_url = response.url
    view_url = urlparse.urlparse(curr_url).path
    hxs = Selector(response)
    warp = hxs.xpath('//div[@class="shileft"]')
    if not warp:
        return

    song = SongsItem()
    song['view_url'] = view_url
    try:
        title = warp.xpath('div[@class="son1"]/h1/text()').extract()[0]
        son2 = warp.xpath('div[@class="son2"]')

        point = 0
        comment_num = 0
        temp = son2.css('.line1 *::text').extract()
        if temp:
            m = re.search(r'(\d+)', temp[0].strip())
            comment_num = m.group(1) if m else 0
            point = temp[1].strip() if len(temp) > 1 else 0

        dynasty = son2.xpath('p[1]/text()').extract()
        dynasty = dynasty[0] if dynasty else ''

        # The author is usually a link; fall back to the plain paragraph text.
        author_temp = son2.xpath('p[2]/a/text()').extract()
        author = author_temp[0] if author_temp else son2.xpath('p[2]/text()').extract()[0]
        author_name = lazy_pinyin(author)

        author_id = 0
        url_temp = son2.xpath('p[2]/a/@href')
        if url_temp:
            m1 = re.search(r'(\d+)', url_temp.extract()[0])
            # A link without digits keeps the default id instead of raising.
            if m1:
                author_id = m1.group(1)

        strs = son2.extract()[0]
        compiles = re.compile(r'</span></p>(.*)?.*?</div>', re.S)
        m = re.search(compiles, strs)
        # No match means no body text; do not call methods on None.
        content = m.group(1).strip() if m and m.group(1) else ''

        relation_urls = hxs.xpath('//div[@class="son5" and @id]/p[1]/a/@href').extract()

        song['title'] = title
        song['comment_nums'] = comment_num
        song['point'] = point
        song['dynasty'] = dynasty
        song['author'] = author
        song['content'] = content
        song['pinyin'] = ''.join(author_name)
        song['author_id'] = author_id
        song['relation_urls'] = relation_urls if relation_urls else ''

        if relation_urls:
            for i in relation_urls:
                url = 'http://so.gushiwen.org' + i
                yield Request(url, callback=self.parse_relation, meta={'item': song}, errback=self.catchError)
        else:
            yield song
    except Exception as e:
        msg = u"urls:%s message:%s" % (curr_url, str(e))
        logger.error(msg)
def modifytag(filename,*attr):
    """Replace the named tag fields of an audio file with their pinyin.

    filename -- path handed to ``eyed3.load``.
    attr     -- one or more tag attribute names, e.g. "artist", "title".
                With none given, a usage hint is printed and the interpreter
                exits.
    """
    # len() never returns None, so test for emptiness directly.
    if not attr:
        print('no attr given ,please input "artist","title" ...')
        quit()

    audiofile = eyed3.load(filename)
    for changeattr in attr:
        syllables = pypinyin.lazy_pinyin(getattr(audiofile.tag, changeattr))
        romanized = ''.join(str(x) for x in syllables)
        setattr(audiofile.tag, changeattr, romanized.decode('utf-8'))
    audiofile.tag.save()
def get_ready_data_file_path(city_name, data_type, source_name, data_label):
    """Return the .tsv path for a city's data file, creating its folder if needed.

    Layout, relative to the parent of the current working directory:
        poi/poi_data/<city pinyin>/<data_type>/<city pinyin>_<source_name>_<data_label>.tsv

    raw data path:   poi/poi_data/city/raw_data/date/ 1.anjuke_old 2.anjuke_new
                     3.lianjia_old 4.lianjia_new 5.baidu 6.fangtianxia
    ready data path: poi/poi_data/city/ready_data/ 1.anjuke 2.lianjia 3.baidu
                     4.fangtianxia
    """
    city_name_pinyin = ''.join(lazy_pinyin(city_name))
    path = os.path.join(os.path.dirname(os.getcwd()), 'poi', 'poi_data',
                        city_name_pinyin, data_type)
    if not os.path.exists(path):
        os.makedirs(path)
    file_name = '{}_{}_{}.tsv'.format(city_name_pinyin, source_name, data_label)
    # os.path.join picks the right separator for the platform, so no
    # backslash literal or post-hoc replace is needed.
    return os.path.join(path, file_name)
def add(self, item, pinyin=False, seg=False):
    """Store *item* and index it under every prefix of its term.

    item   -- dict with a ``term`` key and an optional ``score`` (default 0).
    pinyin -- also index the pinyin form of each prefix.
    seg    -- forwarded to ``prefixs_for_term``.
    """
    self.item_check(item)
    term = item['term']
    score = item.get('score', 0)
    # The item id is the MD5 hex digest of its UTF-8 encoded term.
    uid = hashlib.md5(term.encode('utf8')).hexdigest()
    self.redis.hset(self.db, uid, json.dumps(item))

    for prefix in self.prefixs_for_term(term, seg):
        self._index_prefix(prefix, uid, score=score)
        if not pinyin:
            continue
        self._index_prefix(''.join(lazy_pinyin(prefix)), uid, score=score)
def hanzi_to_pinyin(txt):
    """
    Romanize the Chinese characters in txt.

    Args:
        txt -- text containing Chinese characters (unicode)

    Returns:
        unicode string: the TONE2-style pinyin pieces of txt joined together
    """
    pieces = pyp.lazy_pinyin(txt, style=pyp.TONE2)
    romanized = u''.join(pieces)
    return romanized
def generatenewcontact(contact_str_list):
    """Append an X-PHONETIC-LAST-NAME line to every contact that lacks one.

    The phonetic value is the pinyin of all ``FN:`` values found in the
    contact text.  The list is modified in place and also returned.
    """
    fn_pattern = re.compile(r'FN:((?:.|\n)*?)\n')
    for position, card in enumerate(contact_str_list):
        if 'X-PHONETIC-LAST-NAME' in card:
            continue
        romanized = ''.join(lazy_pinyin(fn_pattern.findall(card)))
        contact_str_list[position] = card + ("X-PHONETIC-LAST-NAME:" + romanized + '\n')
    return contact_str_list
def searchMusic(self, search_song, headers):
    """Search music.163.com for a song and return the list of matching songs.

    search_song -- query text; it is converted to pinyin before searching.
    headers     -- value sent as the ``User-agent`` request header.

    Returns ``result['songs']`` from the decoded JSON response.
    """
    searchUrl = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
    musicStr = ''.join(lazy_pinyin(search_song))
    key = '{hlpretag:"",hlposttag:"</span>",s:"' + musicStr + '",type:"1",csrf_token:"",limit:"30",total:"true",offset:"0"}'
    FormData = self.GetFormData(key)
    response = requests.request(
        'POST',
        searchUrl,
        data=FormData,
        headers={
            'User-agent': headers,
            'referer': 'https://music.163.com/',
            'Host': 'music.163.com',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
        })
    song_dict = json.loads(response.text)
    return song_dict['result']['songs']
def __review(self, query):
    """Run a quick name/alias lookup for short, unfiltered first-page queries.

    Returns ``(True, hits)`` when the lookup ran and ``(False, None)`` when
    the query has a filter, a positive ``start``, a non-string ``input``, or
    a cleaned input of five or more characters.
    """
    if self.__exist_filter(query.get('filter')) or query.get('start') > 0:
        return False, None

    raw = query.get('input')
    if not isinstance(raw, (str, unicode)):
        return False, None

    # Spaces, '+' and '-' are ignored when measuring and matching the name.
    name = raw.replace(' ', '').replace('+', '').replace('-', '')
    if len(name) >= 5:
        return False, None

    by_name = templates.get_string_template('name', ' '.join(name), '100%')
    by_alias = templates.get_string_template(
        'alias', ''.join(lazy_pinyin(name, errors='ignore')), '100%')
    es_query = {'bool': {'should': [by_name, by_alias]}}

    sort = query.get('sort', 1)
    order = query.get('order', 'default')
    hits = self.es.search(index='xiniudata', doc_type='company',
                          body={
                              "query": es_query,
                              "sort": self.__generate_sort_search(sort, order),
                              "from": 0,
                              "size": 10
                          })
    return True, hits
def deal_text(text: str, pinyin_char_table: dict, record: dict, binary_record: dict):
    """Accumulate unigram and bigram counts of (pinyin, char) ids over *text*.

    pinyin_char_table -- maps (pinyin, char) to an id.
    record            -- id -> count, incremented for every character.
    binary_record     -- previous id -> {current id -> count}.

    Two extra ids are derived from the table size: ``len + 1`` stands for
    "start / unknown" and ``len + 2`` for "stop".  A known id following
    anything counts as a transition; an unknown one following a known id
    counts as a transition to "stop".
    """
    table_size = len(pinyin_char_table)
    start = table_size + 1
    stop = table_size + 2

    # Characters without a reading come back as None, one per character.
    readings = lazy_pinyin(text, style=STYLE_NORMAL, errors=lambda chunk: [None] * len(chunk))

    prev = start
    for reading, char in zip(readings, text):
        cur = start
        if reading is not None:
            reading = REGULAR_PINYIN.get(reading, reading)
            reading = FORCE_PINYIN.get(char, reading)
            cur = pinyin_char_table.get((reading, char), start)
            if cur == start:
                print('WARNING: strang (pinyin, char):', reading, char)

        record[cur] += 1
        if cur != start:
            binary_record[prev][cur] += 1
        elif prev != start:
            binary_record[prev][stop] += 1
        prev = cur
    return
def ad_update_pic():
    """Store an uploaded avatar for the logged-in admin and return its URL.

    The stored file name is a Redis counter value followed by the extension
    of the romanized upload name.  Responds with code 208 when the login has
    expired, 203 when the admin is unknown or the file is rejected, and 200
    with the image URL on success.
    """
    upload = request.files['file']
    if not is_admin_login(request):
        return jsonify({"code": 208, "msg": "登录信息已经过期"})

    admin_id = get_admin_id_by_cookie(request)
    if not exist_admin(admin_id):
        return {"code": "203", "msg": "抱歉,管理员不存在"}

    romanized = "".join(lazy_pinyin(upload.filename))
    counter = get_redis_cli().incr('admin-images')
    # Keep everything from the last '.' onwards as the extension.
    filename = str(counter) + str(romanized[romanized.rfind("."):])

    if not (upload and allowed_file(filename)):
        return {"code": "203", "msg": "上传失败"}

    filename = secure_filename(filename)
    upload.save(os.path.join(app.config['ADMIN_UPLOAD_FOLDER'], filename))
    url = "http://192.168.195.10:5005/admin/images/" + filename
    admin = Admin.query.filter(Admin.admin_id == admin_id).first()
    admin.head_pic = url
    db.session.commit()
    return {"code": "200", "msg": "上传成功", "url": url}
def get_column_list():
    """Return a JSON string describing every configured news column.

    Each entry carries its index, title, description, cover image name (the
    title's pinyin plus ``.jpg``) and the number of stored news items.  The
    payload is ``{"errcode": 0, "columns": [...]}``; any error propagates to
    the caller after the database handle has been closed.
    """
    # Created before `try` so `finally` never touches an unbound name when
    # the constructor itself fails.
    newsDB = NewsDB()
    try:
        newsCountDict = {
            row["column"]: row["count"]
            for row in newsDB.group_count("newsDetail", "column")
            if row["column"] in columns
        }
        columnsInfo = [{
            "id": idx,
            "title": title,
            "desc": desc,
            "cover": "%s.jpg" % "".join(lazy_pinyin(title)),
            "newsCount": newsCountDict[title]
        } for idx, (title, desc) in enumerate(columns.items())]
    finally:
        newsDB.close()
    return json.dumps({"errcode": 0, "columns": columnsInfo})
def speak(self, text):
    """Play *text* syllable by syllable, each clip on its own thread.

    Punctuation is stripped from every syllable, digit-only tokens are
    spelled out via ``atc.num2chinese`` and romanized again, and the clips
    ``syllables/<syllable>.wav`` are scheduled 0.355 apart.
    """
    raw = lazy_pinyin(text, style=pypinyin.TONE3)
    print(raw)

    cleaned = []
    for token in raw:
        for mark in TextToSpeech.punctuation:
            token = token.replace(mark, "")
        if token.isdigit():
            spoken = atc.num2chinese(token)
            cleaned.extend(lazy_pinyin(spoken, style=pypinyin.TONE3))
        else:
            cleaned.append(token)

    offset = 0
    for syllable in cleaned:
        clip = "syllables/" + syllable + ".wav"
        _thread.start_new_thread(TextToSpeech._play_audio, (clip, offset))
        offset += 0.355
def cut(self, sents):
    """Turn "word/tag word/tag ..." into a character sequence plus tag sequence.

    Each word is replaced by its first pinyin syllable; the syllables are
    joined by spaces and then split into characters (a space becomes
    ``_space``).  Every character of a syllable gets the word's tag and every
    separating space gets ``tag0``.  Returns "<chars> _link <tags>" and
    increments ``self.num``.
    """
    self.num += 1
    tag_names = {'0': 'tag0', '1': 'tag1', '2': 'tag2'}

    syllables = []
    tags = []
    for token in sents.split(' '):
        fields = token.split('/')
        syllable = pypinyin.lazy_pinyin(fields[0], 0)[0]
        syllables.append(syllable)
        # One tag per character of the syllable.
        tags.append(' '.join(tag_names[fields[1]] for _ in syllable))
        # Tag for the space that follows this word.
        tags.append('tag0')
    # Drop the last element: there is no space after the final word.
    del tags[-1:]

    joined = ' '.join(syllables)
    chars = ' '.join([ch.replace(' ', '_space') for ch in joined])
    return chars + ' _link ' + ' '.join(tags)
def getOnePatam(song_name_or_id):
    """Search music.163.com for a song and return the list of matching songs.

    The query is converted to pinyin first; the return value is
    ``result['songs']`` from the decoded JSON response.
    """
    # Search endpoint used to look up song ids.
    url = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
    # Browser-like request headers.
    head = {
        'Host': 'music.163.com',
        'Origin': 'https://music.163.com',
        'Referer': 'https://music.163.com/search/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/67.0.3396.99 Safari/537.36',
    }
    # First form parameter: the search descriptor.
    query = ''.join(lazy_pinyin(song_name_or_id))
    key = '{hlpretag:"",hlposttag:"</span>",s:"' + query + \
          '",type:"1",csrf_token:"",limit:"30",total:"true",offset:"0"}'
    reply = requests.post(url, headers=head, data=GetFormData(key))
    return json.loads(reply.text)['result']['songs']
def file_charge(num):
    """Compare request lines with the pinyin of result lines and print a position.

    Reads ``<num>.txt`` from the ``single-req`` and ``nn`` folders, romanizes
    every ``nn`` line and walks both lists in step.  Prints
    "<num> --- <now - 1>", where ``now`` is the 1-based position recorded when
    a run of mismatches starts.  The walk stops once a run already counts 5
    and another consecutive mismatch follows.
    NOTE(review): a mismatch that is not consecutive to the previous one only
    resets the counter; confirm this is the intended run detection.
    """
    filename = str(num) + '.txt'
    with open('G:\\ngram_lexer\\sogou_input\\single-req\\' + filename, 'r', encoding='utf-8') as fr:
        req_arr = [line.strip() for line in fr]
    with open('G:\\ngram_lexer\\sogou_input\\nn\\' + filename, 'r', encoding='utf-8') as fs:
        res_arr = [''.join(lazy_pinyin(line.strip())) for line in fs]

    expected_next = 1   # position a mismatch must have to extend the run
    run_length = 0
    now = 1
    for position, (wanted, got) in enumerate(zip(req_arr, res_arr), 1):
        if wanted == got:
            continue
        if expected_next == position:
            if run_length == 0:
                now = position
                run_length = 1
            elif run_length < 5:
                run_length += 1
            else:
                break
        else:
            run_length = 0
        expected_next = position + 1

    print(str(num) + ' --- ' + str(now - 1))
def create_indice_completion_locations(self, db):
    """Add every named location from the database to the ``completion`` index.

    Each document offers three completion strings (lower-cased name, pinyin
    of the name, lower-cased English name) and a ranking score that is
    inversely proportional to the name length.  Locations without a name are
    logged and skipped.
    """
    location_score = 1
    for lid, lname in dbutil.get_all_locations(db):
        if not lname:
            # Plain error(): there is no active exception here, so
            # exception() would only append an empty traceback.
            self.logger.error('%s location has no name' % lid)
            continue
        en_name = dbutil.get_location_en_name(db, lid)
        item = {
            'id': 'l%s' % lid,
            '_name': lname,
            'en_name': en_name,
            'completionName': [lname.lower(), ''.join(lazy_pinyin(lname)), en_name.lower()],
            '_prompt': 'location',
            'ranking_score': location_score * round((1.0 / len(lname)), 2)
        }
        self.create_index(item, 'completion')
def get_deletes(word: list):
    '''
    Symmetric-delete candidates for a word.

    The input is converted to a single pinyin string; the result lists that
    string followed by every distinct string obtainable from it by deleting
    one or two characters, in order of first discovery.  Strings of length 1
    are never shortened further.
    @param word: input accepted by lazy_pinyin
    @return: list of str, the full pinyin string first
    '''
    word = ''.join(lazy_pinyin(word))
    dels = [word]
    seen = {word}          # O(1) membership test for `dels`
    queue = [word]
    for _ in range(2):
        next_queue = []
        queued = set()     # O(1) membership test for `next_queue`
        for current in queue:
            if len(current) <= 1:
                continue
            for i in range(len(current)):
                shorter = current[:i] + current[i + 1:]
                if shorter not in seen:
                    seen.add(shorter)
                    dels.append(shorter)
                if shorter not in queued:
                    queued.add(shorter)
                    next_queue.append(shorter)
        queue = next_queue
    return dels
def biu_pro():
    """Estimate, per character, the probability of each pinyin initial.

    Reads ``pinyin_train.txt``, splits it into runs of characters in the
    range U+4E00..U+9FFF, romanizes each run (first-letter style) and counts
    how often every character receives each initial.  The counts are turned
    into relative frequencies (the per-character total stays under the key
    "sum"), printed, and pickled to ``biu_pro_dict.pkl``.
    """
    biu_pro_dict = dict()
    run = []
    with open("pinyin_train.txt", "r", encoding="utf-8") as f:
        for line in f:
            for ch in line:
                if '\u4e00' <= ch <= '\u9fff':
                    run.append(ch)
                    continue
                # Any other character ends the current run.
                chars = ''.join(run)
                initials = pypinyin.lazy_pinyin(chars, pypinyin.FIRST_LETTER)
                run.clear()
                for index, hanzi in enumerate(chars):
                    counts = biu_pro_dict.setdefault(hanzi, {"sum": 0})
                    counts["sum"] += 1
                    initial = initials[index]
                    counts[initial] = counts.get(initial, 0) + 1

    for counts in biu_pro_dict.values():
        total = counts["sum"]
        for key in counts:
            if key != "sum":
                counts[key] = counts[key] / total

    print(biu_pro_dict)
    with open("biu_pro_dict.pkl", "wb") as f:
        pickle.dump(biu_pro_dict, f, pickle.HIGHEST_PROTOCOL)
def _du(self, _request, _rdata):
    """Copy one device user's details into *_rdata*.

    _request -- dict with ``user_uuid`` (required) and ``return_password``
                (optional; the password is removed unless it is truthy).
    _rdata   -- dict updated in place with the user's fields plus
                ``pinyinname0`` / ``pinyinname1`` derived from the full name.

    Sets API_ERR.NO_PARA when the uuid is missing and API_ERR.NO_OBJECT when
    no such user is stored; *_rdata* is left untouched in both cases.
    """
    if "user_uuid" not in _request:
        self.setErrorCode(API_ERR.NO_PARA)
        logging.error("Error for no para: %s.", (str(_request)))
        return

    _o = redis_hash_to_dict(self.application.redis, DeviceUser,
                            _request["user_uuid"])
    logging.info(_o)
    if _o is None:
        self.setErrorCode(API_ERR.NO_OBJECT)
        logging.error("Error for no user uuid: %s." % (_request["user_uuid"]))
        return

    # The password is stripped unless the caller explicitly asks for it.
    if not _request.get("return_password", False):
        # pop() rather than del: a record without the field must not raise.
        _o.pop("user_password", None)

    _fn = _o.get("user_fullname")
    if _fn is not None and not isinstance(_fn, unicode):
        _fn = _fn.decode("utf-8")

    _rdata.update(_o)
    if _fn is None:
        # No full name stored: nothing to transliterate.
        _rdata["pinyinname0"] = ""
        _rdata["pinyinname1"] = ""
    else:
        _rdata["pinyinname0"] = "".join(lazy_pinyin(_fn))
        _rdata["pinyinname1"] = "".join(
            itertools.chain.from_iterable(
                pinyin(_fn, style=pypinyin.INITIALS)))
    return
def tts(model, raw_text, CONFIG, use_cuda, ap, use_gl, figures=False, use_pinyin=False):
    """Synthesize *raw_text*, save the waveform as a .wav file and return the results.

    use_gl     -- when false, the waveform from ``synthesis`` is replaced by
                  one generated by ``wavernn`` from the re-normalized
                  spectrogram.
    figures    -- when true, call ``visualize`` on the outputs.
    use_pinyin -- when true, feed the space-joined pinyin of the text to the
                  model instead of the raw text.

    Returns ``(alignment, mel_postnet_spec, stop_tokens, waveform)``.
    """
    text = " ".join(lazy_pinyin(raw_text, style=style)) if use_pinyin else raw_text

    started = time.time()
    waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens = synthesis(
        model, text, CONFIG, use_cuda, ap, speaker_id, None, False)

    vocode = not use_gl
    if vocode and CONFIG.model == "Tacotron":
        # correct the normalization differences b/w TTS and the Vocoder.
        mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T
    mel_postnet_spec = ap._denormalize(mel_postnet_spec)
    if vocode:
        mel_postnet_spec = ap_vocoder._normalize(mel_postnet_spec)
        vocoder_input = torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0).cuda()
        waveform = wavernn.generate(vocoder_input, batched=batched_wavernn, target=8000, overlap=400)

    print(" > Run-time: {}".format(time.time() - started))
    if figures:
        visualize(alignment, mel_postnet_spec, stop_tokens, raw_text, ap.hop_length, CONFIG, mel_spec)

    os.makedirs(OUT_FOLDER, exist_ok=True)
    file_name = raw_text.replace(" ", "_").replace(".","") + f"-{speaker_id}.wav"
    ap.save_wav(waveform, os.path.join(OUT_FOLDER, file_name))
    return alignment, mel_postnet_spec, stop_tokens, waveform
def handle_current_input(self, input, topv=15, topp=15): input = input.lower() if self.pat.findall(input): # 全数字,直接返回 return input pyl, two_part,may_parts = self.sp.split_pinyin(input) print(pyl, two_part,may_parts) if two_part == True and may_parts == False: prefix_ans = {} start = time.time() self.pt.get_totalwords_of_prefix(self.pt.root, pyl[-1], prefix_ans) sorted_pf_ans = sorted(prefix_ans.items(), key=lambda x: x[1], reverse=True) end = time.time() print("GET PREFIX COST: {}".format(end-start)) words = [hz_freq[0] for hz_freq in sorted_pf_ans[:topp]] # ------------------------- best_viterbi_ans = [] pinyins = map(lambda x: lazy_pinyin(x)[0], words) viterbi_ans = [] start = time.time() for _, py in enumerate(pinyins): pyl[-1] = py viterbi_ans = self.viterbi(pyl, topv, [words[_]]) # self.momo["".join(pyl[:-1]][state...] = end = time.time() print("VITERBI COST: {}".format(end-start)) best_viterbi_ans.extend(viterbi_ans) return best_viterbi_ans, two_part elif may_parts: new_viterbi_ans = serch_in_dict(pyl,self.dict) print new_viterbi_ans if new_viterbi_ans ==[]: new_viterbi_ans = self.newviterbi(pyl, topv) return new_viterbi_ans,two_part else: viterbi_ans = self.viterbi(pyl, topv, []) print viterbi_ans return viterbi_ans, two_part
def process_item(self, item, spider):
    """Store a scraped poem and, if new, its author; always return *item*.

    Only items from the ``songs`` spider are written.  Known authors are
    looked up in the Redis hash ``author`` (by the item's author id, then by
    the pinyin of "<author>_<dynasty>"); unknown authors are inserted first.
    Failures are logged and never propagate to the caller.
    """
    if spider.name != 'songs':
        return item

    cursor1 = None
    try:
        cursor1 = self.db.cursor()
        key = ''.join(lazy_pinyin(item['author'] + '_' + item['dynasty']))

        if self.redis_conn.hexists('author', item['author_id']):
            kwd = item['author_id']
        elif self.redis_conn.hexists('author', key):
            kwd = key
        else:
            kwd = ''

        if kwd != '':
            author_id = self.redis_conn.hget('author', kwd)
        else:
            sql = "insert into `author` (`name`,`dynasty`,`pinyin`) values(%s,%s,%s)"
            cursor1.execute(
                sql, [item['author'], item['dynasty'], item['pinyin']])
            author_id = str(cursor1.lastrowid)
            self.redis_conn.hsetnx('author', key, author_id)

        created = int(time.time())
        sql1 = "insert into `content` (`author_id`,`title`,`created`,`view_url`,`comment_num`,`point`,`content`) values(%s,%s,%s,%s,%s,%s,%s)"
        cursor1.execute(sql1, [
            author_id, item['title'], created, item['view_url'],
            item['comment_nums'], item['point'], item['content']
        ])
    except mysql.connector.Error as e:
        msg = u'view_url:%s 写入数据失败:%s' % (item['view_url'], e)
        logger.error(msg)
    except Exception:
        # Best effort: any other failure is logged with its traceback instead
        # of being dropped silently.
        logger.exception('process_item failed')
    finally:
        # Close exactly once, and only if the cursor was actually created.
        if cursor1 is not None:
            cursor1.close()
    return item
def gerenate_city(city_hospitals, province_dir, city):
    """Write ``<province_dir>/<city pinyin>.md`` with a table of the city's hospitals.

    Nothing is written when the file already exists.  One trailing
    '市' / '州' / '区' / '县' is removed from *city* before romanizing.  Each
    hospital row is formatted from its fields starting at index 4; the table
    has a phone column only when the probe of ``hospitals`` below succeeds.
    """
    # TODO: how should suffixes such as "XX族自治州" be removed?
    if city.endswith(('市', '州', '区', '县')):
        city = city[:-1]
    city_name = ''.join(py.lazy_pinyin(city, style=py.Style.NORMAL))
    city_path = os.path.join(province_dir, '{}.md'.format(city_name))
    if os.path.exists(city_path):
        return

    if not os.path.isdir(province_dir):
        os.makedirs(province_dir)

    try:
        # NOTE(review): this probes the module-level `hospitals`, not
        # `city_hospitals` -- confirm that is intended.
        hospitals[:][1][7]
        header = '| 区/县 | 名称 | 地址 | 电话 |\n|------|-------|------|------|\n'
        row_format = '| {} | {} | {} | {} \n'
    except IndexError:
        header = '| 区/县 | 名称 | 地址 |\n|------|-------|------|\n'
        row_format = '| {} | {} | {} \n'

    city_table = header + ''.join(
        row_format.format(*city_hospital[4:]) for city_hospital in city_hospitals)

    with open(city_path, 'w+', encoding='utf-8') as f:
        f.write('{}\n{}\n'.format(gerenate_header(city_hospitals[0], 3), city_table))
def article_add():
    """Render the add-article form and create the article on a valid submit.

    On a valid submission the uploaded image is saved under
    ``./static/images/`` using a file name derived from the pinyin of the
    original name, an ``Article`` row is added and committed, and the user
    is redirected to the article index. Any failure rolls the session back,
    flashes an error and re-renders the form.

    Returns:
        A redirect response on success, otherwise the rendered form page.
    """
    form = ArticleForm()
    if form.validate_on_submit():
        try:
            # Transliterate first so secure_filename keeps a usable name
            # for non-ASCII uploads.
            filename = secure_filename(''.join(
                lazy_pinyin(form.img_url.data.filename)))
            print(filename)
            form.img_url.data.save('./static/images/' + filename)
            print("上传成功!")
            article = Article(
                title=form.title.data,
                content=form.content.data,
                types=form.types.data,
                img_url=filename,
                author=form.author.data,
                is_recommend=form.is_recommend.data,
                is_valid=form.is_valid.data,
                created_at=datetime.now())
            db.session.add(article)
            db.session.commit()
            flash("添加新闻成功!")
            return redirect(url_for('.article_index'))
        except Exception:
            # Leave the session usable for the next request.
            db.session.rollback()
            flash("添加新闻失败!", category="error")
    return render_template('/admin/article/add.html', form=form)
def make_plot(city, time):
    """Plot daily high/low temperatures of a city and save them as a PDF.

    For every year in ``time`` and every month 01-12 the history page on
    lishi.tianqi.com is fetched through ``get_temperature``; the first two
    temperature values of each row are kept. The chart is written to
    ``weather.pdf``.

    Args:
        city: City name; its pinyin forms part of the URL.
        time: Iterable of years.
    """
    city_pinyin = ''.join(lazy_pinyin(city))
    months = [str(month).zfill(2) for month in range(1, 13)]

    records = {}
    for year in time:
        for month in months:
            url = f'http://lishi.tianqi.com/{city_pinyin}/{year}{month}.html'
            dates, readings = get_temperature(url)
            for day, temps in zip(dates, readings):
                if day == '日期':
                    # Table header row, not data.
                    continue
                # Keep the date part of the key and the high/low values.
                records[day[:10]] = [int(t.rstrip('℃')) for t in temps[:2]]

    frame = pd.DataFrame(records).T
    frame.columns = ['the high', 'the low']

    years = list(time)
    fig, ax = plt.subplots(figsize=(12, 6))
    frame.plot(ax=ax, title=f"{city} {years[0]}-{years[-1]} 温度变化", lw=1)
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.grid(axis='y')
    plt.savefig('weather.pdf')
def synthesize(self, text, src, dst):
    """Assemble a spoken .wav for ``text`` from per-syllable recordings.

    Args:
        text: Text to synthesize.
        src: Folder prefix holding ``<syllable>.wav`` files (TONE3 pinyin).
        dst: Folder prefix where ``generated.wav`` is written; created when
            missing.
    """
    print("Synthesizing ...")
    increment = 355  # milliseconds between consecutive syllables
    pause = 500  # milliseconds of silence for a punctuation mark

    # Start from pure silence, roughly 500 ms per character.
    track = AudioSegment.silent(duration=500 * len(text))
    cursor = 0
    for syllable in lazy_pinyin(text, style=pypinyin.TONE3):
        wav_path = src + syllable + ".wav"
        if syllable in TextToSpeech.punctuation:
            clip = AudioSegment.silent(duration=pause)
        elif Path(wav_path).is_file():
            clip = AudioSegment.from_wav(wav_path)
        else:
            # No recording for this syllable: skip it without advancing.
            continue
        track = track.overlay(clip, position=cursor)
        cursor += increment

    if not os.path.exists(dst):
        os.makedirs(dst)
    track.export(dst + "generated.wav", format="wav")
    print("Exported.")
def get_asrlist(asr, le2id, seq_length):
    """Encode spans of ``asr`` as fixed-length id sequences of pinyin letters.

    The only span length tried is ``len(asr)``. Each span is converted to
    space-separated toneless pinyin, every character is mapped through
    ``le2id.get``, and the result is zero-padded or truncated to
    ``seq_length``.

    Args:
        asr: Recognised text.
        le2id: Mapping from a character to its id.
        seq_length: Length of every returned id list.

    Returns:
        A tuple ``(sub_num, sub_pos, sub_lens)``: span -> id list,
        span -> ``(start, end)``, and the unpadded length of each span.
    """
    sub_num, sub_pos, sub_lens = {}, {}, []
    total = len(asr)
    span_sizes = [total]

    for start in range(total):
        for size in span_sizes:
            end = start + size
            if end > total:
                continue
            piece = asr[start:end]
            letters = ' '.join(pypinyin.lazy_pinyin(piece, 0, errors='ignore'))
            ids = [le2id.get(ch) for ch in letters]
            sub_lens.append(len(ids))
            # Pad with zeros, then cut to size; at most one of the two
            # actually changes the list.
            ids = (ids + [0] * (seq_length - len(ids)))[:seq_length]
            sub_pos[piece] = (start, end)
            sub_num[piece] = ids
    return sub_num, sub_pos, sub_lens
def gen_test():
    """Build the pinyin and sentence files from every ``.txt`` under RAW_DIR.

    Each line of each corpus file is split with ``extract_sentences``. The
    sentences are written to ``SENTENCES_FILE`` and their space-separated
    pinyin (characters without pinyin are dropped) to ``PINYIN_FILE``, one
    per line in the same order.
    """
    files = []
    for root, _dirnames, filenames in os.walk(RAW_DIR):
        for filename in filenames:
            if filename.endswith(".txt"):
                # Join with the directory being walked, not RAW_DIR, so
                # files in subdirectories resolve correctly.
                files.append(os.path.join(root, filename))

    processed_lines = []
    for raw in files:
        with open(raw, "r", encoding="utf8") as corpus:
            for line in corpus:
                processed_lines.extend(extract_sentences(line))

    pinyin_list = [
        " ".join(pypinyin.lazy_pinyin(line, errors="ignore"))
        for line in processed_lines
    ]
    with open(PINYIN_FILE, "w", encoding="utf8") as pinyin_file:
        pinyin_file.write("\n".join(pinyin_list) + "\n")
    with open(SENTENCES_FILE, "w", encoding="utf8") as sentences_file:
        sentences_file.write("\n".join(processed_lines) + "\n")
def parse(self, response):
    """Yield one Baidu Baike request per landmark that could be geocoded.

    Every name in ``foreigh_7`` is geocoded with ``baidu_geo_api``. A
    ``SightItem`` is filled with coordinates, a running id, category, title
    and upper-cased pinyin. Landmarks without coordinates are logged and
    skipped (their id is still consumed); for the others a request to the
    Baike page is yielded with the item in ``meta``.

    Args:
        response: The response that triggered this callback (unused).

    Yields:
        ``scrapy.Request`` objects handled by ``self.content_parse``.
    """
    for build in foreigh_7:
        item = SightItem()
        log.msg('build: ' + build, level=log.INFO)

        # Geocode once and reuse the answer: a second call costs another
        # request and may not return the same result.
        location = baidu_geo_api(build.encode('utf-8'))
        if location is not None:
            lng, lat = location
        else:
            # (1, 1) is the "not found" marker checked below.
            lng, lat = 1, 1

        item['lng'] = lng
        item['lat'] = lat
        item['id_num'] = self.id_num
        self.id_num += 1
        item['category'] = u'国外地标建筑'
        item['title'] = build.encode('utf-8')
        item['pinyin'] = ''.join(lazy_pinyin(build)).upper()

        if lng == 1 or lat == 1:
            log.msg('no landmark found: ' + 'at line 36,' + build, level=log.INFO)
            continue

        baike_url = 'https://baike.baidu.com/item/%s' % build
        yield scrapy.Request(baike_url, meta={'item': item}, callback=self.content_parse)
def find_candidates(self, w):
    """Collect words whose pinyin key is close to the first syllable of ``w``.

    Trie keys that are prefixes of the syllable (and at most one character
    shorter) are expanded with every stored suffix; a key is accepted when
    its length differs from the syllable's by at most one. ``w`` itself is
    added as well. A ``KeyError`` during the lookup ends the search with
    whatever was collected so far.

    Args:
        w: The word to find candidates for.

    Returns:
        A set of candidate values from ``self.trie``.
    """
    candidates = set()
    pin = lazy_pinyin(w)[0]
    try:
        # Prefix lookup, keeping only near-full-length prefixes.
        stems = [
            p for p in self.trie.prefixes(pin)
            if 0 <= len(pin) - len(p) <= 1
        ]
        if not stems:
            # NOTE(review): this binds a single prefix string, so the loop
            # below walks it character by character - confirm intended.
            stems = self.trie.prefixes(pin)[0]
        for stem in stems:
            for tail in self.trie.suffixes(stem):
                key = stem + tail
                # Length limit.
                if abs(len(key) - len(pin)) <= 1:
                    candidates.add(self.trie[key])
        candidates.add(w)
    except KeyError:
        pass
    return candidates
# 中文转拼音第三方库:https://github.com/mozillazg/python-pinyin from pypinyin import lazy_pinyin, pinyin, Style print(''.join(lazy_pinyin('测试qwe啦'))) # ceshiqwela print(pinyin('测试', style=Style.TONE2, heteronym=True)) # [['ce4'], ['shi4']]
get_bool_field, get_str_field, get_optional_str_field from ...core.exceptions import RequestArgumentError bpAdmin = Blueprint('admin', __name__) re_sep = re.compile(r',|,| | |\s') PAGE_SIZE = 50 COLUMNS = [ "调查", "雕龙", "光阴", "机动", "评论", "人物", "视界", "言己", "姿势", "摄影", "现场", "又见", "特稿", "节日", "未明", "图说", "征稿", "招新", "手记", "副刊", "对话", "论衡", "休刊", "纪念", "聚焦燕园", "休闲娱乐", "社会舆论", "校友往事", "教育科技", "排行榜", "生日", "译天下", "新年献词" ] COLUMNS.sort(key=lambda c: lazy_pinyin(c)) COLUMNS.append("其他") def get_range(page, size): page = max(page, 1) return ((page - 1) * size, page * size) @bpAdmin.route('/', methods=["GET"], strict_slashes=False) @bpAdmin.route('/article', methods=["GET"]) def article_html(): """ Method GET Args: - page int
def name_to_pinyin(name):
    """Return the toneless pinyin of ``name`` joined into one string.

    Byte strings are decoded as UTF-8 and other non-text values are
    converted to text, so the function works on both Python 2 and 3.

    Args:
        name: The name to convert (text, UTF-8 bytes, or any object with a
            text representation).

    Returns:
        The concatenated pinyin syllables.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(name, bytes):
        # Decode explicitly: the implicit ASCII decode fails on Chinese names.
        name = name.decode('utf-8')
    elif not isinstance(name, text_type):
        name = text_type(name)
    return ''.join(lazy_pinyin(name))
return text

# Script section: build main.tex as <template head> + one generated entry
# per student + <template tail>.
tex_head = ''
tex_tail = ''
with open('template.tex', 'r', encoding='UTF-8') as temp:
    tex_head = temp.read()
with open('template_tail.tex', 'r', encoding="UTF-8") as temp:
    tex_tail = temp.read()
with open('main.tex', 'w', encoding='UTF-8') as m:
    m.write(tex_head)
    # Gather the students of classes 1 through 4.
    students = []
    for i in range(1, 5):
        students.extend(getStudentsInClass(i))
    # Order by the pinyin of each student's first field.
    sorted_students = sorted(students, key=lambda x: lazy_pinyin(x[0]))
    for student in sorted_students:
        text = genTemplate(student)
        m.write(text)
        m.write('\n')
        # break
    # Disabled debugging aid: emit a single student only.
    # if i == 4:
    #     for student in students:
    #         if student[0] == '张皓':
    #             text = genTemplate(student)
    #             m.write(text)
    #             m.write('\n')
    m.write(tex_tail)
def get_pinyin(word):
    """Return the first pinyin syllable produced for ``word``.

    Raises:
        IndexError: If ``lazy_pinyin`` yields nothing for ``word``.
    """
    return lazy_pinyin(word)[0]
def _candidates_by_edit(self, word):
    """Return known one-edit variants of ``word`` that share its pinyin.

    The candidates are ``self.known(self.edits1(word))``; when that is
    empty, ``word`` itself is the only candidate.

    Args:
        word: The word to find same-sounding variants for.

    Returns:
        A list of candidates whose pinyin equals that of ``word``.
    """
    # The target pinyin does not depend on the candidate: compute it once.
    target = lazy_pinyin(word)
    pool = self.known(self.edits1(word)) or [word]
    return [w for w in pool if lazy_pinyin(w) == target]