Python lazy_pinyin示例，pypinyin.lazy_pinyin Python示例

示例#1

0

显示文件

文件： pinyin_usage.py 项目： Rockyzsu/base_function

def change_pinyin(name):
    """Build several romanized spellings of a name.

    Characters that cannot be converted to pinyin are ignored in both the
    full-pinyin list and the first-letter list, so the two lists stay
    aligned position by position.

    :param name: the text to convert.
    :return: a 5-tuple of strings:
        (all syllables joined,
         all syllables joined with ``_``,
         first two syllables joined with ``_`` followed by the rest,
         first syllable in full followed by the initials of the rest,
         the same with ``_`` between the first two parts).
    """
    full = lazy_pinyin(name, errors='ignore')
    # Use the same error policy as above. Without it, unconvertible
    # characters are dropped from `full` but kept here, which shifts the
    # indexes used when the two lists are spliced together below.
    initials = lazy_pinyin(name, style=Style.FIRST_LETTER, errors='ignore')

    # Full pinyin for the first syllable, first letters for the remainder.
    mixed = full[:1] + initials[1:]
    return (
        ''.join(full),
        '_'.join(full),
        '_'.join(full[:2]) + ''.join(full[2:]),
        ''.join(mixed),
        '_'.join(mixed[:2]) + ''.join(mixed[2:]),
    )

示例#2

0

显示文件

文件： test_pinyin.py 项目： zhaochl/python-pinyin

def test_custom_pinyin_dict():
    """Check the TONE2 reading of '桔' after loading a custom single-char dict."""
    char = '桔'
    expected = ['ju2']
    # Before the custom entry is loaded the reading is allowed to differ,
    # so a failed comparison is deliberately tolerated here.
    try:
        assert lazy_pinyin(char, style=TONE2) == expected
    except AssertionError:
        pass
    load_single_dict({ord(char): 'jú,jié'})
    assert lazy_pinyin(char, style=TONE2) == expected

示例#3

0

显示文件

文件： test_pinyin.py 项目： zhaochl/python-pinyin

def test_custom_pinyin_dict2():
    """Check that a custom phrase entry changes the TONE2 reading of '同行'."""
    phrase = '同行'
    segments = [phrase]
    # Before the custom phrase is loaded a mismatch is tolerated on purpose.
    try:
        assert lazy_pinyin(segments, style=TONE2) == ['to2ng', 'ha2ng']
    except AssertionError:
        pass
    load_phrases_dict({phrase: [['tóng'], ['xíng']]})
    assert lazy_pinyin(segments, style=TONE2) == ['to2ng', 'xi2ng']

示例#4

0

显示文件

文件： corrector.py 项目： djk111/pycorrector

def get_confusion_word_set(word):
    """Return the candidate words whose pinyin equals the pinyin of *word*.

    Candidates are whatever ``known(edit_distance_word(word, cn_char_set))``
    yields; only those with an identical ``lazy_pinyin`` result are kept.

    :param word: the word to find same-sounding alternatives for.
    :return: a set of candidate words sharing the pinyin of *word*.
    """
    # The pinyin of the target word does not change between candidates,
    # so compute it once instead of once per candidate.
    word_pinyin = lazy_pinyin(word)
    candidates = known(edit_distance_word(word, cn_char_set))
    return {
        candidate for candidate in candidates
        if lazy_pinyin(candidate) == word_pinyin
    }

示例#5

0

显示文件

文件： deviceuser.py 项目： anxiaoyi/ppmessage

def create_device_user(redis, request):
    """Create a DeviceUser from a request dict and register it through redis.

    Missing e-mail and icon values are generated. The pinyin fields are
    derived from ``user_fullname`` when one is supplied.

    :param redis: redis handle passed to ``DeviceUser.async_add`` and
        ``DeviceUser.create_redis_keys``.
    :param request: dict-like object holding the user attributes; ``uuid``
        is mandatory.
    :return: the dict of values used to build the row, or ``None`` when no
        ``uuid`` was provided.
    """
    _uuid = request.get("uuid")

    if not _uuid:
        logging.error("no uuid provided. %s", request)
        return None

    _is_service_user = bool(request.get("is_service_user"))
    _is_anonymous_user = bool(request.get("is_anonymous_user"))
    _is_owner_user = bool(request.get("is_owner_user"))
    # This flag was referenced in `_values` without ever being assigned,
    # which raised NameError on every call. It is read from the request
    # like its sibling flags.
    _is_ppmessage_user = bool(request.get("is_ppmessage_user"))

    _user_email = request.get("user_email")
    if not _user_email:
        import strgen
        # Raw strings: "\d" and "\w" are regex classes, not string escapes.
        _user_email = (strgen.StringGenerator(r"[\d\w]{10}").render() + "@" +
                       strgen.StringGenerator(r"[\d\w]{10}").render())

    _user_icon = request.get("user_icon")
    if not _user_icon:
        _user_icon = random_identicon(_user_email)

    _user_name = request.get("user_name")
    _user_mobile = request.get("user_mobile")
    _user_fullname = request.get("user_fullname")
    _user_password = request.get("user_password")
    _user_language = request.get("user_language") or "cn"

    _ent_user_uuid = request.get("ent_user_uuid")
    _ent_user_createtime = request.get("ent_user_createtime")

    import pypinyin
    _user_pinyin = None
    _user_py = None
    # A request without a fullname used to crash on `None.decode`; the
    # pinyin fields are left as None in that case.
    if _user_fullname is not None:
        if not isinstance(_user_fullname, unicode):
            _user_fullname = _user_fullname.decode("utf-8")
        _user_pinyin = "".join(pypinyin.lazy_pinyin(_user_fullname))
        _user_py = "".join(
            pypinyin.lazy_pinyin(_user_fullname, style=pypinyin.FIRST_LETTER))

    _values = {
        "uuid": _uuid,
        "is_service_user": _is_service_user,
        "is_owner_user": _is_owner_user,
        "is_ppmessage_user": _is_ppmessage_user,
        "is_anonymous_user": _is_anonymous_user,
        "user_name": _user_name,
        "user_mobile": _user_mobile,
        "user_email": _user_email,
        "user_icon": _user_icon,
        "user_fullname": _user_fullname,
        "user_password": _user_password,
        "user_pinyin": _user_pinyin,
        "user_py": _user_py,
        "ent_user_uuid": _ent_user_uuid,
        "ent_user_createtime": _ent_user_createtime
    }

    _row = DeviceUser(**_values)
    _row.async_add(redis)
    _row.create_redis_keys(redis)
    return _values

示例#6

0

显示文件

文件： test_pinyin.py 项目： zhaochl/python-pinyin

def test_errors_callable():
    """Check that ``errors`` accepts both a plain function and a callable object."""
    count = 5
    text = 'あ' * count
    expected = ['a' * count]

    def as_function(chars):
        return 'a' * len(chars)

    class AsObject(object):
        def __call__(self, chars):
            return 'a' * len(chars)

    # Both handler flavours must replace the unconvertible run identically.
    for handler in (as_function, AsObject()):
        assert lazy_pinyin(text, errors=handler) == expected

示例#7

0

显示文件

文件： city.py 项目： liuzelei/walis

def post_or_put(pk=None, dic=None):
    """Fill in missing city fields and save the city through the ers client.

    :param pk: key passed through to ``save_city``.
    :param dic: dict converted into a ``TCity`` object.
    :return: whatever ``ers.save_city`` returns.
    """
    city = dic_to_tobj(dic, thirdparty_svc.ers.TCity, True)

    # Derive the full pinyin from the name when none was supplied.
    if not city.pinyin:
        syllables = lazy_pinyin(city.name, errors='ignore')
        city.pinyin = ''.join(syllables)

    # Derive the upper-case first-letter abbreviation when none was supplied.
    if not city.abbr:
        initials = lazy_pinyin(city.name, style=FIRST_LETTER, errors='ignore')
        city.abbr = ''.join(initials).upper()

    # Default sort value.
    if not city.sort:
        city.sort = 2000

    with thrift_client('ers') as ers:
        saved = ers.save_city(pk, city)
    return saved

示例#8

0

显示文件

文件： doSmokepingTarget.py 项目： langlangago/sa-scripts

def doTarget(isp, ipList):
    """Append one ISP section and its hosts to the ``target`` file.

    A top-level ``+title`` entry is written for the ISP, followed by one
    ``++`` entry per IP whose name is the pinyin of the mapped value plus
    the first octet of the IP.

    :param isp: ISP code; 'tel', 'uni' or 'mob'. Any other value is written
        as the 'EDU' section.
    :param ipList: dict mapping an IP string to its display name.
    """
    # (section title, menu caption) for each recognised ISP code.
    sections = {
        'tel': ('telcom', '中国电信'),
        'uni': ('unicom', '中国联通'),
        'mob': ('CMCC', '中国移动'),
    }
    title, menu = sections.get(isp, ('EDU', '中国教育'))

    # `with` closes the file even when formatting or writing an entry fails;
    # the explicit open()/close() pair leaked the handle on any exception.
    with open(target, 'a') as fd:
        fd.write("+%s\nmenu = %s\ntitle = %s\n\n" % (title, menu, title))

        for ip, name in ipList.items():
            subTitle = ''.join(lazy_pinyin(name)) + "-" + ip.split('.')[0]
            fd.write('++%s\nmenu = %s\ntitle = %s\nhost = %s\n\n'
                     % (subTitle, name.encode('utf8'), ip, ip))

示例#9

0

显示文件

文件： SortFilesForce.py 项目： twotreeszf/Sony-Walkman-Sorter

def CJKFirstLetters(str):
    """Return the first character of each ``lazy_pinyin`` element, concatenated."""
    return ''.join(syllable[0] for syllable in lazy_pinyin(str))

示例#10

0

显示文件

文件： taobao.py 项目： yunyu2019/blog

 def parse_profile(self,response):
     """Fill the item carried in ``response.meta['item']`` from a profile page.

     Values are read from the base-info and experience blocks of the page,
     cleaned with ``filterHtml`` and stored under the item's keys. The
     pinyin of the nickname is stored under ``pinyin``.

     :param response: response whose ``meta`` carries the partially filled
         item under ``'item'``.
     :return: generator yielding the completed item once.
     """
     hxs=Selector(response)
     item=response.meta['item']
     cont=hxs.xpath('//div[@class="mm-p-info mm-p-base-info"]')
     # Build real lists: the values are indexed below, and a map() object
     # cannot be subscripted on Python 3.
     lists=[filterHtml(x) for x in cont.xpath('ul/li/span').extract()]
     profiles=[filterHtml(x) for x in cont.xpath('ul/li/p').extract()]
     exprince=hxs.xpath('//div[@class="mm-p-info mm-p-experience-info"]/p').extract()
     item['nicename']=lists[0].strip()
     item['borthday']=lists[1].replace(u'\xa0','')
     item['job']     =lists[3].strip(u'型')
     item['blood']   =lists[4].strip(u'型')
     item['school']  =''
     item['specialty']  =''
     if lists[5]!='':
         # School and specialty share one field, separated by a run of
         # two or more non-breaking spaces.
         m=re.split(u'\xa0{2,}',lists[5])
         if len(m)>1:
             item['school']  =m[0]
             item['specialty']  =m[1]
     item['style']   =lists[6].strip()
     item['height']  =profiles[0].strip('CM')
     item['weight']  =profiles[1].strip('KG')
     item['solid']   =profiles[2].strip()
     item['bar']     =bar(profiles[3])
     item['shoes']   =profiles[4].strip(u'码')
     # A page without an experience paragraph used to raise IndexError;
     # fall back to an empty string, as is done for the image below.
     item['exprince']=filterHtml(exprince[0]) if exprince else ''
     left_img=hxs.xpath('//div[@class="mm-p-modelCard"]/a/img/@src').extract()
     item['life_img']='https:'+left_img[0] if left_img else ''
     item['image_urls']=[item['faceimg'],item['big_img'],item['life_img']]
     item['pinyin']=''.join(lazy_pinyin(item['nicename']))
     yield item

示例#11

0

显示文件

文件： ppgetuserdetailhandler.py 项目： anxiaoyi/ppmessage

    def _du(self):
        """Load one device user and put its details into the return data.

        Reads ``user_uuid`` (and optionally ``return_password``) from the
        JSON request body, copies the stored user hash into the return
        data, adds two pinyin renderings of the full name and attaches the
        team's AppInfo under ``"team"``. Sets an API error code and returns
        early when the uuid is missing or no user is found.
        """
        _request = json.loads(self.request.body)

        _user_uuid = _request.get("user_uuid")
        if not _user_uuid:
            self.setErrorCode(API_ERR.NO_PARA)
            return

        _o = redis_hash_to_dict(self.application.redis, DeviceUser, _user_uuid)
        if not _o:
            self.setErrorCode(API_ERR.NO_OBJECT)
            return

        # The password is not returned unless explicitly requested. pop()
        # with a default avoids a KeyError when the hash has no password.
        if not _request.get("return_password", False):
            _o.pop("user_password", None)

        _fn = _o.get("user_fullname")
        if _fn is not None and not isinstance(_fn, unicode):
            _fn = _fn.decode("utf-8")

        _rdata = self.getReturnData()
        _rdata.update(_o)
        if _fn:
            _rdata["pinyinname0"] = "".join(lazy_pinyin(_fn))
            _rdata["pinyinname1"] = "".join(
                itertools.chain.from_iterable(pinyin(_fn, style=pypinyin.INITIALS)))
        else:
            # A user without a full name used to reach lazy_pinyin(None)
            # and crash; report empty pinyin names instead.
            _rdata["pinyinname0"] = ""
            _rdata["pinyinname1"] = ""

        _app_uuid = _get_config().get("team").get("app_uuid")
        _team = redis_hash_to_dict(self.application.redis, AppInfo, _app_uuid)
        _rdata.update({"team": _team})
        return

示例#12

0

显示文件

文件： province_spider.py 项目： wirror800/crawler

    def parse(self,response):
        """Collect province and city links from the page and request each one.

        All items are collected first; requests are only yielded once every
        link on the page has been processed.
        """
        selector = Selector(response)
        pending = []

        # Optional province-level link (the <dt> anchor).
        province_links = selector.xpath('//ul/li[1]/dl/dt/a')
        if len(province_links) > 0:
            province_key = os.path.basename(response.url)
            province_item = ProvinceItem()
            province_item['link'] = province_links.xpath('@href').extract()[0]
            province_item['code'] = self.city_codes.get(province_key)
            pending.append(province_item)

        # City-level links (the <dd> anchors).
        for anchor in selector.xpath('//ul/li[1]/dl/dd/a'):
            city_item = ProvinceItem()
            city_item['name'] = anchor.xpath('text()').extract()[0]
            city_item['link'] = anchor.xpath('@href').extract()[0]
            # The code is the integer before the first '-' of the link's
            # last path component.
            code = int(os.path.basename(city_item['link']).split('-')[0])
            city_item['code'] = code
            # Parent code: the code with its last two digits zeroed.
            city_item['parent'] = code - code % 100
            city_item['pinyin'] = ''.join(lazy_pinyin(city_item['name']))
            pending.append(city_item)

        for entry in pending:
            yield Request(entry['link'],
                          meta={'code': entry['code']},
                          callback=self.parse2)

示例#13

0

显示文件

文件： pipelines.py 项目： yunyu2019/blog

 def process_item(self, item, spider):
     """Store an item from the 'songs' spider in MySQL, resolving its author.

     The author id is looked up in the redis hash ``author``, first by the
     item's ``author_id`` and then by the pinyin of "author_dynasty". When
     neither exists a new author row is inserted and cached in redis. Items
     from other spiders are returned untouched.

     :param item: the scraped item.
     :param spider: the spider that produced the item.
     :return: the item, unchanged.
     """
     if spider.name!='songs':
         return item

     # Bound before the try block so `finally` can test it even when
     # cursor() itself fails (it was unbound there before).
     cursor1=None
     try:
         cursor1=self.db.cursor()
         author_id=0
         key=''.join(lazy_pinyin(item['author']+'_'+item['dynasty']))
         kwd=''
         if self.redis_conn.hexists('author',item['author_id']):
             kwd=item['author_id']
         elif self.redis_conn.hexists('author',key):
             kwd=key
         if kwd!='':
             author_id=self.redis_conn.hget('author',kwd)
         else:
             sql="insert into `author` (`name`,`dynasty`,`pinyin`) values(%s,%s,%s)"
             cursor1.execute(sql,[item['author'],item['dynasty'],item['pinyin']])
             author_id=str(cursor1.lastrowid)
             self.redis_conn.hsetnx('author',key,author_id)
         created=int(time.time())
         sql1="insert into `content` (`author_id`,`title`,`created`,`view_url`,`comment_num`,`point`,`content`) values(%s,%s,%s,%s,%s,%s,%s)"
         cursor1.execute(sql1,[author_id,item['title'],created,item['view_url'],item['comment_nums'],item['point'],item['content']])
     except mysql.connector.Error as e:
         msg=u'view_url:%s 写入数据失败:%s' % (item['view_url'],e)
         logger.error(msg)
     finally:
         # Single close point; the cursor used to be closed in the try
         # body, the except handler and here.
         if cursor1 is not None:
             cursor1.close()
     return item

示例#14

0

显示文件

文件： corrector_CH.py 项目： xzr12/IR

def correctOneWord(oldWord):
    word = oldWord.decode('utf-8')
    pyList = lazy_pinyin(word)
    pyStr = ""
    maxSame = 0
    resultWord = ""
    same = 0
    count = 0
    for py in pyList:
        pyStr+=py.encode('utf-8')
    print pyStr
    result = ChineseWordModel.objects.filter(pinyin=pyStr).order_by('idf')
    if len(result) == 0:
        print "pinyin do not exist"
        return oldWord
    for r in result:
        print r['word']
        print r['idf']
        same = findSameChar(word,r['word'])
        if(same>maxSame):
            maxSame = same
            resultWord = r['word']
    print "maxSame",maxSame
    if maxSame == 0:
        resultWord = result[0]['word']
        print "no similar word"
    return resultWord

示例#15

0

显示文件

文件：文字转换成语音.py 项目： gswyhq/hello-world

def txt_to_voice(text, name='test', export_path=EXPORT_PATH):
    """
    Convert text to an mp3 file by concatenating one clip per syllable.
    :param text: the text to convert
    :param name: name of the generated audio file (without extension)
    :param export_path: directory the mp3 file is written to
    :return: None
    """
    audio = AudioSegment.empty()
    for syllable in lazy_pinyin(text, style=TONE3):
        clip = VOICE_DICT.get(syllable)
        if clip is None and syllable:
            if syllable[-1] not in '0123456789':
                # No tone digit at the end: retry with '5' appended and
                # fall back to the `silent` clip.
                clip = VOICE_DICT.get(syllable + '5', silent)

        # Syllables without a usable clip are skipped.
        if clip:
            audio += clip

    target_file = os.path.join(export_path, "{}.mp3".format(name))
    audio.export(target_file, format='mp3')

示例#16

0

显示文件

文件： search.py 项目： xzr12/IR

def initial(folderName, type):
    """Build the in-memory word/document models from the files in ./folderName/.

    :param folderName: directory (relative to the working directory) whose
        files each hold one word per line.
    :param type: 0 reads the *_sohu.txt link/title lists and stores a pinyin
        string for every word; any other value reads the *_wiki.txt lists and
        stores no pinyin.
    :return: tuple ``(model, documentModel, wordModel)`` where ``model`` is
        keyed by ``(word, documentName)``, ``documentModel`` by document name
        and ``wordModel`` by word.
    """
    documents = os.listdir('./'+folderName+'/')
    index = 1
    # the same sequence with model in models.py
    model = {}
    documentModel = {}
    wordModel = {}

    ld = len(documents)
    # NOTE(review): the files opened with open(...).readlines() below are
    # never closed explicitly.
    if type == 0:
        hrefList = open('./html_sohu.txt', 'r').readlines()
        titleList = open('./title_sohu.txt', 'r').readlines()
        for document in documents:
            # Progress output every 50 documents.
            if index % 50 == 0:
                print str(index) + ' / ' + str(ld)
            # The first four characters of the file name are parsed as a
            # 1-based line number into the href/title lists.
            documentName = document[0:4]
            # Each list line is tab-separated; the second field is used.
            documentModel[documentName] = {'length': 0, 'href': hrefList[int(documentName)-1].split('\n')[0].split('\t')[1], 'title':titleList[int(documentName)-1].split('\n')[0].split('\t')[1]}
            words = open('./'+folderName+'/'+document, 'r').readlines()
            for word in words:
                singleWord = word.split('\n')[0]
                # NOTE(review): presumably a byte length (the line is decoded
                # from UTF-8 just below) - confirm the intended threshold.
                if len(singleWord) < 3:
                    continue
                singleWordUnicode = singleWord.decode('utf-8')
                pinyins = lazy_pinyin(singleWordUnicode)
                pinyinStr = ''
                for pinyin in pinyins:
                    pinyinStr = pinyinStr + pinyin.encode('utf-8')
                if len(pinyinStr) < 2:
                    continue
                # First sighting of the word anywhere: create its entry.
                if (singleWord in wordModel) == False:
                    wordModel[singleWord] = {'length': len(pinyins), 'idf': 0, 'pinyin': pinyinStr}
                # Count occurrences of the word within this document.
                if ((singleWord, documentName) in model) == False:
                    model[(singleWord, documentName)] = {'tfIdf': 0, 'times': 1, 'tf': 0}
                else:
                    times = model[(singleWord, documentName)]['times'] + 1
                    model[(singleWord, documentName)]['times'] = times
            index = index + 1
    else:
        hrefList = open('./html_wiki.txt', 'r').readlines()
        titleList = open('./title_wiki.txt', 'r').readlines()
        for document in documents:
            # Progress output every 50 documents.
            if index % 50 == 0:
                print str(index) + ' / ' + str(ld)
            # Same file-name and list-line conventions as the branch above.
            documentName = document[0:4]
            documentModel[documentName] = {'length': 0, 'href': hrefList[int(documentName)-1].split('\n')[0].split('\t')[1], 'title':titleList[int(documentName)-1].split('\n')[0].split('\t')[1]}
            words = open('./'+folderName+'/'+document, 'r').readlines()
            for word in words:
                singleWord = word.split('\n')[0]
                l = len(singleWord)
                # Only words of length 3..15 are indexed in this branch.
                if l < 3 or l > 15:
                    continue
                if (singleWord in wordModel) == False:
                    wordModel[singleWord] = {'length': l, 'idf': 0}
                # Count occurrences of the word within this document.
                if ((singleWord, documentName) in model) == False:
                    model[(singleWord, documentName)] = {'tfIdf': 0, 'times': 1, 'tf': 0}
                else:
                    times = model[(singleWord, documentName)]['times'] + 1
                    model[(singleWord, documentName)]['times'] = times
            index = index + 1
    return model, documentModel, wordModel

示例#17

0

显示文件

文件： ppgetuserdetailhandler.py 项目： Michael2008S/ppmessage

    def _du(self, _request, _rdata):
        """Copy one device user's details into ``_rdata``.

        Sets an API error code and returns early when ``user_uuid`` is
        missing from the request or no user is stored under it.

        :param _request: dict with ``user_uuid`` and, optionally,
            ``return_password``.
        :param _rdata: dict that receives the user's fields plus
            ``pinyinname0`` and ``pinyinname1``.
        """
        if "user_uuid" not in _request:
            self.setErrorCode(API_ERR.NO_PARA)
            logging.error("Error for no para: %s.", (str(_request)))
            return

        _o = redis_hash_to_dict(self.application.redis, DeviceUser, _request["user_uuid"])

        logging.info(_o)

        if _o is None:
            self.setErrorCode(API_ERR.NO_OBJECT)
            logging.error("Error for no user uuid: %s." % (_request["user_uuid"]))
            return

        # The password is not returned unless explicitly requested. pop()
        # with a default avoids a KeyError when the hash has no password.
        if not _request.get("return_password", False):
            _o.pop("user_password", None)

        _fn = _o.get("user_fullname")
        if _fn is not None and not isinstance(_fn, unicode):
            _fn = _fn.decode("utf-8")

        _rdata.update(_o)
        if _fn:
            _rdata["pinyinname0"] = "".join(lazy_pinyin(_fn))
            _rdata["pinyinname1"] = "".join(
                itertools.chain.from_iterable(pinyin(_fn, style=pypinyin.INITIALS)))
        else:
            # A user without a full name used to reach lazy_pinyin(None)
            # and crash; report empty pinyin names instead.
            _rdata["pinyinname0"] = ""
            _rdata["pinyinname1"] = ""

        return

示例#18

0

显示文件

文件： models.py 项目： Hainuer/website

 def save(self, *args, **kwargs):
     """Fill ``simple_name`` when it is empty or None, then save the row."""
     unset = ('', None)
     # Chinese input: use the pinyin syllables of the name joined by '-'.
     if self.simple_name in unset:
         self.simple_name = '-'.join(lazy_pinyin(self.name))
     # English input: if still unset, fall back to the name itself.
     if self.simple_name in unset:
         self.simple_name = self.name
     super(Category, self).save(*args, **kwargs)

示例#19

0

显示文件

文件： search_prompt.py 项目： xuyaoqiang/search_prompt

 def delete(self, item, pinyin=False, seg=False):
     """Remove the item's uid from every prefix generated for its term.

     :param item: dict with at least a ``term`` key.
     :param pinyin: when true, also remove the pinyin form of each prefix.
     :param seg: passed to ``prefixs_for_term``.
     """
     self.item_check(item)
     term = item['term']
     # The uid is the MD5 hex digest of the UTF-8 encoded term.
     uid = hashlib.md5(term.encode('utf8')).hexdigest()
     for prefix in self.prefixs_for_term(term, seg=seg):
         self._delete_prefix(prefix, uid)
         if not pinyin:
             continue
         self._delete_prefix(''.join(lazy_pinyin(prefix)), uid)

示例#20

0

显示文件

文件： quanguoxingzhengshuju.py 项目： fanday/demo

def pinyinFirst(name):
    print name
    pinyin = lazy_pinyin(name)
    retPinyin = []
    for element in pinyin:
        retPinyin =retPinyin + [element[0]]

    return retPinyin

示例#21

0

显示文件

文件： city.py 项目： yunxingwoo/Spider_app

def weather_crawler(city):
    """Fetch the lishi.tianqi.com page for ``city`` and print its weather summary.

    The page URL is built from the pinyin of the city name.
    """
    city_pinyin = ''.join(lazy_pinyin(city))
    page = requests.get(
        'http://lishi.tianqi.com/{}/index.html'.format(city_pinyin))
    soup = BeautifulSoup(page.text, 'lxml')

    # Summary paragraph with its last 15 characters cut off.
    summary_node = soup.select(' div.tqtongji > p')[0]
    weather = summary_node.get_text()[0:-15]

    # Second <ul> of the statistics block, flattened onto one line.
    wind_node = soup.select('  div.tqtongji > ul')[1]
    wind = wind_node.get_text().replace('\n', ' ')

    print(weather, '\n\n' + '风力情况为：\n', wind)

示例#22

0

显示文件

文件： core.py 项目： chienius/anicolle

 def to_dict(self):
     """Return the record as a dict, plus the first-letter pinyin of its name."""
     plain_fields = ('id', 'name', 'cur_epi', 'on_air_epi', 'on_air_day',
                     'seeker')
     data = {}
     for field in plain_fields:
         data[field] = getattr(self, field)
     initials = lazy_pinyin(self.name, Style.FIRST_LETTER)
     data['name_pinyin'] = ''.join(initials)
     return data

示例#23

0

显示文件

文件： main_report.py 项目： hezhenke/quantdata

 def __translate_title(self,title):
     """Register pinyin <-> Chinese mappings for every entry of ``title``.

     A ``str`` entry maps to itself. A ``list`` entry uses its first
     element as the key and stores "first(second)" as the Chinese label.
     Entries whose key is already registered, and entries of any other
     type, are skipped.
     """
     for entry in title:
         if isinstance(entry, str):
             zhongwen = entry
             paired = False
         elif isinstance(entry, list):
             zhongwen = entry[0]
             zhongwen2 = entry[1]
             paired = True
         else:
             continue

         if zhongwen in self.__zhongwen_2_pinyin:
             continue

         pinyin = "_".join(lazy_pinyin(zhongwen, errors='ignore'))
         if paired:
             self.__pinyin_2_zhongwen[pinyin] = zhongwen + "(" + zhongwen2 + ")"
         else:
             self.__pinyin_2_zhongwen[pinyin] = zhongwen
         self.__zhongwen_2_pinyin[zhongwen] = pinyin

示例#24

0

显示文件

def get_pinyin_first(org_str):
    """Return the first pinyin letter of a string.

    An empty string is returned for empty input and for input that is not
    ``unicode``.
    """
    if not org_str:
        return ''
    if not isinstance(org_str, unicode):
        # Non-unicode input is not transcoded (open question in the
        # original: should a conversion be attempted?).
        return ''
    return lazy_pinyin(org_str)[0][0]

示例#25

0

显示文件

文件： singlesong.py 项目： yunyu2019/blog

 def parse(self,response):
     """Parse one poem page into a SongsItem.

     When the page lists related URLs, one request per URL is yielded with
     the item attached in ``meta``; otherwise the item itself is yielded.
     Any exception raised while extracting is logged together with the
     page URL. Nothing is yielded when the main container is missing.
     """
     curr_url=response.url
     view_url=urlparse.urlparse(curr_url).path
     hxs = Selector(response)
     warp=hxs.xpath('//div[@class="shileft"]')
     if not warp:
         return

     song=SongsItem()
     song['view_url']  =view_url
     try:
         title=warp.xpath('div[@class="son1"]/h1/text()').extract()[0]
         son2=warp.xpath('div[@class="son2"]')
         temp=son2.css('.line1 *::text').extract()
         point=0
         comment_num=0
         if temp:
             comment=temp[0].strip()
             m=re.search(r'(\d+)',comment)
             comment_num=m.group(1) if m else 0
             point=temp[1].strip() if len(temp)>1 else 0
         dynasty=son2.xpath('p[1]/text()').extract()
         dynasty=dynasty[0] if dynasty else ''
         author_temp=son2.xpath('p[2]/a/text()').extract()
         author=author_temp[0] if author_temp else son2.xpath('p[2]/text()').extract()[0]
         author_name=lazy_pinyin(author)
         url_temp=son2.xpath('p[2]/a/@href')
         author_id=0
         if url_temp:
             author_url=url_temp.extract()[0]
             m1=re.search(r'(\d+)',author_url)
             # Keep the default 0 when the author URL has no numeric id;
             # an unguarded m1.group(1) used to abort the whole page.
             if m1:
                 author_id=m1.group(1)
         strs=son2.extract()[0]
         compiles=re.compile(r'</span></p>(.*)?.*?</div>',re.S)
         m=re.search(compiles,strs)
         # Test the match object itself: `m.groups()` raised AttributeError
         # when the pattern did not match, so the '' fallback never applied.
         content=m.group(1).strip() if m else ''
         relation_urls=hxs.xpath('//div[@class="son5" and @id]/p[1]/a/@href').extract()
         song['title']     =title
         song['comment_nums']=comment_num
         song['point']     =point
         song['dynasty']=dynasty
         song['author'] =author
         song['content']=content
         song['pinyin']=''.join(author_name)
         song['author_id']=author_id
         song['relation_urls']=relation_urls if relation_urls else ''
         if relation_urls:
             for i in relation_urls:
                 url='http://so.gushiwen.org'+i
                 yield Request(url,callback=self.parse_relation,meta={'item':song},errback=self.catchError)
         else:
             yield song
     # `except X as e` is valid on Python 2.6+ and Python 3; the former
     # `except Exception, e` form is Python 2 only.
     except Exception as e:
         msg=u"urls:%s message:%s" % (curr_url,str(e))
         logger.error(msg)

示例#26

0

显示文件

文件： topinyin.py 项目： lovitus/IDtagToPinyin

def modifytag(filename, *attr):
    """Replace the given tag attributes of an audio file with their pinyin.

    :param filename: path of the audio file, loaded with ``eyed3.load``.
    :param attr: names of the tag attributes to convert, e.g. "artist",
        "title".
    :raises SystemExit: when no attribute name is given.
    """
    # `len(attr) == None` was never true, so this usage message could not
    # be reached and an empty call silently did nothing but re-save the tag.
    if not attr:
        print('no attr given ,please input "artist","title" ...')
        raise SystemExit(1)

    audiofile = eyed3.load(filename)
    for changeattr in attr:
        pinyinattr = pypinyin.lazy_pinyin(getattr(audiofile.tag, changeattr))
        pinyinattr = ''.join(str(x) for x in pinyinattr)
        setattr(audiofile.tag, changeattr, pinyinattr.decode('utf-8'))
    audiofile.tag.save()

示例#27

0

显示文件

文件： util.py 项目： xcsliu/pycharm_obj

def get_ready_data_file_path(city_name, data_type, source_name, data_label):
    """Return the .tsv path for a city's data file, creating its directory.

    The directory is ``<parent of cwd>/poi/poi_data/<city pinyin>/<data_type>``
    and the file name is ``<city pinyin>_<source_name>_<data_label>.tsv``.

    :param city_name: city name; its pinyin is used in the path.
    :param data_type: sub-directory under the city directory.
    :param source_name: data source part of the file name.
    :param data_label: label part of the file name.
    :return: the full file path using the platform's path separator.
    """
    city_name_pinyin = ''.join(lazy_pinyin(city_name))
    # raw data path  : poi/poi_data/city/raw_data  /date/1.anjuke_old 2.anjuke_new 3.lianjia_old 4.lianjia_new 5.baidu 6.fangtianxia
    # ready_data path: poi/poi_data/city/ready_data/1.anjuke 2.lianjia 3.baidu 4.fangtianxia
    path = os.path.join(os.path.dirname(os.getcwd()), 'poi', 'poi_data', city_name_pinyin, data_type)
    if not os.path.exists(path):
        os.makedirs(path)
    file_name = '{}_{}_{}.tsv'.format(city_name_pinyin, source_name, data_label)
    # os.path.join picks the right separator per platform. The old code
    # glued on '\{...' (an invalid escape sequence) and then replaced every
    # backslash in the whole path on non-Windows systems.
    return os.path.join(path, file_name)

示例#28

0

显示文件

文件： search_prompt.py 项目： xuyaoqiang/search_prompt

 def add(self, item, pinyin=False, seg=False):
     """Store the item and index its uid under every prefix of its term.

     :param item: dict with a ``term`` key and an optional ``score``
         (default 0).
     :param pinyin: when true, also index the pinyin form of each prefix.
     :param seg: passed to ``prefixs_for_term``.
     """
     self.item_check(item)
     term = item['term']
     score = item.get('score', 0)
     # The uid is the MD5 hex digest of the UTF-8 encoded term.
     uid = hashlib.md5(term.encode('utf8')).hexdigest()

     # Keep the JSON-serialized item in the hash under its uid.
     self.redis.hset(self.db, uid, json.dumps(item))

     for prefix in self.prefixs_for_term(term, seg):
         self._index_prefix(prefix, uid, score=score)
         if not pinyin:
             continue
         self._index_prefix(''.join(lazy_pinyin(prefix)), uid, score=score)

示例#29

0

显示文件

文件： open_weiboscope.py 项目： anukat2015/sunny-side-up

 def hanzi_to_pinyin(txt):
     """
     Romanize the Chinese characters in ``txt`` as TONE2-style pinyin.

     Args:
         txt -- text containing Chinese characters (unicode)
     Returns:
         unicode string made of the concatenated ``lazy_pinyin`` output
     """
     return u''.join(pyp.lazy_pinyin(txt, style=pyp.TONE2))

示例#30

0

显示文件

文件： iPhone_contacts.py 项目： gxzzxg/iPhone_Contact

def generatenewcontact(contact_str_list):
    """Append an X-PHONETIC-LAST-NAME line to every contact lacking one.

    The phonetic value is the pinyin of the contact's ``FN:`` value(s).
    The list is modified in place and also returned.
    """
    fn_pattern = re.compile(r'FN:((?:.|\n)*?)\n')
    for position, card in enumerate(contact_str_list):
        # Contacts that already carry the phonetic field are left alone.
        if card.find('X-PHONETIC-LAST-NAME') >= 0:
            continue
        names = fn_pattern.findall(card)
        phonetic = ''.join(lazy_pinyin(names))
        phonetic_line = "X-PHONETIC-LAST-NAME:" + phonetic + '\n'
        contact_str_list[position] = contact_str_list[position] + phonetic_line

    return contact_str_list

示例#31

0

显示文件

文件： netease.py 项目： ssln2014/Music-Player

 def searchMusic(self, search_song, headers):
     """Search music.163.com for a song and return the matching songs.

     The query is the pinyin of ``search_song``. It is embedded in the
     request key, turned into form data by ``self.GetFormData`` and posted
     to the cloudsearch endpoint.

     :param search_song: text to search for.
     :param headers: value sent as the ``User-agent`` header.
     :return: ``result.songs`` from the JSON response.
     """
     searchUrl = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
     musicStr = ''.join(lazy_pinyin(search_song))
     key = '{hlpretag:"",hlposttag:"</span>",s:"' + musicStr + '",type:"1",csrf_token:"",limit:"30",total:"true",offset:"0"}'
     FormData = self.GetFormData(key)
     response = requests.request(
         'POST',
         searchUrl,
         data=FormData,
         headers={
             'User-agent':
             headers,
             'referer':
             'https://music.163.com/',
             'Host':
             'music.163.com',
             'Accept':
             'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
         })
     # The unused accumulator lists, the unused `dataStr` and the
     # commented-out parsing loop were removed; the raw song list is
     # returned as before.
     song_dict = json.loads(response.text)
     return song_dict['result']['songs']

示例#32

0

显示文件

    def __review(self, query):
        """Run a name/alias lookup against the company index for a simple query.

        :param query: dict with ``input`` and optionally ``filter``,
            ``start``, ``sort`` and ``order``.
        :return: ``(True, hits)`` when the search ran, ``(False, None)`` when
            the query has a filter, a positive ``start`` or a non-string
            ``input``.
        """
        if self.__exist_filter(query.get('filter')) or query.get('start') > 0:
            return False, None

        raw_input = query.get('input')
        if not (isinstance(raw_input, str) or isinstance(raw_input, unicode)):
            return False, None

        # Strip spaces, '+' and '-' from the input.
        name = raw_input
        for junk in (' ', '+', '-'):
            name = name.replace(junk, '')

        should = []
        if len(name) < 5:
            # Short inputs are also matched on the name field, with the
            # characters separated by spaces.
            should.append(
                templates.get_string_template('name', ' '.join(name), '100%'))
        should.append(
            templates.get_string_template(
                'alias', ''.join(lazy_pinyin(name, errors='ignore')), '100%'))
        es_query = {'bool': {'should': should}}

        sort = query.get('sort', 1)
        order = query.get('order', 'default')
        search_body = {
            "query": es_query,
            "sort": self.__generate_sort_search(sort, order),
            "from": 0,
            "size": 10
        }
        hits = self.es.search(index='xiniudata',
                              doc_type='company',
                              body=search_body)
        return True, hits

示例#33

0

显示文件

def deal_text(text: str, pinyin_char_table: dict, record: dict,
              binary_record: dict):
    """Accumulate unigram and bigram counts for the characters of ``text``.

    Each character is mapped to the id stored in ``pinyin_char_table`` under
    ``(pinyin, char)``. Characters without pinyin or without a table entry
    map to the ``start`` id (table size + 1). ``record`` counts ids and
    ``binary_record`` counts transitions between consecutive ids, with a
    transition to ``stop`` (table size + 2) when a known id is followed by
    ``start``. Both count containers are updated in place.
    """
    table_size = len(pinyin_char_table)
    start, stop = table_size + 1, table_size + 2

    def no_pinyin(chars):
        # One None per unconvertible character keeps the list aligned
        # with `text`.
        return [None] * len(chars)

    syllables = lazy_pinyin(text, style=STYLE_NORMAL, errors=no_pinyin)

    previous = start
    for syllable, char in zip(syllables, text):
        current = start
        if syllable is not None:
            syllable = REGULAR_PINYIN.get(syllable, syllable)
            syllable = FORCE_PINYIN.get(char, syllable)
            current = pinyin_char_table.get((syllable, char), start)
            if current == start:
                print('WARNING: strang (pinyin, char):', syllable, char)

        record[current] += 1
        if current != start:
            binary_record[previous][current] += 1
        elif previous != start:
            binary_record[previous][stop] += 1
        previous = current

示例#34

0

显示文件

def ad_update_pic():
    """Store an uploaded picture as the logged-in admin's head picture.

    The stored file name is a redis counter value followed by the
    extension of the romanized upload name.

    :return: a response with a ``code``/``msg`` payload; on success it also
        carries the picture ``url``.
    """
    upload = request.files['file']
    if not is_admin_login(request):
        return jsonify({"code": 208, "msg": "登录信息已经过期"})

    admin_id = get_admin_id_by_cookie(request)
    if not exist_admin(admin_id):
        return {"code": "203", "msg": "抱歉，管理员不存在"}

    romanized = "".join(lazy_pinyin(upload.filename))
    counter = get_redis_cli().incr('admin-images')
    # Keep everything from the last '.' of the romanized name onwards.
    extension = romanized[romanized.rfind("."):]
    filename = str(counter) + str(extension)

    if not (upload and allowed_file(filename)):
        return {"code": "203", "msg": "上传失败"}

    filename = secure_filename(filename)
    destination = os.path.join(app.config['ADMIN_UPLOAD_FOLDER'], filename)
    upload.save(destination)
    url = "http://192.168.195.10:5005/admin/images/" + filename
    admin = Admin.query.filter(Admin.admin_id == admin_id).first()
    admin.head_pic = url
    db.session.commit()
    return {"code": "200", "msg": "上传成功", "url": url}

示例#35

0

显示文件

文件： miniprogram_api.py 项目： pkuyouth/PKUyouthWebServer

def get_column_list():
    """Return a JSON string listing every column with its news count.

    On success the payload is ``{"errcode": 0, "columns": [...]}`` where each
    entry has ``id``, ``title``, ``desc``, ``cover`` (pinyin of the title plus
    ".jpg") and ``newsCount``. When anything fails the payload is
    ``{"errcode": -1, "error": repr(err)}``.
    """
    # Bound before the try block so `finally` can test it even when
    # NewsDB() itself raises (it was unbound there before).
    newsDB = None
    try:
        newsDB = NewsDB()
        newsCountDict = {
            item["column"]: item["count"]
            for item in newsDB.group_count("newsDetail", "column")
            if item["column"] in columns
        }

        columnsInfo = [{
            "id": idx,
            "title": title,
            "desc": desc,
            "cover": "%s.jpg" % "".join(lazy_pinyin(title)),
            "newsCount": newsCountDict[title]
        } for idx, (title, desc) in enumerate(columns.items())]

        jsonPack = {"errcode": 0, "columns": columnsInfo}
    except Exception as err:
        # The original re-raised here, but its `return` inside `finally`
        # discarded the exception, so callers always received this error
        # payload. That observable behaviour is kept, without the
        # misleading raise.
        jsonPack = {"errcode": -1, "error": repr(err)}
    finally:
        if newsDB is not None:
            newsDB.close()
    return json.dumps(jsonPack)

示例#36

0

显示文件

    def speak(self, text):
        """Play the syllable recordings for ``text`` on staggered threads.

        The text is converted to TONE3 pinyin, punctuation is removed,
        digit-only pieces are expanded through ``atc.num2chinese``, and each
        resulting syllable is played from ``syllables/<syllable>.wav`` with
        a start delay that grows by 0.355 per syllable.
        """
        syllables = lazy_pinyin(text, style=pypinyin.TONE3)
        print(syllables)

        cleaned = []
        for raw in syllables:
            token = raw
            for mark in TextToSpeech.punctuation:
                token = token.replace(mark, "")
            if token.isdigit():
                # Numbers are turned into Chinese and re-romanized.
                spoken = atc.num2chinese(token)
                cleaned.extend(lazy_pinyin(spoken, style=pypinyin.TONE3))
            else:
                cleaned.append(token)

        delay = 0
        for token in cleaned:
            wav_path = "syllables/" + token + ".wav"
            _thread.start_new_thread(TextToSpeech._play_audio,
                                     (wav_path, delay))
            delay += 0.355

示例#37

0

显示文件

    def cut(self, sents):
        """Convert a "word/tag word/tag ..." line into letter and tag sequences.

        Each word is replaced by its first pinyin syllable. The syllables
        are joined by spaces and split into single characters (a space
        becomes ``_space``). Every letter gets the tag of its word and a
        ``tag0`` entry is placed between words.

        :param sents: space-separated ``word/tag`` pairs; tags are '0', '1'
            or '2'.
        :return: ``"<characters> _link <tags>"``.
        """
        self.num += 1
        tag_names = {'0': 'tag0', '1': 'tag1', '2': 'tag2'}
        syllables = []
        tag_groups = []
        for token in sents.split(' '):
            parts = token.split('/')
            syllable = pypinyin.lazy_pinyin(parts[0], 0)[0]
            syllables.append(syllable)
            # One tag per letter of the syllable.
            letter_tags = [tag_names[parts[1]] for _ in syllable]
            tag_groups.append(' '.join(letter_tags))
            # Separator tag between this word and the next one.
            tag_groups.append('tag0')

        # Drop the separator that follows the last word.
        tag_groups = tag_groups[:-1]
        joined = ' '.join(syllables)
        characters = ' '.join([ch.replace(' ', '_space') for ch in joined])
        return characters + ' _link ' + ' '.join(tag_groups)

示例#38

0

显示文件

def getOnePatam(song_name_or_id):
    """Query the music.163.com cloud-search endpoint and return the songs found.

    The query is flattened to pinyin with ``lazy_pinyin`` before it is embedded
    in the search payload; ``GetFormData`` turns that payload into the POST
    form data.

    Returns the ``['result']['songs']`` entry of the decoded JSON response.
    """
    # URL used to look up the song id
    search_url = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
    # Browser-like request headers
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.99 Safari/537.36'
    )
    headers = {
        'Host': 'music.163.com',
        'Origin': 'https://music.163.com',
        'Referer': 'https://music.163.com/search/',
        'User-Agent': user_agent,
    }

    # First parameter: the search payload carrying the pinyin query
    query = ''.join(lazy_pinyin(song_name_or_id))
    payload = ''.join([
        '{hlpretag:"",hlposttag:"</span>",s:"',
        query,
        '",type:"1",csrf_token:"",limit:"30",total:"true",offset:"0"}',
    ])
    response = requests.post(search_url, headers=headers,
                             data=GetFormData(payload))
    return json.loads(response.text)['result']['songs']

示例#39

0

显示文件

文件： split_file_999_1000.py 项目： CoolPicker/self_teaching

def file_charge(num):
    """Compare the request lines of file ``num`` with the pinyin of its result lines.

    Reads ``<num>.txt`` from the ``single-req`` and ``nn`` folders, converts
    every ``nn`` line to concatenated pinyin and walks both lists in lockstep,
    tracking runs of consecutive mismatching lines. Scanning stops when a run
    that already counts five lines is extended once more.

    Prints ``"<num> --- <run_start - 1>"`` where ``run_start`` is the line
    number recorded when the most recent run started being counted (1 if none).
    """
    filename = str(num) + '.txt'
    with open('G:\\ngram_lexer\\sogou_input\\single-req\\' + filename,
              'r', encoding='utf-8') as req_file:
        expected = [row.strip() for row in req_file]
    with open('G:\\ngram_lexer\\sogou_input\\nn\\' + filename,
              'r', encoding='utf-8') as res_file:
        actual = [''.join(lazy_pinyin(row.strip())) for row in res_file]

    next_in_run = 1  # line number that would continue the current run
    run_length = 0
    run_start = 1
    for line_no, (want, got) in enumerate(zip(expected, actual), start=1):
        if want == got:
            continue
        if next_in_run != line_no:
            # Not adjacent to the previous mismatch: reset the counter.
            run_length = 0
        elif run_length == 0:
            run_start = line_no
            run_length = 1
        elif run_length < 5:
            run_length += 1
        else:
            break
        next_in_run = line_no + 1
    print(str(num) + ' --- ' + str(run_start - 1))

示例#40

0

显示文件

    def create_indice_completion_locations(self, db):
        """Index every location returned by ``dbutil.get_all_locations`` for completion.

        For each ``(lid, lname)`` pair a document is built with the id
        ``l<lid>``, the name, its English name and three completion strings
        (lower-cased name, concatenated pinyin, lower-cased English name), and
        passed to ``self.create_index(item, 'completion')``. Locations with an
        empty name are logged and skipped.
        """
        location_score = 1
        for lid, lname in dbutil.get_all_locations(db):
            if len(lname) < 1:
                # There is no active exception here, so log a plain error;
                # logger.exception would append a meaningless traceback.
                self.logger.error('%s location has no name', lid)
                continue
            en_name = dbutil.get_location_en_name(db, lid)
            item = {
                'id': 'l%s' % lid,
                '_name': lname,
                'en_name': en_name,
                'completionName': [
                    lname.lower(),
                    ''.join(lazy_pinyin(lname)),
                    en_name.lower(),
                ],
                '_prompt': 'location',
                # Shorter names get a higher score.
                'ranking_score': location_score * round((1.0 / len(lname)), 2),
            }
            self.create_index(item, 'completion')

示例#41

0

显示文件

def get_deletes(word: list):
    '''
    Symmetric-delete expansion of the pinyin spelling of ``word``.

    ``word`` is converted to one concatenated pinyin string; the result holds
    that string followed by every distinct string obtained by deleting one or
    two characters from it, in first-seen order.

    @param word: input passed to ``lazy_pinyin``
    @return: list of strings, the full pinyin string first
    '''
    word = ''.join(lazy_pinyin(word))
    dels = [word]
    seen = {word}  # set mirror of `dels` for O(1) membership tests
    queue = [word]

    # Two rounds: deletions of the original, then deletions of those results.
    for _ in range(2):
        next_queue = []
        queued = set()  # set mirror of `next_queue`
        for current in queue:
            if len(current) > 1:
                for i in range(len(current)):
                    except_char = current[:i] + current[i + 1:]
                    if except_char not in seen:
                        seen.add(except_char)
                        dels.append(except_char)
                    if except_char not in queued:
                        queued.add(except_char)
                        next_queue.append(except_char)
        queue = next_queue

    return dels

示例#42

0

显示文件

def biu_pro():
    """Build per-character first-letter reading frequencies from ``pinyin_train.txt``.

    The file is scanned character by character. Each contiguous run of
    characters in the range U+4E00..U+9FFF is converted with
    ``pypinyin.lazy_pinyin(..., pypinyin.FIRST_LETTER)`` and, per character,
    the number of occurrences (key ``"sum"``) and the count of each reading
    are accumulated. Counts are then divided by ``"sum"``, the resulting dict
    is printed and pickled to ``biu_pro_dict.pkl``.
    """
    biu_pro_dict = dict()

    def tally(chars):
        # Count the readings of one contiguous run of Chinese characters.
        if not chars:
            return
        strs = ''.join(chars)
        pinyinstrs = pypinyin.lazy_pinyin(strs, pypinyin.FIRST_LETTER)
        for index, char in enumerate(strs):
            counts = biu_pro_dict.setdefault(char, {"sum": 0})
            counts["sum"] += 1
            reading = pinyinstrs[index]
            counts[reading] = counts.get(reading, 0) + 1

    temp = []
    with open("pinyin_train.txt", "r", encoding="utf-8") as f:
        for lines in f:
            for word in lines:
                if '\u4e00' <= word <= '\u9fff':
                    temp.append(word)
                else:
                    # Any other character (including the newline) ends the run.
                    tally(temp)
                    temp.clear()
    # A file that does not end with a non-Chinese character (e.g. no final
    # newline) leaves a pending run; count it instead of dropping it.
    tally(temp)

    # Turn the raw counts into relative frequencies; "sum" stays a count.
    for counts in biu_pro_dict.values():
        for reading in counts:
            if reading != "sum":
                counts[reading] = counts[reading] / counts["sum"]
    print(biu_pro_dict)
    with open("biu_pro_dict.pkl", "wb") as f:
        pickle.dump(biu_pro_dict, f, pickle.HIGHEST_PROTOCOL)

示例#43

0

显示文件

文件： ppgetuserdetailhandler.py 项目： zhanghcn/ppmessage-1

    def _du(self, _request, _rdata):
        """Fill ``_rdata`` with the stored fields of the requested device user.

        Looks the user up via ``redis_hash_to_dict`` using
        ``_request["user_uuid"]``. Sets ``API_ERR.NO_PARA`` when the uuid is
        missing from the request and ``API_ERR.NO_OBJECT`` when no user is
        found. The password field is removed unless the request carries a
        truthy ``return_password``. Two pinyin renderings of the full name are
        added as ``pinyinname0`` (full pinyin) and ``pinyinname1`` (initials);
        both are empty strings when the user has no full name.
        """
        if "user_uuid" not in _request:
            self.setErrorCode(API_ERR.NO_PARA)
            logging.error("Error for no para: %s.", (str(_request)))
            return

        _o = redis_hash_to_dict(self.application.redis, DeviceUser,
                                _request["user_uuid"])

        logging.info(_o)

        if _o is None:
            self.setErrorCode(API_ERR.NO_OBJECT)
            logging.error("Error for no user uuid: %s." %
                          (_request["user_uuid"]))
            return

        # The password is not returned by default.
        return_password = False
        if "return_password" in _request:
            return_password = _request["return_password"]
        if not return_password:
            # pop() tolerates a record that has no password field.
            _o.pop("user_password", None)

        _fn = _o.get("user_fullname")
        if _fn is not None and not isinstance(_fn, unicode):
            _fn = _fn.decode("utf-8")

        _rdata.update(_o)
        if _fn is None:
            # No full name stored: nothing to transliterate.
            _rdata["pinyinname0"] = ""
            _rdata["pinyinname1"] = ""
            return

        _rdata["pinyinname0"] = "".join(lazy_pinyin(_fn))
        # pinyin() returns one list per character; flatten before joining.
        _rdata["pinyinname1"] = "".join(
            itertools.chain.from_iterable(
                pinyin(_fn, style=pypinyin.INITIALS)))

        return

示例#44

0

显示文件

文件： Benchmark.py 项目： chynphh/TTS

def tts(model, raw_text, CONFIG, use_cuda, ap, use_gl, figures=False, use_pinyin=False):
    """Synthesize ``raw_text`` and save the waveform as a .wav file in ``OUT_FOLDER``.

    When ``use_pinyin`` is set the model is fed the space-joined pinyin of the
    text instead of the text itself. When ``use_gl`` is falsy the waveform
    returned by ``synthesis`` is replaced by the output of ``wavernn.generate``.
    Reads the module-level names ``style``, ``speaker_id``, ``ap_vocoder``,
    ``wavernn``, ``batched_wavernn`` and ``OUT_FOLDER``.

    Returns ``(alignment, mel_postnet_spec, stop_tokens, waveform)``.
    """
    text = " ".join(lazy_pinyin(raw_text, style=style)) if use_pinyin else raw_text
    started = time.time()
    outputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, None, False)
    waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens = outputs
    if CONFIG.model == "Tacotron" and not use_gl:
        # Correct the normalization differences between TTS and the vocoder.
        mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T
    mel_postnet_spec = ap._denormalize(mel_postnet_spec)
    if not use_gl:
        mel_postnet_spec = ap_vocoder._normalize(mel_postnet_spec)
        vocoder_input = torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0).cuda()
        waveform = wavernn.generate(vocoder_input,
                                    batched=batched_wavernn,
                                    target=8000,
                                    overlap=400)

    print(" >  Run-time: {}".format(time.time() - started))
    if figures:
        visualize(alignment, mel_postnet_spec, stop_tokens, raw_text,
                  ap.hop_length, CONFIG, mel_spec)
    os.makedirs(OUT_FOLDER, exist_ok=True)
    # Output name: the text with spaces -> "_" and dots removed, plus speaker id.
    file_name = raw_text.replace(" ", "_").replace(".", "") + f"-{speaker_id}.wav"
    out_path = os.path.join(OUT_FOLDER, file_name)
    ap.save_wav(waveform, out_path)
    return alignment, mel_postnet_spec, stop_tokens, waveform

示例#45

0

显示文件

文件： GodTian_Pinyin.py 项目： Lichongjie/shurufa

 def handle_current_input(self, input, topv=15, topp=15):
     """Return conversion candidates for the pinyin typed so far.

     ``input`` is lower-cased first. If ``self.pat`` matches it, the
     lower-cased string itself is returned; every other branch returns a
     ``(answers, two_part)`` tuple. ``topv`` is forwarded to the viterbi
     helpers and ``topp`` limits how many prefix completions are tried.

     NOTE: uses Python 2 ``print`` statements.
     """
     input = input.lower()
     if self.pat.findall(input):   # all digits: return the input as-is
         return input
     pyl, two_part,may_parts = self.sp.split_pinyin(input)
     print(pyl, two_part,may_parts)
     if two_part == True and may_parts == False:
         # Collect words for the prefix pyl[-1] and keep the `topp` entries
         # with the highest value.
         prefix_ans = {}
         start = time.time()
         self.pt.get_totalwords_of_prefix(self.pt.root, pyl[-1], prefix_ans)
         sorted_pf_ans = sorted(prefix_ans.items(), key=lambda x: x[1], reverse=True)
         end = time.time()
         print("GET PREFIX COST: {}".format(end-start))
         words = [hz_freq[0] for hz_freq in sorted_pf_ans[:topp]]
         # -------------------------
         best_viterbi_ans = []
         # First pinyin syllable of each completion word.
         pinyins = map(lambda x: lazy_pinyin(x)[0], words)
         viterbi_ans = []
         start = time.time()
         # `_` is the index into `words` here, not a throwaway name.
         # NOTE(review): viterbi_ans is overwritten on every iteration, so only
         # the last completion's result reaches best_viterbi_ans below; confirm
         # whether the extend() was meant to be inside the loop.
         for _, py in enumerate(pinyins):
             pyl[-1] = py
             viterbi_ans = self.viterbi(pyl, topv, [words[_]])  # self.momo["".join(pyl[:-1]][state...] =
         end = time.time()
         print("VITERBI COST: {}".format(end-start))
         best_viterbi_ans.extend(viterbi_ans)
         return best_viterbi_ans, two_part
     elif may_parts:
         # Try the dictionary lookup first; fall back to newviterbi when it
         # returns an empty list.
         new_viterbi_ans = serch_in_dict(pyl,self.dict)
         print new_viterbi_ans
         if new_viterbi_ans ==[]:
            new_viterbi_ans = self.newviterbi(pyl, topv)
         return new_viterbi_ans,two_part
     else:
         viterbi_ans = self.viterbi(pyl, topv, [])
         print viterbi_ans
         return viterbi_ans, two_part

示例#46

0

显示文件

 def process_item(self, item, spider):
     """Store an item produced by the ``songs`` spider and return it unchanged.

     The author's row id is looked up in the redis hash ``author`` (first by
     ``item['author_id']``, then by the pinyin of ``author_dynasty``); when
     neither is present a new ``author`` row is inserted and its id is cached
     in redis. A ``content`` row is then inserted. ``mysql.connector`` errors
     are logged and swallowed; any other exception propagates. Items from
     other spiders are passed through untouched.
     """
     if spider.name != 'songs':
         return item

     # Bound before the try block so that a failure while creating the cursor
     # cannot turn into an UnboundLocalError during cleanup.
     cursor1 = None
     try:
         cursor1 = self.db.cursor()
         keys = lazy_pinyin(item['author'] + '_' + item['dynasty'])
         key = ''.join(keys)
         kwd = ''
         if self.redis_conn.hexists('author', item['author_id']):
             kwd = item['author_id']
         elif self.redis_conn.hexists('author', key):
             kwd = key
         if kwd != '':
             author_id = self.redis_conn.hget('author', kwd)
         else:
             sql = "insert into `author` (`name`,`dynasty`,`pinyin`) values(%s,%s,%s)"
             cursor1.execute(
                 sql, [item['author'], item['dynasty'], item['pinyin']])
             author_id = str(cursor1.lastrowid)
             self.redis_conn.hsetnx('author', key, author_id)
         created = int(time.time())
         sql1 = "insert into `content` (`author_id`,`title`,`created`,`view_url`,`comment_num`,`point`,`content`) values(%s,%s,%s,%s,%s,%s,%s)"
         cursor1.execute(sql1, [
             author_id, item['title'], created, item['view_url'],
             item['comment_nums'], item['point'], item['content']
         ])
     except mysql.connector.Error as e:
         msg = u'view_url:%s 写入数据失败:%s' % (item['view_url'], e)
         logger.error(msg)
     finally:
         # Single cleanup point for both the success and the failure path.
         if cursor1 is not None:
             cursor1.close()
     return item

示例#47

0

显示文件

文件： gerenate_designated_hospitals.py 项目： chianhappy/wuhan.support

def gerenate_city(city_hospitals, province_dir, city):
    """Write a Markdown hospital table for ``city`` unless the file already exists.

    The file is ``<province_dir>/<pinyin of city>.md``; a trailing
    市/州/区/县 character is dropped from ``city`` before transliteration.
    Each row is filled from ``city_hospital[4:]``; the header comes from
    ``gerenate_header(city_hospitals[0], 3)``.
    """
    # TODO (original author): how should a "xx族自治州" (ethnic autonomous
    # prefecture) suffix be removed?
    if city.endswith(('市', '州', '区', '县')):
        city = city[:-1]
    city_name = ''.join(py.lazy_pinyin(city, style=py.Style.NORMAL))
    city_path = os.path.join(province_dir, '{}.md'.format(city_name))
    if os.path.exists(city_path):
        return

    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(province_dir, exist_ok=True)
    try:
        # Probe whether an eighth column exists to choose the table layout.
        # NOTE(review): this reads the module-level `hospitals`, not the
        # `city_hospitals` argument - confirm that is intended.
        hospitals[:][1][7]
    except IndexError:
        city_table = '|  区/县  |  名称  |  地址  |\n|------|-------|------|\n'
        city_string = '|  {}  |  {}  |  {}  \n'
    else:
        city_table = '|  区/县  |  名称  |  地址  |  电话  |\n|------|-------|------|------|\n'
        city_string = '|  {}  |  {}  |  {}  |  {}  \n'
    for city_hospital in city_hospitals:
        city_table += city_string.format(*city_hospital[4:])

    with open(city_path, 'w+', encoding='utf-8') as f:
        f.write('{}\n{}\n'.format(gerenate_header(city_hospitals[0], 3),
                                  city_table))

示例#48

0

显示文件

文件： admin.py 项目： LQZme/flask-news2

def article_add():
    """Render the add-article form and, on a valid submit, store image and article.

    The uploaded image's file name is transliterated with ``lazy_pinyin`` and
    passed through ``secure_filename`` (presumably so that Chinese names
    survive sanitizing - confirm), the file is saved under
    ``./static/images/`` and an ``Article`` row is committed. On success the
    user is redirected to the article index; on any failure an error is
    flashed and the form is rendered again.
    """
    form = ArticleForm()
    if form.validate_on_submit():
        try:
            filename = secure_filename(''.join(
                lazy_pinyin(form.img_url.data.filename)))
            print(filename)
            form.img_url.data.save('./static/images/' + filename)
            print("上传成功！")
            article = Article(
                title=form.title.data,
                content=form.content.data,
                types=form.types.data,
                # Store the sanitized file name, not the upload object.
                img_url=filename,
                author=form.author.data,
                is_recommend=form.is_recommend.data,
                is_valid=form.is_valid.data,
                created_at=datetime.now())
            db.session.add(article)
            db.session.commit()
            flash("添加新闻成功！")
            return redirect(url_for('.article_index'))
        except Exception:
            # Deliberately broad (any failure is reported to the user), but
            # no longer a bare except that would also trap SystemExit and
            # KeyboardInterrupt. Roll back so the session stays usable.
            db.session.rollback()
            flash("添加新闻失败!", category="error")
    return render_template('/admin/article/add.html', form=form)

示例#49

0

显示文件

文件： weather.py 项目： wook2014/pyscript-bio

def make_plot(city, time):
    """Plot the daily high/low temperatures of ``city`` and save ``weather.pdf``.

    For every year in ``time`` and every month 01..12 the matching
    lishi.tianqi.com page (addressed by the pinyin of ``city``) is fetched
    through ``get_temperature``; the first two readings of each day are
    collected, keyed by the first ten characters of the date.
    """
    city_pinyin = ''.join(lazy_pinyin(city))
    months = [f'{month:02d}' for month in range(1, 13)]
    temps_by_day = {}
    for year in time:
        for month in months:
            url = f'http://lishi.tianqi.com/{city_pinyin}/{year}{month}.html'
            date, tianqi = get_temperature(url)
            for day, readings in zip(date, tianqi):
                if day == '日期':  # skip the header row
                    continue
                # Keep the date part and the first two temperature values.
                temps_by_day[day[0:10]] = [
                    int(value.rstrip('℃')) for value in readings[0:2]
                ]

    df = pd.DataFrame(temps_by_day).T
    df.columns = ['the high', 'the low']
    fig, ax = plt.subplots(figsize=(12, 6))
    years = list(time)
    df.plot(ax=ax, title=f"{city} {years[0]}-{years[-1]} 温度变化", lw=1)
    # Font settings for the Chinese title and for minus signs.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.grid(axis='y')
    plt.savefig('weather.pdf')

示例#50

0

显示文件

    def synthesize(self, text, src, dst):
        """
        Build one .wav file for ``text`` out of per-syllable recordings.

        src is the folder prefix that contains the syllable .wav files
        dst is the folder prefix where ``generated.wav`` is written
        (both are concatenated with the file name as plain strings)
        """
        print("Synthesizing ...")
        step = 355  # milliseconds between consecutive syllables
        pause = 500  # pause for punctuation
        position = 0
        syllables = lazy_pinyin(text, style=pypinyin.TONE3)

        # Start from pure silence, about 500 ms per input character.
        track = AudioSegment.silent(duration=500 * len(text))
        for syllable in syllables:
            wav_path = src + syllable + ".wav"
            if syllable in TextToSpeech.punctuation:
                # Punctuation: overlay 500 ms of silence and advance.
                gap = AudioSegment.silent(duration=pause)
                track = track.overlay(gap, position=position)
                position += step
                continue
            if not Path(wav_path).is_file():
                # No recording for this syllable: skip without advancing.
                continue
            track = track.overlay(AudioSegment.from_wav(wav_path),
                                  position=position)
            position += step

        if not os.path.exists(dst):
            os.makedirs(dst)

        track.export(dst + "generated.wav", format="wav")
        print("Exported.")

示例#51

0

显示文件

文件： utils.py 项目： qniguoym/tf-Serving

def get_asrlist(asr, le2id, seq_length):
    """Encode slices of ``asr`` as fixed-length id lists of their pinyin letters.

    The only window size tried is ``len(asr)``. For every slice that fits,
    its pinyin syllables are joined with spaces and each character of that
    string is mapped through ``le2id.get``; the id list is zero-padded or
    truncated to ``seq_length``.

    Returns ``(sub_num, sub_pos, sub_lens)``: slice -> id list, slice ->
    ``(start, end)`` and the id-list lengths before padding/truncation.
    """
    sub_num, sub_pos, sub_lens = {}, {}, []
    total = len(asr)
    window_sizes = [total]
    for start in range(total):
        for width in window_sizes:
            stop = start + width
            if stop > total:
                continue
            piece = asr[start:stop]
            spelled = ' '.join(pypinyin.lazy_pinyin(piece, 0, errors='ignore'))
            ids = [le2id.get(letter) for letter in spelled]
            sub_lens.append(len(ids))
            # Pad with zeros up to seq_length, then cut anything beyond it.
            ids.extend([0] * (seq_length - len(ids)))
            if len(ids) > seq_length:
                ids = ids[:seq_length]
            sub_pos[piece] = (start, stop)
            sub_num[piece] = ids

    return sub_num, sub_pos, sub_lens

示例#52

0

显示文件

文件： gen_test.py 项目： nickyc975/Chinese_Information_Processing

def gen_test():
    """Generate the pinyin test file and the matching sentence file.

    Every ``.txt`` file found under ``RAW_DIR`` is read line by line and split
    with ``extract_sentences``. The space-joined pinyin of each sentence is
    written to ``PINYIN_FILE`` and the sentences themselves to
    ``SENTENCES_FILE``, one per line.
    """
    files = []
    for root, _dirnames, filenames in os.walk(RAW_DIR):
        for filename in filenames:
            if filename.endswith(".txt"):
                # os.walk descends into subdirectories, so the path must be
                # built from the directory being visited, not from RAW_DIR.
                files.append(os.path.join(root, filename))

    processed_lines = []
    for raw in files:
        with open(raw, "r", encoding="utf8") as corpus:
            for line in corpus:
                processed_lines.extend(extract_sentences(line))

    pinyin_list = [
        " ".join(pypinyin.lazy_pinyin(line, errors="ignore"))
        for line in processed_lines
    ]

    with open(PINYIN_FILE, "w", encoding="utf8") as pinyin_file:
        pinyin_file.write("\n".join(pinyin_list) + "\n")

    with open(SENTENCES_FILE, "w", encoding="utf8") as sentences_file:
        sentences_file.write("\n".join(processed_lines) + "\n")

示例#53

0

显示文件

 def parse(self, response):
     for build in foreigh_7:
         item = SightItem()
         log.msg('build: ' + build, level=log.INFO)
         if baidu_geo_api(build.encode('utf-8')) is not None:
             lng, lat = baidu_geo_api(build.encode('utf-8'))
         else:
             lng, lat = 1, 1
         item['lng'] = lng
         item['lat'] = lat
         item['id_num'] = self.id_num
         self.id_num += 1L
         item['category'] = u'国外地标建筑'
         item['title'] = build.encode('utf-8')
         pinyin = lazy_pinyin(build)
         item['pinyin'] = ''.join(pinyin).upper()
         if lng == 1 or lat == 1:
             log.msg('no landmark found: ' + 'at line 36,' + build,
                     level=log.INFO)
             continue
         baike_url = 'https://baike.baidu.com/item/%s' % build
         yield scrapy.Request(baike_url,
                              meta={'item': item},
                              callback=self.content_parse)

示例#54

0

显示文件

文件： corrector.py 项目： RacleRay/Bank_FAQ_ChatBot

    def find_candidates(self, w):
        """Collect candidate values from ``self.trie`` whose keys are close to the pinyin of ``w``.

        Uses the first pinyin syllable of ``w``. Keys are built from trie
        prefixes of that syllable plus their suffixes and kept when their
        length differs from the syllable's by at most one. ``w`` itself is
        added at the end; if a ``KeyError`` occurs, whatever was collected
        up to that point is returned.
        """
        candidates = set()
        pin = lazy_pinyin(w)[0]
        try:
            # Prefix lookup: keep prefixes at most one character shorter
            # than the syllable.
            near_prefixes = [
                prefix for prefix in self.trie.prefixes(pin)
                if 0 <= len(pin) - len(prefix) <= 1
            ]

            if not near_prefixes:
                # NOTE(review): this assigns a single key, so the loop below
                # iterates over its characters - confirm that is intended.
                near_prefixes = self.trie.prefixes(pin)[0]

            for prefix in near_prefixes:
                for suf in self.trie.suffixes(prefix):
                    cand = prefix + suf
                    # Length limit
                    if abs(len(cand) - len(pin)) <= 1:
                        candidates.add(self.trie[cand])

            candidates.add(w)
        except KeyError:
            pass
        return candidates

示例#55

0

显示文件

# Third-party library for converting Chinese to pinyin: https://github.com/mozillazg/python-pinyin

from pypinyin import lazy_pinyin, pinyin, Style

print(''.join(lazy_pinyin('测试qwe啦')))  # ceshiqwela
print(pinyin('测试', style=Style.TONE2, heteronym=True))  # [['ce4'], ['shi4']]

示例#56

0

显示文件

文件： __init__.py 项目： pkuyouth/pkuyouth-webserver-v2

    get_bool_field, get_str_field, get_optional_str_field
from ...core.exceptions import RequestArgumentError

# Blueprint that the admin routes below are attached to.
bpAdmin = Blueprint('admin', __name__)

# Separators: ASCII comma, full-width comma, space, ideographic space or any
# other whitespace character.
re_sep = re.compile(r',|，| |　|\s')

PAGE_SIZE = 50

# Column names ordered by their pinyin; the catch-all "其他" always comes last.
COLUMNS = sorted(
    [
        "调查", "雕龙", "光阴", "机动", "评论", "人物",
        "视界", "言己", "姿势", "摄影", "现场", "又见",
        "特稿", "节日", "未明", "图说", "征稿", "招新",
        "手记", "副刊", "对话", "论衡", "休刊", "纪念",
        "聚焦燕园", "休闲娱乐", "社会舆论", "校友往事", "教育科技",
        "排行榜", "生日", "译天下", "新年献词",
    ],
    key=lazy_pinyin,
)
COLUMNS.append("其他")


def get_range(page, size):
    """Return the ``(start, stop)`` offsets of a 1-based ``page`` of ``size`` items.

    Page numbers below 1 are treated as page 1.
    """
    current = max(page, 1)
    start = (current - 1) * size
    stop = current * size
    return (start, stop)


@bpAdmin.route('/', methods=["GET"], strict_slashes=False)
@bpAdmin.route('/article', methods=["GET"])
def article_html():
    """
    Method   GET
    Args:
        - page   int

示例#57

0

显示文件

def name_to_pinyin(name):
    """Return the pinyin syllables of ``name`` joined without separators.

    ``name`` is converted with ``unicode()`` first (Python 2).
    """
    syllables = lazy_pinyin(unicode(name))
    return ''.join(syllables)

示例#58

0

显示文件

    return text


# Load the fixed head and tail of the LaTeX document.
tex_head = ''
tex_tail = ''
with open('template.tex', 'r', encoding='UTF-8') as temp:
    tex_head = temp.read()
with open('template_tail.tex', 'r', encoding="UTF-8") as temp:
    tex_tail = temp.read()

# Write main.tex: head, one generated section per student, then the tail.
with open('main.tex', 'w', encoding='UTF-8') as m:
    m.write(tex_head)
    # Gather the students of classes 1 to 4.
    students = []
    for i in range(1, 5):
        students.extend(getStudentsInClass(i))

    # Order the students by the pinyin of their first field.
    sorted_students = sorted(students, key=lambda x: lazy_pinyin(x[0]))
    for student in sorted_students:
        text = genTemplate(student)
        m.write(text)
        m.write('\n')
        #     break
        # if i == 4:
        #     for student in students:
        #         if student[0] == '张皓':
        #             text = genTemplate(student)
        #             m.write(text)
        #             m.write('\n')
    m.write(tex_tail)

示例#59

0

显示文件

文件： ndataDeal.py 项目： hmfighting/spelling_check

def get_pinyin(word):
    """Return the first pinyin syllable that ``lazy_pinyin`` yields for ``word``."""
    return lazy_pinyin(word)[0]

示例#60

0

显示文件

 def _candidates_by_edit(self, word):
     """Return the known one-edit variants of ``word`` that share its pinyin.

     Candidates come from ``self.known(self.edits1(word))``; when that is
     empty, ``word`` itself is the only candidate.
     """
     # The pinyin of `word` is the same for every candidate: compute it once
     # instead of once per comparison.
     target = lazy_pinyin(word)
     return [
         w for w in self.known(self.edits1(word)) or [word]
         if lazy_pinyin(w) == target
     ]