Exemplos de PinYin em Python, exemplos de pinyin.PinYin em Python

Exemplo n.º 1

0

Exibir arquivo

def district_path():
    """Add a ``path`` column to the ``district`` table and populate it.

    Connects to the ``realestate`` MySQL database on 127.0.0.1, adds a
    ``path varchar(64)`` column after ``name``, then sets ``path`` on every
    row to ``translater.convert(string=<name>, join=True)``.

    Returns:
        True once all updates have been committed.
    """
    translater = pinyin.PinYin()

    db = MySQLdb.connect('127.0.0.1', "root", "root", 'realestate', charset="utf8")
    try:
        cr = db.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        cr.execute('alter table district add path varchar(64) after name')
        db.commit()

        cr.execute('select * from district')
        districts = cr.fetchall()

        # Bind the values as query parameters instead of concatenating them
        # into the statement, so a quote character in a converted name cannot
        # break (or inject into) the SQL.
        sql = 'update district set path = %s where id = %s'
        for d in districts:
            params = (str(translater.convert(string=d['name'], join=True)), d['id'])
            cr.execute(sql, params)
            print("%s %r" % (sql, params))
        db.commit()
    finally:
        # Release the connection even when one of the statements fails.
        db.close()
    return True

Exemplo n.º 2

0

Exibir arquivo

Arquivo: crawler.py Projeto: yuxi214/HanboStock

# -*- coding: utf-8 -*-
import urllib2
import re
import pinyin
import codecs

# Download the stock list page and keep the body exactly as read() returns it;
# the decode() calls below treat the matched text as GB2312/GBK-encoded bytes.
response = urllib2.urlopen(u"http://quote.eastmoney.com/stocklist.html")
content = response.read()
content = content.strip()
# One list item per stock: the link target provides ``code`` and the link
# text before the parenthesised suffix provides ``name``.
pattern = re.compile(
    r'<li><a target="_blank" href="http://quote.eastmoney.com/(?P<code>\w*).html">(?P<name>.*)\(.*\)</a></li>'
)

result = u""
# NOTE(review): this rebinds the module name ``pinyin`` to a PinYin instance,
# so the module itself is no longer reachable under that name from here on.
pinyin = pinyin.PinYin()
for m in pattern.finditer(content):
    # Drop the first two characters of the matched code before decoding.
    code = m.group('code')[2:].decode(u'gb2312')
    name = m.group('name')
    if not isinstance(name, unicode):
        try:
            name = name.decode(u'gb2312')
        except UnicodeDecodeError:
            # Fall back to GBK only when the bytes are not valid GB2312;
            # any other error (e.g. KeyboardInterrupt) must propagate.
            name = name.decode(u'gbk')

    # Skip entries whose name contains the marker 退市 ("delisted").
    if u'退市' in name:
        continue

    main_type = u'未知'
    submain_type1 = u''
    submain_type2 = u''
    abbr = pinyin.hanzi2pinyin_split(string=name, split=u"", firstcode=True)

Exemplo n.º 3

0

Exibir arquivo

		else ''
		end as PARENTID ,
		'GS' as ORGTYPE,
		a.create_time,
		case when LENGTH(a.path)=8 then (SELECT id from org_unit where path='00000001')
		when LENGTH(a.path)=12 then (SELECT CONCAT(a.id,',',b.id) from org_unit b where LENGTH(b.path)=8 and SUBSTR(a.path ,1 ,8)=b.path)
		when LENGTH(a.path)=16 then (SELECT CONCAT('670869647114347',',',a.id,',',b.id)from org_unit b where LENGTH(b.path)=12 and SUBSTR(a.path ,1 ,12)=b.path)
		when LENGTH(a.path)=20 then (SELECT CONCAT(a.id,',',b.id)from org_unit b where LENGTH(b.path)=16 and SUBSTR(a.path ,1 ,16)=b.path)
		when LENGTH(a.path)=24 then (SELECT CONCAT(a.id,',',b.id)from org_unit b where LENGTH(b.path)=20 and SUBSTR(a.path ,1 ,20)=b.path)
		else ''
		end as PARENTIDS,a.ORG_ACCOUNT_ID
		from org_unit a) m)  d on d.ID=a.ORG_DEPARTMENT_ID and d.ORG_ACCOUNT_ID=a.ORG_ACCOUNT_ID'''

# Pinyin converter instance for fields whose values are Chinese text.
List_ID = []
py = pinyin.PinYin()
py.load_word()

# Run the SQL statements with the cursor's execute method
try:
    # cursor.execute(del_sql_unit)
    # cursor.execute(del_sql_umuser)
    cursor.execute(sql_unit)
    cursor.execute(sql_memer)
    cursor.execute(tmp_loginid)
    loginid_sql = cursor.fetchall()
    loginid_row = cursor.rowcount
    for i in range(loginid_row):
        # loginid_sql[i][1] = py.hanzi2pinyin_split(string=loginid_sql[i][0], split="-").replace('-', '')
        # print loginid_sql[i][1]
        sql_pinyin = "update tmp_umuser set logonid = '%s' where username = '******'" \

Exemplo n.º 4

0

Exibir arquivo

#!/usr/bin/env python

import pinyin

# Module-level PinYin instance that getpinyin() calls hanzi2pinyin() on.
# load_word() runs once at import time (presumably it loads the conversion
# table -- confirm against the pinyin module).
test = pinyin.PinYin()
test.load_word()


def getpinyin(s):
    """Return *s* converted character by character to pinyin, UTF-8 encoded.

    *s* is first decoded as UTF-8 when possible; if that fails it is used
    as given. Each character is passed to ``test.hanzi2pinyin`` and replaced
    by the first element of the result. A character is kept unchanged when
    the conversion fails or yields an empty string.

    Args:
        s: the text to convert, either encoded as UTF-8 or already decoded.

    Returns:
        The converted text encoded as UTF-8.
    """
    try:
        s = s.decode('utf8')
    except (AttributeError, UnicodeError):
        # Best effort: the input has no decode() or is not valid UTF-8,
        # so carry on with it unchanged.
        pass

    pieces = []
    for ch in s:
        try:
            converted = test.hanzi2pinyin(string=ch)[0]
        except Exception:
            # Conversion is best effort; keep the original character, but
            # let KeyboardInterrupt/SystemExit through.
            converted = ch
        else:
            if converted == '':
                converted = ch
        pieces.append(converted)
    # Join once instead of growing a string inside the loop.
    return ''.join(pieces).encode('utf8')

Exemplo n.º 5

0

Exibir arquivo

Arquivo: chinese_author.py Projeto: 00liujj/pybibtex

# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from pybtex.style.sorting import BaseSortingStyle
import pinyin
import re
import os, sys
# Locate word.data: first next to this module, then next to the running
# script, and finally relative to the current working directory.
# os.path is used rather than re.sub(r"[^/\\]*$", ...) because on
# Python 3.7+ that pattern also substitutes the empty match at the end of
# the string, producing ".../word.dataword.data".
wordfile = os.path.join(os.path.dirname(__file__), "word.data")
if not os.path.exists(wordfile):
    wordfile = os.path.join(
        os.path.dirname(os.path.abspath(sys.argv[0])), "word.data")
if not os.path.exists(wordfile):
    wordfile = "word.data"
# Shared converter built from the located data file.
pymod = pinyin.PinYin(wordfile)
pymod.load_word()


class SortingStyle(BaseSortingStyle):
    name = 'chinese_author'

    def sorting_key(self, entry):
        if entry.type in ('book', 'inbook'):
            author_key = self.author_editor_key(entry)
        else:
            author_key = self.persons_key(entry.persons['author'])
        return (author_key, entry.fields.get('year', ''),
                entry.fields.get('title', ''))

    def persons_key(self, persons):

Exemplo n.º 6

0

Exibir arquivo

import codecs

from ..trie import Trie
import zh
import pinyin

# Data files are looked up in the directory containing this module.
stop_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'stopwords.txt')
pinyin_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'pinyin.txt')
# Stop-word set: one entry per line of stopwords.txt, surrounding whitespace
# stripped. The ``with`` block closes the file even if reading fails.
stop = set()
with codecs.open(stop_path, 'r', 'utf-8') as fr:
    for word in fr:
        stop.add(word.strip())
pin = pinyin.PinYin(pinyin_path)
# NOTE(review): this is a U+4E00..U+9FA5 character class only when the
# literal is a unicode string (Python 3, or unicode_literals under
# Python 2) -- confirm against the top of the file.
re_zh = re.compile('([\u4E00-\u9FA5]+)')


def filter_stop(words):
    """Return a list of the items of *words* that are not in ``stop``."""
    return [item for item in words if item not in stop]


def zh2hans(sent):
    """Pass *sent* through ``zh.transfer`` and return its result."""
    converted = zh.transfer(sent)
    return converted


def get_sentences(doc):
    line_break = re.compile('[\r\n]')
    delimiter = re.compile('[，。？！；]')
    sentences = []