def district_path():
    """Add a ``path`` column to ``district`` and fill it from each row's name.

    Connects to the ``realestate`` MySQL database on 127.0.0.1, adds a
    ``path varchar(64)`` column after ``name``, then sets every row's
    ``path`` to the value ``pinyin.PinYin().convert(..., join=True)``
    returns for that row's ``name``.

    Returns:
        True once all rows have been updated and the changes committed.

    Raises:
        Any error raised by MySQLdb while connecting or executing a
        statement. The connection is closed before the error propagates.
    """
    translater = pinyin.PinYin()
    # NOTE(review): connection settings are hard-coded; consider moving them
    # to configuration.
    db = MySQLdb.connect('127.0.0.1', "root", "root", 'realestate', charset="utf8")
    try:
        cr = db.cursor(cursorclass=MySQLdb.cursors.DictCursor)

        cr.execute('alter table district add path varchar(64) after name')
        db.commit()

        cr.execute('select * from district')
        districts = cr.fetchall()

        # Values are passed as parameters so the driver escapes them; names
        # containing quotes can no longer break (or inject into) the statement.
        sql = 'update district set path = %s where id = %s'
        for d in districts:
            path = str(translater.convert(string=d['name'], join=True))
            params = (path, d['id'])
            cr.execute(sql, params)
            print('%s -- %r' % (sql, params))
        db.commit()
    finally:
        # Release the connection even when a statement fails.
        db.close()
    return True
# -*- coding: utf-8 -*-
import urllib2
import re
import pinyin
import codecs

# Download the stock list page; the response is closed even if read() fails.
response = urllib2.urlopen(u"http://quote.eastmoney.com/stocklist.html")
try:
    content = response.read()
finally:
    response.close()
content = content.strip()

# One match per list entry: the quote page slug ("code") and the link text
# that precedes the parenthesised suffix ("name").
pattern = re.compile(
    r'<li><a target="_blank" href="http://quote.eastmoney.com/(?P<code>\w*).html">(?P<name>.*)\(.*\)</a></li>'
)
result = u""
# NOTE: this rebinds the name ``pinyin`` from the module to a PinYin instance;
# the module itself is no longer reachable under that name below this line.
pinyin = pinyin.PinYin()
for m in pattern.finditer(content):
    # Drop the first two characters of the slug (presumably an exchange
    # prefix -- confirm against the page markup).
    code = m.group('code')[2:].decode(u'gb2312')
    name = m.group('name')
    if not isinstance(name, unicode):
        try:
            name = name.decode(u'gb2312')
        except UnicodeDecodeError:
            # Fall back to GBK for names that do not decode as GB2312.
            name = name.decode(u'gbk')
    # Skip entries whose name contains the "delisted" marker.
    if u'退市' in name:
        continue
    main_type = u'未知'
    submain_type1 = u''
    submain_type2 = u''
    abbr = pinyin.hanzi2pinyin_split(string=name, split=u"", firstcode=True)
else '' end as PARENTID , 'GS' as ORGTYPE, a.create_time, case when LENGTH(a.path)=8 then (SELECT id from org_unit where path='00000001') when LENGTH(a.path)=12 then (SELECT CONCAT(a.id,',',b.id) from org_unit b where LENGTH(b.path)=8 and SUBSTR(a.path ,1 ,8)=b.path) when LENGTH(a.path)=16 then (SELECT CONCAT('670869647114347',',',a.id,',',b.id)from org_unit b where LENGTH(b.path)=12 and SUBSTR(a.path ,1 ,12)=b.path) when LENGTH(a.path)=20 then (SELECT CONCAT(a.id,',',b.id)from org_unit b where LENGTH(b.path)=16 and SUBSTR(a.path ,1 ,16)=b.path) when LENGTH(a.path)=24 then (SELECT CONCAT(a.id,',',b.id)from org_unit b where LENGTH(b.path)=20 and SUBSTR(a.path ,1 ,20)=b.path) else '' end as PARENTIDS,a.ORG_ACCOUNT_ID from org_unit a) m) d on d.ID=a.ORG_DEPARTMENT_ID and d.ORG_ACCOUNT_ID=a.ORG_ACCOUNT_ID''' # 字段为中文转换拼音实例 List_ID = [] py = pinyin.PinYin() py.load_word() # 使用execute方法执行SQL语句 try: # cursor.execute(del_sql_unit) # cursor.execute(del_sql_umuser) cursor.execute(sql_unit) cursor.execute(sql_memer) cursor.execute(tmp_loginid) loginid_sql = cursor.fetchall() loginid_row = cursor.rowcount for i in range(loginid_row): # loginid_sql[i][1] = py.hanzi2pinyin_split(string=loginid_sql[i][0], split="-").replace('-', '') # print loginid_sql[i][1] sql_pinyin = "update tmp_umuser set logonid = '%s' where username = '******'" \
#!/usr/bin/env python
import pinyin

# Shared converter; load_word() is called once at import time.
test = pinyin.PinYin()
test.load_word()


def getpinyin(s):
    """Return *s* with each character replaced by its pinyin, UTF-8 encoded.

    *s* is decoded from UTF-8 when possible; if decoding fails it is used
    as given. Each character is passed to ``test.hanzi2pinyin``; when the
    lookup fails or yields an empty string, the character is kept as is.

    Args:
        s: Text to convert (UTF-8 byte string or unicode string).

    Returns:
        The converted text, encoded as UTF-8.
    """
    try:
        s = s.decode('utf8')
    except (UnicodeError, AttributeError):
        # Not decodable (already text, or not valid UTF-8): use it as given.
        pass

    pieces = []
    for ch in s:
        try:
            converted = test.hanzi2pinyin(string=ch)[0]
        except Exception:
            # Best effort: any lookup failure keeps the original character,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            converted = ch
        if converted == '':
            converted = ch
        pieces.append(converted)
    return ''.join(pieces).encode('utf8')
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. from pybtex.style.sorting import BaseSortingStyle import pinyin import re import os, sys wordfile = re.sub(r"[^/\\]*$", "word.data", __file__) if not os.path.exists(wordfile): wordfile = re.sub(r"[^/\\]*$", "word.data", os.path.abspath(sys.argv[0])) if not os.path.exists(wordfile): wordfile = "word.data" #print "Data File: " + os.path.abspath(wordfile) pymod = pinyin.PinYin(wordfile) pymod.load_word() class SortingStyle(BaseSortingStyle): name = 'chinese_author' def sorting_key(self, entry): if entry.type in ('book', 'inbook'): author_key = self.author_editor_key(entry) else: author_key = self.persons_key(entry.persons['author']) return (author_key, entry.fields.get('year', ''), entry.fields.get('title', '')) def persons_key(self, persons):
import codecs

from ..trie import Trie
import zh
import pinyin

stop_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'stopwords.txt')
pinyin_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'pinyin.txt')

# Load the stop-word list (one word per line, UTF-8). The ``with`` block
# closes the file even if reading raises.
stop = set()
with codecs.open(stop_path, 'r', 'utf-8') as fr:
    for word in fr:
        stop.add(word.strip())

pin = pinyin.PinYin(pinyin_path)
re_zh = re.compile('([\u4E00-\u9FA5]+)')


def filter_stop(words):
    """Return a list of the items in *words* that are not in ``stop``."""
    return [w for w in words if w not in stop]


def zh2hans(sent):
    """Return the result of ``zh.transfer`` applied to *sent*."""
    return zh.transfer(sent)


def get_sentences(doc):
    # NOTE(review): only these opening statements are visible in this
    # excerpt; the rest of the body appears to lie outside it -- confirm
    # before editing further.
    line_break = re.compile('[\r\n]')
    delimiter = re.compile('[,。?!;]')
    sentences = []