def searchRecursive(node, letter, word, previousRow, results, maxCost):
    columns = len(word) + 1
    currentRow = [previousRow[0] + 1]

    # Build one row for the letter, with a column for each letter in the target
    # word, plus one for the empty string at column 0
    for column in xrange(1, columns):
        insertCost = currentRow[column - 1] + 1
        deleteCost = previousRow[column] + 1

        if word[column - 1] == letter or pinyin.get(word[column - 1]) == pinyin.get(letter):
            replaceCost = previousRow[column - 1]
        else:
            replaceCost = previousRow[column - 1] + 1

        currentRow.append(min(insertCost, deleteCost, replaceCost))

    # if the last entry in the row indicates the optimal cost is less than the
    # maximum cost, and there is a word in this trie node, then add it.
    if currentRow[-1] <= maxCost and node.word is not None:
        results.append([node.word, currentRow[-1]])

    # if any entries in the row are less than the maximum cost, then
    # recursively search each branch of the trie
    if min(currentRow) <= maxCost:
        for letter in node.children:
            searchRecursive(node.children[letter], letter, word, currentRow,
                            results, maxCost)
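# A minimal sketch of the trie scaffolding searchRecursive assumes: a node
# type with `children` and `word` attributes, plus a top-level driver that
# seeds row 0 of the edit-distance matrix. Names other than node.word and
# node.children are assumptions, not part of the original snippet.
class TrieNode:
    def __init__(self):
        self.word = None
        self.children = {}

    def insert(self, word):
        node = self
        for letter in word:
            node = node.children.setdefault(letter, TrieNode())
        node.word = word


def search(trie, word, maxCost):
    # Row 0: transforming the empty prefix into word[:i] costs i insertions.
    currentRow = range(len(word) + 1)
    results = []
    for letter in trie.children:
        searchRecursive(trie.children[letter], letter, word, currentRow,
                        results, maxCost)
    return results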
def test_numbers(self):
    """Test get/get_consonant/get_vowel for Arabic numerals."""
    self.assertEqual(pinyin.get('2'), u'er')
    self.assertEqual(pinyin.get('1234567890'), u'yiersansiwuliuqibajiuling')
    self.assertEqual(pinyin.get_consonant('1'), u'y')
    self.assertEqual(pinyin.get_vowel('1'), u'i')
def insertMongo(cont):
    print(type1 + '#########')
    print(type2 + '#########')
    print(type3 + '#########')
    global keyword
    global key
    global value
    keyword = []
    tech = []
    if cont is None:
        pass
    tech = [{"名称": [type3]},]
    flaga = 0
    for item in content:
        words = wordslist()
        for word in words:
            name = word[0].strip('\n')
            name = name.strip(" ")
            datasource = word[1]
            # print('--------------%r-------------' % word)
            if name in item:
                keyword += ((name, datasource),)
        if '【' in item:
            if flaga == 1:
                tech += [{key: value},]
                # print("key==" + key + "value::" + str(value) + '\n\n\n')
            else:
                flaga = 1
            item = item.replace('【', '')
            item = item.replace('】', '')
            key = item.replace(' ', '')
            key = key.strip('\n')
            value = []
            continue
        else:
            value.append(item)
    # flush the final section
    tech += [{key: value},]
    keyword = set(keyword)
    result = []
    for i in list(keyword):
        keysss = {}
        keysss["name"] = i[0]
        keysss["datasource"] = i[1]
        result += [keysss,]
    key_word = {"keyword": result}
    tech = {"content": tech}
    tech.update(key_word)
    rank = (("rankone", type1.strip('\n')), ("ranktwo", type2.strip('\n')),
            ("rankthree", type3.strip('\n')), ("source", "diag_rule"),)
    tech.update(dict(rank))
    sort = (("sortrankone", pinyin.get(str(type1).strip('\n'))),
            ("sortranktwo", pinyin.get(str(type2).strip('\n'))),
            ("sortrankthree", pinyin.get(str(type3).strip('\n'))),)
    index = {"index": type3.strip('\n')}
    tech.update(index)
    tech.update(sort)
    # w.write(str(type3).strip('\n') + "$$" + "diag_rule" + "\n")
    knowledgeBase.insert(tech)
    print(tech)
def Line5():
    noun = open('../data/freqword_n.txt', encoding='utf-8').readlines()
    verb = open('../data/freqword_v.txt', encoding='utf-8').readlines()
    nounlist = []
    for word in noun:
        # outfile.write(pinyin.get(word, format="strip") + " ")
        i = 0
        while i < len(word):
            if word[i:i+2] != "\n":
                nounlist.append(word[i:i+2])
            i = i + 3
    verblist = []
    for word in verb:
        i = 0
        while i < len(word):
            if word[i:i+2] != "\n":
                verblist.append(word[i:i+2])
            i = i + 3
    # sentence = ""
    count = 0
    num = 0
    rhythm = ""
    rhythmList = ["a", "e", "i", "o", "u"]
    while num < 4:
        i = random.randint(1, len(nounlist) - 1)
        i1 = random.randint(1, len(nounlist) - 1)
        j = random.randint(1, len(verblist) - 1)
        ind = 0
        ind1 = 0
        if num == 1:
            rhythm = ""
            verse = pinyin.get(nounlist[i1][1], format="strip")
            for p in range(len(verse) - 1, -1, -1):
                if verse[p] in rhythmList:
                    ind = p
            rhythm = verse[ind:len(verse)]
        if num == 3:
            ind1 = 0
            verse1 = pinyin.get(nounlist[i1][1], format="strip")
            for p in range(len(verse1) - 1, -1, -1):
                if verse1[p] in rhythmList:
                    ind1 = p
            while verse1[ind1:len(verse1)] != rhythm:
                i1 = random.randint(1, len(nounlist) - 1)
                verse1 = pinyin.get(nounlist[i1][1], format="strip")
                for p in range(len(verse1) - 1, -1, -1):
                    if verse1[p] in rhythmList:
                        ind1 = p
        print(nounlist[i] + verblist[j][1] + nounlist[i1])
        num += 1
def test_init_contact(self):
    """Initialize the contact information."""
    import xlrd
    book = xlrd.open_workbook('d:/a.xls')
    sheet = book.sheets()[0]
    max_row = sheet.nrows
    print 'begin init database'
    name_dict = {}
    max_row = 3  # limit rows while testing
    for i in range(2, max_row):
        print 'row', i
        dept = sheet.row_values(i)[1].encode("utf-8").strip()
        job = sheet.row_values(i)[2].encode("utf-8").strip()
        name = sheet.row_values(i)[3].encode("utf-8").strip()
        name_pinyin = pinyin.get(name)
        name_shot = pinyin.get_initial(name, '')
        username = name_pinyin
        mobile = str(sheet.row_values(i)[4])
        telephone = str(sheet.row_values(i)[5])
        innerphone = str(sheet.row_values(i)[6])
        fax = str(sheet.row_values(i)[7])
        # strip the float artifacts
        mobile = self.remove_dot(mobile)
        telephone = self.remove_dot(telephone)
        innerphone = self.remove_dot(innerphone)
        fax = self.remove_dot(fax)
        # count duplicate pinyin names and suffix a counter to disambiguate
        if name_pinyin in name_dict:
            username = username + str(name_dict[name_pinyin])
            name_dict[name_pinyin] += 1
        else:
            name_dict[name_pinyin] = 1
        print name, job, mobile, telephone, innerphone, fax, dept, pinyin.get(name), name_shot, username
        # persist to the database
        user = User.objects.get(username='******')
        qbinfo = QBContactInfo(duty=job, mobile=mobile, telephone=telephone,
                               innerphone=innerphone, fax=fax)
        qbinfo.user = user
        qbinfo.save()
    print len(name_dict)
    print name_dict
    print 'end'
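# remove_dot is referenced above but not shown. A minimal sketch, assuming it
# only strips the '.0' suffix that str() puts on xlrd's numeric cells; the
# exact behavior is an assumption.
def remove_dot(self, value):
    # xlrd reads numeric cells as floats, so str() yields e.g. '13800138000.0';
    # drop the trailing '.0' so phone numbers stay plain digit strings.
    if value.endswith('.0'):
        return value[:-2]
    return value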
def test_get(self):
    self.assertEqual(pinyin.get('你好'), pinyin.get('你好', format="diacritical"))
    self.assertEqual(pinyin.get(u('你好'), format="strip"), u('nihao'))
    self.assertEqual(pinyin.get(u('你好'), format="numerical"), u('ni3hao3'))
    self.assertEqual(pinyin.get(u('你好'), format="diacritical"), u('nǐhǎo'))
    self.assertEqual(pinyin.get('你好吗?'), u('nǐhǎoma?'))
    self.assertEqual(pinyin.get('你好吗？'), u('nǐhǎoma？'))
    self.assertEqual(pinyin.get('你好'), u('nǐhǎo'))
    self.assertEqual(pinyin.get('叶'), u('yè'))
    self.assertEqual(pinyin.get('少女'), u('shǎonv̌'))
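# For reference, the format/delimiter combinations exercised above, as a quick
# interactive sketch; the expected outputs follow from the assertions.
import pinyin

print(pinyin.get('你好'))                                  # 'nǐhǎo' (diacritical is the default format)
print(pinyin.get('你好', format='strip', delimiter=' '))   # 'ni hao'
print(pinyin.get('你好', format='numerical'))               # 'ni3hao3'
print(pinyin.get_initial('你好', ''))                       # 'nh'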
def parse_small_city(self, response):
    # the global declaration must precede any assignment to the counter
    global small_city_xx_cnt
    item = response.meta['item']
    request_list = []
    url_set = set()
    # category
    category_url_list = response.xpath('//li[@class="term-list-item"]//ul[@class="desc Fix"]//li/a[starts-with(@onclick, "pageTracker._trackPageview(\'dp_home_food_hotdaohang_fenlei")]/@href').extract()
    category_list = response.xpath('//li[@class="term-list-item"]//ul[@class="desc Fix"]//li/a[starts-with(@onclick, "pageTracker._trackPageview(\'dp_home_food_hotdaohang_fenlei")]/text()').extract()
    # area
    region_data = response.xpath('//div[@class="pop-panel ep_quick-search ep_quick-search-regions Fix"]/div[@class="dp-option-wrap"]/dl').extract()
    for region in region_data:
        region_html = Selector(text=region)
        region_id = region_html.xpath('//dt/a/@data-value').extract()[0].strip()
        region = region_html.xpath('//dt/a/strong/text()').extract()[0].strip('[]\n\r\t ')
        area_list = region_html.xpath('//ul/li/a/text()').extract()
        area_id_list = region_html.xpath('//ul/li/a/@data-value').extract()
        for area_index, area in enumerate(area_list):
            for cate_index, category in enumerate(category_list):
                one_item = deepcopy(item)
                one_item['category'] = category.strip()
                one_item['region'] = region.strip()
                one_item['region_code'] = pinyin.get(one_item['region'])
                one_item['area'] = area.strip()
                one_item['area_code'] = pinyin.get(one_item['area'])
                url = "".join(['http://www.dianping.com', category_url_list[cate_index], 'r', area_id_list[area_index]])
                small_city_xx_cnt += 1
                print('small-city district/category URL count:\t' + str(small_city_xx_cnt))
                print(url)
                print(one_item['city'])
                print(one_item['category'])
                print(one_item['area'])
                if url not in url_set:
                    url_set.add(url)
                else:
                    continue
                request_list.append(Request(url, method='GET', meta={'item': one_item},
                                            headers=headers, cookies=None, body=None,
                                            priority=0, errback=None,
                                            encoding=response.encoding,
                                            callback=self.parse_restaurant_list))
    return request_list
def parse_big_city(self, response):
    # the global declaration must precede any use of the counter
    global big_city_xx_cnt
    item = response.meta['item']
    request_list = []
    url_set = set()
    # category
    category_id_list = response.xpath('//div[@class="secondary-category J-secondary-category"]/a[starts-with(@onclick, "pageTracker._trackPageview(\'dp_head_guangzhou_food_fenlei")]/@href').extract()
    category_list = response.xpath('//div[@class="secondary-category J-secondary-category"]/a[starts-with(@onclick, "pageTracker._trackPageview(\'dp_head_guangzhou_food_fenlei")]/text()').extract()
    # area
    js = re.search(r'<script class="J_auto-load" type="text/plain">([\s\S]*?)</script>', response.body).group(1)
    data = Selector(text=js)
    region_data = data.xpath('//div[@class="fpp_business"]//dl').extract()
    for region in region_data:
        region_html = Selector(text=region)
        region = region_html.xpath('//dt/a/text()').extract()[0].strip('[]\n\r\t ')
        area_list = region_html.xpath('//li/a/text()').extract()
        area_url_list = region_html.xpath('//li/a/@href').extract()
        for area_index, area in enumerate(area_list):
            for cate_index, category in enumerate(category_list):
                one_item = deepcopy(item)
                one_item['category'] = category.strip()
                one_item['region'] = region
                one_item['region_code'] = pinyin.get(one_item['region'])
                one_item['area'] = area.strip()
                one_item['area_code'] = pinyin.get(one_item['area'])
                url = "".join(['http://www.dianping.com', area_url_list[area_index], 'g', category_id_list[cate_index]])
                print('big-city district/category URL count:\t' + str(big_city_xx_cnt))
                print(one_item['city'])
                print(one_item['category'])
                print(one_item['area'])
                print(url)
                if url not in url_set:
                    url_set.add(url)
                else:
                    continue
                big_city_xx_cnt += 1
                request_list.append(Request(url, method='GET', meta={'item': one_item},
                                            headers=headers, cookies=None, body=None,
                                            priority=0, errback=None,
                                            encoding=response.encoding,
                                            callback=self.parse_restaurant_list))
    return request_list
def makeVariants(ptxt):
    ptxt = ptxt.strip()
    plst = list(ptxt)
    for i in range(len(plst)):
        plst[i] = pinyin.get(plst[i])
    roman = u'.'.join(plst)
    en = pinyin.get(ptxt)
    try:
        en = en[0].upper() + en[1:].lower()
    except:
        print ptxt
        print en
        raise
    return [ptxt, roman, en]
def register(request):
    if request.method == 'POST':
        form = UserCreationForm(request.POST)
        if form.is_valid():
            cd = form.cleaned_data
            new_user = form.save()
            new_user.first_name = cd['first_name']
            new_user.last_name = cd['last_name']
            new_user.email = cd['username']
            new_user.save()
            aDomain = pinyin.get('-'.join(cd['first_name'] + cd['last_name']))
            regex = '^' + aDomain + r'(-\d+)?'
            same_name = AccountModel.objects.filter(
                aDomain__regex=regex).order_by('-aUser__id')
            if not same_name:
                p = AccountModel(aUser=new_user, aDomain=aDomain)
            else:
                the_last = same_name[0].aDomain
                regex1 = re.compile('^' + aDomain + '$')
                regex2 = re.compile('^' + aDomain + r'-(\d+)')
                match1 = re.match(regex1, the_last)
                match2 = re.match(regex2, the_last)
                if match1:
                    p = AccountModel(aUser=new_user, aDomain=aDomain + '-1')
                else:
                    p = AccountModel(
                        aUser=new_user,
                        aDomain=aDomain + '-' + str(int(match2.group(1)) + 1))
            p.save()
            return redirect("/register")
    else:
        form = UserCreationForm()
    ctx = {'form': form}
    ctx.update(csrf(request))
    return render(request, "register.html", ctx)
def get_role2feature_info(domain_name, role_name):
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_name_en = domain_ch2en_dict[role_name]
    _id = domain_pinyin + '_' + role_name_en
    try:
        es_result = es.get(index=weibo_role_index_name,
                           doc_type=weibo_role_index_type, id=_id)['_source']
        feature_info_dict = es_result
        feature_filter_dict = dict()
        feature_filter_dict['political_side'] = json.loads(
            feature_info_dict['political_side'])
        try:
            feature_filter_dict['psy_feature'] = json.loads(
                feature_info_dict['psy_feature'])
        except:
            feature_filter_dict['psy_feature'] = []
        return feature_filter_dict
    except:
        return []
def add(self, town):
    cursor = connection.cursor()
    cursor.execute("INSERT INTO town (id, province, city, county, name, "
                   "pinyin) VALUES (?, ?, ?, ?, ?, ?)",
                   (town.id, town.county.city.province.id, town.county.city.id,
                    town.county.id, town.name, pinyin.get(town.name)))
    connection.commit()
def add(self, county):
    cursor = connection.cursor()
    cursor.execute("INSERT INTO county (id, province, city, name, pinyin)"
                   " VALUES (?, ?, ?, ?, ?)",
                   (county.id, county.city.province.id, county.city.id,
                    county.name, pinyin.get(county.name)))
    connection.commit()
def to_city(self, var_str, type):
    if isinstance(var_str, str):
        if var_str == 'None':
            return ''
        else:
            stringHead = ''
            stringFull = ''
            for single in var_str:
                stringFull += pinyin.get(single, format='strip', delimiter="")
                stringHead += pinyin.get(single, format='strip', delimiter="")[0]
            if type == "FULL":
                return stringFull
            elif type == "HEAD":
                return stringHead
    else:
        return ''
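# Usage sketch for to_city; `conv` stands in for an instance of the defining
# class, and the city name is an arbitrary example.
conv.to_city('北京', 'FULL')   # 'beijing'
conv.to_city('北京', 'HEAD')   # 'bj'
conv.to_city('None', 'FULL')   # ''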
def get_add_opinion_corpus(task_detail):
    mark = False
    corpus_name = task_detail['corpus_name']
    corpus_pinyin = pinyin.get(corpus_name, format='strip', delimiter='_')
    item_dict = {}
    item_dict['corpus_name'] = corpus_name
    item_dict['corpus_pinyin'] = corpus_pinyin
    item_dict['submitter'] = task_detail['submitter']
    item_dict['status'] = 0
    try:
        es.get(index=opinion_corpus_index_name,
               doc_type=opinion_corpus_index_type, id=corpus_pinyin)
        return 'exists'
    except:
        try:
            es.index(index=opinion_corpus_index_name,
                     doc_type=opinion_corpus_index_type,
                     body=item_dict, id=corpus_pinyin)
            mark = True
            r_r.lpush(opinion_expand_task_queue_name, json.dumps(item_dict))
        except:
            pass
    return mark
def upload():
    if request.method == 'POST':
        file_val = request.files['file']
        # print('filename:', file_val.filename)
        if file_val and allowed_file(file_val.filename):
            ascii_name = pinyin.get(file_val.filename)
            filename = secure_filename(ascii_name)
            file_path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename)
            file_val.save(file_path)
            # cloud_sight
            try:
                cloud_img = CloudImage(file=open(file_path, 'rb'), locale=LOCALE, lang=LANG)
            except:
                return jsonify(type='mistake', content=None)
            try:
                os.remove(file_path)
            except:
                print('Remove failed: file not found.')
            if cloud_img:
                result = cloud_img.result()
                if result:
                    # print('result is', result.encode('utf-8'))
                    return jsonify(type='result', content=result)
                else:
                    print('result is None')
                    return jsonify(type='mistake', content=None)
            else:
                print('cloud_img is None')
                return jsonify(type='mistake', content=None)
def search(query, tag):
    if tag == 'song':
        songid = anydbm.open('id_song.db', 'r')
        daopai = anydbm.open('daopai_song.db', 'r')
    elif tag == 'singer':
        songid = anydbm.open('id_singer.db', 'r')
        daopai = anydbm.open('daopai_singer.db', 'r')
    elif tag == 'both':
        songid = anydbm.open('id_both.db', 'r')
        daopai = anydbm.open('daopai_both.db', 'r')
    counthash = {}
    out = []
    for i in query.decode('utf-8'):
        try:
            # only index CJK unified ideographs
            if i >= u'\u4e00' and i <= u'\u9fa5':
                idstr2 = daopai[pinyin.get(i)]
                ids2 = idstr2.split(' ')
                idstr = daopai[i.encode('utf-8')]
                ids = idstr.split(' ')
                counthash = mat2hash(ids, counthash)
        except:
            continue
    keys = sorted(counthash.iteritems(), key=lambda counthash: counthash[1],
                  reverse=True)
    loop = 0
    for i in keys:
        if loop > 100:
            break
        newsong = songid[i[0]]
        out.append(newsong)
        loop += 1
    return out
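# mat2hash is not shown in the source. A minimal sketch, under the assumption
# that it simply tallies how often each song id appears across the per-character
# posting lists, so ids matching more query characters sort first; the name and
# exact behavior are assumptions.
def mat2hash(ids, counthash):
    for song_id in ids:
        counthash[song_id] = counthash.get(song_id, 0) + 1
    return counthash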
def domain_create_task(domain_name, create_type, create_time, submitter,
                       description, remark, compute_status=0):
    task_id = pinyin.get(domain_name, format='strip', delimiter='_')
    try:
        es.get(index=weibo_domain_index_name,
               doc_type=weibo_domain_index_type, id=task_id)['_source']
        return 'domain name exists!'
    except:
        try:
            domain_task_dict = dict()
            # domain_task_dict['xnr_user_no'] = xnr_user_no
            domain_task_dict['domain_pinyin'] = task_id
            domain_task_dict['domain_name'] = domain_name
            domain_task_dict['create_type'] = json.dumps(create_type)
            domain_task_dict['create_time'] = create_time
            domain_task_dict['submitter'] = submitter
            domain_task_dict['description'] = description
            domain_task_dict['remark'] = remark
            domain_task_dict['compute_status'] = compute_status
            r.lpush(weibo_target_domain_detect_queue_name, json.dumps(domain_task_dict))
            item_exist = dict()
            # item_exist['xnr_user_no'] = domain_task_dict['xnr_user_no']
            item_exist['domain_pinyin'] = domain_task_dict['domain_pinyin']
            item_exist['domain_name'] = domain_task_dict['domain_name']
            item_exist['create_type'] = domain_task_dict['create_type']
            item_exist['create_time'] = domain_task_dict['create_time']
            item_exist['submitter'] = domain_task_dict['submitter']
            item_exist['description'] = domain_task_dict['description']
            item_exist['remark'] = domain_task_dict['remark']
            item_exist['group_size'] = ''
            item_exist['compute_status'] = 0
            # store the creation record
            es.index(index=weibo_domain_index_name,
                     doc_type=weibo_domain_index_type,
                     id=item_exist['domain_pinyin'], body=item_exist)
            mark = True
        except:
            mark = False
    return mark
def get_show_domain_group_detail_portrait(domain_name):
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    es_result = es.get(index=tw_domain_index_name, doc_type=tw_domain_index_type,
                       id=domain_pinyin)['_source']
    member_uids = es_result['member_uids']
    es_mget_result = es_user_portrait.mget(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body={'ids': member_uids})['docs']
    result_all = []
    for result in es_mget_result:
        item = {}
        item['uid'] = ''
        item['nick_name'] = ''
        item['photo_url'] = ''
        item['domain'] = ''
        item['sensitive'] = ''
        item['location'] = ''
        item['fans_num'] = ''
        item['friends_num'] = ''
        # item['gender'] = ''
        item['home_page'] = ""
        item['influence'] = ''
        if result['found']:
            item['uid'] = result['_id']
            result = result['_source']
            if result.has_key('uname'):
                item['nick_name'] = result['uname']
            if result.has_key('photo_url'):
                item['photo_url'] = result['photo_url']
            if result.has_key('domain'):
                item['domain'] = result['domain']
            if result.has_key('sensitive'):
                item['sensitive'] = result['sensitive']
            if result.has_key('location'):
                item['location'] = result['location']
            if result.has_key('fansnum'):
                item['fans_num'] = result['fansnum']
            if result.has_key('friends_num'):
                item['friends_num'] = result['friendsnum']
            # item['gender'] = result['gender']
            if result.has_key('screenname'):
                item['home_page'] = "https://twitter.com/" + result['screenname']
            if result.has_key('influence'):
                item['influence'] = get_influence_relative(item['uid'],
                                                           result['influence'])
        # documents that were not found keep the empty defaults above
        result_all.append(item)
    return result_all
def assembly(self, imgs, categorys):
    print('Assembly data begins...')
    results = []
    wb = xlrd.open_workbook('sources/data-2019.10.31.xlsx')
    total = 0
    for si in range(wb.nsheets):
        sh = wb.sheet_by_index(si)
        rows = sh.nrows - 2
        total += rows
        category = categorys[sh.name]
        prefix = pinyin.get_initial(sh.name, '').upper()
        print('************* %s: %d rows *************' % (sh.name, rows))
        for ri in range(2, sh.nrows):
            ind = int(sh.cell_value(ri, 0))
            name = sh.cell_value(ri, 1).strip()
            unit = sh.cell_value(ri, 2).strip()
            price = sh.cell_value(ri, 3)
            ptype = sh.row(ri)[3].ctype
            if ptype == 1:
                # text cell: parse a price like '12.5元'
                price = price.strip()
                yind = price.find('元')
                if yind != -1:
                    price = float(price[:yind])
                else:
                    price = 0.0
            elif ptype == 0:
                # empty cell
                price = 0.0
            imgpath = None
            try:
                imgpath = imgs['%s%d' % (pinyin.get(sh.name[:2], format='strip'), ind)]
            except Exception as e:
                print('Image not found: %s (%s)' % (name, e))
            row = {
                'id': str(uuid.uuid1()).replace('-', ''),
                'numeration': '%s%06d' % (prefix, ind),
                'name': name,
                'img': imgpath,
                'price': price,
                'unit': unit,
                'specs': '%.2f/%s' % (price, unit),
                'amount': 0,
                'category': category,
                'enabled': 1,
                'create_by': 'admin',
                'create_time': datetime.datetime.now(),
            }
            results.append(row)
    print('End of assembly data. Total: %d rows.' % total)
    return results
def _sent_pyin(self, sent):
    """Return the pinyin of sent.

    :param sent: input sentence
    :return: space-delimited pinyin string without tone marks
    """
    pyins = pinyin.get(sent, delimiter=' ', format='strip')
    return pyins
def xm_pinyin(name_list):
    for n in name_list:
        try:
            xm = pinyin.get(n, format="strip")
            yield xm
        except Exception:
            yield None
def getCityPinYin():
    import pinyin
    sql_str = "select city_name from city where parent_id <> 0 and parent_id <> 2"
    res = sqlExecute(sql_str)
    res_pinyin = []
    for item in res:
        res_pinyin.append(pinyin.get(item, format='strip'))
    return res_pinyin
def get_audio():
    r = sr.Recognizer()
    with sr.Microphone() as source:
        r.adjust_for_ambient_noise(source)
        audio = r.listen(source)
        said = ''
        try:
            said = r.recognize_google(audio, language='zh-CN')
            transText = ts.sogou(said, from_language='zh', to_language='en')
            print(said + '\n')
            print(pinyin.get(said, format="strip", delimiter=" ") + '\n')
            print(pinyin.get(said, format="numerical") + '\n')
            print(transText + '\n')
        except Exception as e:
            print('L2ChineseBruh :) ' + str(e))
    return said.lower()
def get_string_with_only_char_in_list(col, char_list=var_name_char_list):
    col = pinyin.get(col, format="strip")
    res = u""
    for ch in col:
        if ch in char_list:
            res += ch
    return res
def to_pinyin(var_str):
    if isinstance(var_str, (str, unicode)):
        if var_str == 'None':
            return ""
        else:
            return pinyin.get(var_str, format='strip', delimiter="")
    else:
        return 'wrong'
def api_sentence_jieba():
    r = request.get_json()
    return jsonify([{
        "word": seg,
        "pinyin": pinyin.get(seg) if regex.search(r"\p{IsHan}", seg) else ""
    } for seg in jieba.cut(r["entry"])])
def get_delete_domain(domain_name):
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    try:
        es.delete(index=fb_domain_index_name, doc_type=fb_domain_index_type,
                  id=domain_pinyin)
        mark = True
    except:
        mark = False
    return mark
def get_show_domain_role_info(domain_name, role_name):
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = fb_domain_ch2en_dict[role_name]
    task_id = domain_pinyin + '_' + role_en
    es_result = es.get(index=fb_role_index_name, doc_type=fb_role_index_type,
                       id=task_id)['_source']
    return es_result
def to_pinyin(var_str):
    if isinstance(var_str, str):
        if var_str == 'None':
            return ""
        else:
            return pinyin.get(var_str, format='strip', delimiter="")
    else:
        return 'wrong type'  # originally '类型不对'
def get_export_example_model(domain_name, role_name):
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = fb_domain_ch2en_dict[role_name]
    task_id = 'fb_' + domain_pinyin + '_' + role_en
    example_model_file_name = EXAMPLE_MODEL_PATH + task_id + '.json'
    with open(example_model_file_name, "r") as dump_f:
        es_result = json.load(dump_f)
    return es_result
def generate_river_entitylist(table):
    """Get river entity list."""
    nrows = table.nrows
    riverlist = []
    for i in range(nrows):
        row = table.row_values(i)
        while '' in row:
            row.remove('')
        UUID = str_uni('null')
        valid_time = "21/11/2017"
        transact_time = time.strftime("%d/%m/%Y")
        tag = []
        member = river_extract_restinfo.river_extract_member(row[0])
        riverCode_dict = river_extract_restinfo.extract_riverCode()
        member_riverCode = river_extract_restinfo.extract_member_riverCode(
            row[0], riverCode_dict)
        belongs_to = uni_str(row[2])
        function = []
        rule = []
        coords = river_extract_coords.sort_river(
            r'H:\spatial entity\EXP_waterway\waterways.shp', uni_str(row[0]))
        cpoint, bbox, ghcode = river_extract_restinfo.adapt_ghash(coords)
        river_len = river_extract_restinfo.length(coords)
        meta = dict(note=str_uni('null'),
                    precision=str_uni('null'),
                    produce_time=str_uni('null'),
                    producer=str_uni('null'),
                    security_level=str_uni('null'))
        properties = dict(name=uni_str(row[0]),
                          type="river",
                          pinyin=pinyin.get(row[0], format='strip'),
                          length=repr(river_len) + "km",
                          riverCode=row[1],
                          ghashCode=ghcode)
        relation = dict(flooded_area=str_uni('null'))
        geometry = dict(Type="MultiLine",
                        coordinates=coords,
                        bbox=bbox,
                        center_point=cpoint,
                        version_time="21/11/2017",
                        SRID="EPSG 4326")
        # generate a single river entity
        single_river_entity = dict(UUID=UUID, valid_time=valid_time,
                                   transact_time=transact_time, tag=tag,
                                   member=member,
                                   member_riverCode=member_riverCode,
                                   belongs_to=belongs_to, function=function,
                                   rule=rule, meta=meta,
                                   propertities=properties,
                                   relation=relation, geometry=geometry)
        riverlist.append(single_river_entity)
    print("river entity transform done.")
    return riverlist
def _get_unique_slug(self):
    slug = pinyin.get(self.name, format='strip', delimiter='')
    # slug = slugify(f"{self.name}")
    unique_slug = slug
    num = 1
    while Group.objects.filter(slug=unique_slug).exists():
        unique_slug = '{}-{}'.format(slug, num)
        num += 1
    return unique_slug
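# A sketch of how such a helper is commonly wired into the model, assuming a
# Django model with a `slug` field; this save() override is illustrative and
# not part of the source.
def save(self, *args, **kwargs):
    # Assign the slug once, on first save, so existing URLs stay stable.
    if not self.slug:
        self.slug = self._get_unique_slug()
    super().save(*args, **kwargs)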
def next_vocab():
    global index
    vocab_list.append([])
    vocab_list[index].append(var.get())
    num_pinyin = pin.get(vocab_list[index][0], format='numerical')
    vocab_list[index].append(num_pinyin)
    print(len(vocab_list))
    index += 1
    master.switch_frame(SetChar)
def weather_month_history(city, year):
    name_py = pinyin.get(city, format="strip", delimiter="")
    citycode = city2code(name_py)
    url = 'http://service.envicloud.cn:8082/v2/monthlymete/AMFJAY1JYWKXNTQYOTCXMJK2NZM0/%s/%s' % (
        citycode, year)
    payload = ""
    headers = {'cache-control': "no-cache"}
    # note: `re` here is evidently the requests library imported under the
    # name re (the stdlib re module has no request()); an unfortunate alias
    response = re.request("GET", url, data=payload, headers=headers)
    return json.loads(response.text)
def process_row(key, value):
    import pinyin
    out = ''
    for i in value:
        out += i
        out += '\t'
    out += pinyin.get(key, format='strip', delimiter=' ')
    out += '\t1\n'
    return out
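# Usage sketch for process_row; the key and value fields are invented examples.
print(process_row('你好', ['field1', 'field2']))
# -> 'field1\tfield2\tni hao\t1\n'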
def insertMongo(cont):
    keyword = []
    global key
    global value
    symps = []
    if cont is None:
        pass
    symps = [{"症状名称": [type1]},]
    flaga = 0
    for item in content:
        words = wordslist()
        for word in words:
            name = word[0].strip('\n')
            name = name.strip(" ")
            datasource = word[1]
            # print('--------------%r-------------' % word)
            if name in item:
                keyword += ((name, datasource),)
        if re.match(level2, item):
            if flaga == 1:
                symps += [{key: value},]
                # print("key==" + key + "value::" + str(value) + '\n\n\n')
            else:
                flaga = 1
            key = re.match(level2, item).group(1)
            key = key.strip('\n')
            value = []
            continue
        else:
            value.append(item)
    # flush the final section (mirrors the sibling insertMongo variants)
    symps += [{key: value},]
    keyword = set(keyword)
    result = []
    for i in list(keyword):
        keysss = {}
        keysss["name"] = i[0]
        keysss["datasource"] = i[1]
        result += [keysss,]
    print(result)
    key_word = {"keyword": result}
    print(symps)
    sympton = {"content": symps}
    sympton.update(key_word)
    rank = (("rankone", str(type1).strip('\n')), ("ranktwo", ""),
            ("rankthree", ""), ("source", "common_symptom"),)
    sympton.update(dict(rank))
    sort = (("sortrankone", pinyin.get(str(type1).strip('\n'))),
            ("sortranktwo", ""), ("sortrankthree", ""),)
    index = {"index": str(type1).strip('\n')}
    sympton.update(index)
    sympton.update(dict(sort))
    w.write(str(type1) + "$$" + "common_symptom" + "\n")
    knowledgeBase.insert(sympton)
    print(sympton)
def szm_pinyin(name_list):
    for n in name_list:
        try:
            name = unicode(n, 'utf-8')  # convert to unicode
            szm = str(''.join([pinyin.get(l, format="strip")[0] for l in list(name)]))
            yield szm
        except Exception:
            yield None
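# Usage sketch (Python 2, since the generator relies on unicode()); the names
# are arbitrary examples, passed as UTF-8 byte strings as the code expects.
names = ['张三', '李四']
print list(szm_pinyin(names))   # ['zs', 'ls']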
def move_to_pinyin():
    global index
    vocab_list.append([])
    vocab_list[index].append(var.get())
    num_pinyin = pin.get(vocab_list[index][0], format='numerical')
    vocab_list[index].append(num_pinyin)
    print(len(vocab_list))
    index = 0
    master.switch_frame(CheckPinyin)
def projectx_aliases(self, data_aliases, source):
    px_aliases = []
    for aliases in data_aliases:
        if aliases.get("type") == 'alias' and aliases.get("source_name") == source:
            px_aliases.append(
                unidecode.unidecode(pinyin.get(aliases.get("alias"))))
    return px_aliases
def before_import(self, dataset, dry_run, **kwargs):
    schoolClasses = []
    genders = []
    universalFirstNames = []
    universalLastNames = []
    for row in dataset.dict:
        schoolClass = self.get_schoolClass(row['学校'], row['班级'])
        schoolClasses.append(schoolClass)
        gender = self.get_gender(row['性别'])
        genders.append(gender)
        universalFirstNames.append(
            pinyin.get(row['名'], format="strip").capitalize())
        universalLastNames.append(
            pinyin.get(row['姓'], format="strip").capitalize())
    dataset.append_col(schoolClasses, header='schoolClass')
    dataset.append_col(genders, header='gender')
    dataset.append_col(universalFirstNames, header='universalFirstName')
    dataset.append_col(universalLastNames, header='universalLastName')
def get_en_name(cn_names):
    # romanize names without tone marks
    en_names = []
    for n in cn_names:
        cn = n[:-2]
        # en = pinyin.get(cn)  # default output keeps tone marks
        en = pinyin.get(cn, format='strip', delimiter=' ')  # no tone marks
        en = ''.join([s.capitalize() for s in en.split(' ')])
        en_names.append(n.replace(cn, en))
    return en_names
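# Usage sketch for get_en_name, assuming each input name carries a
# two-character suffix that n[:-2] strips off and preserves; the sample
# value is invented.
print(get_en_name(['王小明01']))   # ['WangXiaoMing01']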
def get_city_pinyin():
    import pinyin
    sql_str = "SELECT name FROM city WHERE parent_id <> 0"
    cities_pinyin = []
    for item in execute_sql(sql_str):
        cities_pinyin.append(pinyin.get(item, format='strip'))
    # 重 is polyphonic: pinyin.get yields 'zhongqing' for 重庆, so patch in
    # the conventional romanization 'chongqing'
    cities_pinyin.remove('zhongqing')
    cities_pinyin.append('chongqing')
    return cities_pinyin
def add(self, village):
    cursor = connection.cursor()
    cursor.execute("INSERT INTO village (id, province, city, county, "
                   "town, category, name, pinyin) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                   (village.id, village.town.county.city.province.id,
                    village.town.county.city.id,
                    village.town.county.id, village.town.id, village.category,
                    village.name, pinyin.get(village.name)))
    connection.commit()
def process_item(self, item, spider):
    if spider.name not in ['meituan']:
        return item
    if self.filter_dic.get(item['restaurant_name']) == item['address']:
        print(item['restaurant_name'])
        print(item['address'])
        raise DropItem("Duplicate item found: %s" % item)
    else:
        self.filter_dic[item['restaurant_name']] = item['address']
        try:
            item['lng'], item['lat'] = gaode_to_baidu(float(item['lng']),
                                                      float(item['lat']))
            item['province_code'] = pinyin.get(item['province'])
            item['city_code'] = pinyin.get(item['city'])
            item['region_code'] = pinyin.get(item['region'])
            item['area_code'] = pinyin.get(item['area'])
        except BaseException as e:
            print(e)
        return item
def ajax_get_macro_trendline():
    task_name = request.args.get('task_name', '')
    pinyin_task_name = pinyin.get(task_name.encode('utf-8'), format='strip',
                                  delimiter="_")
    r_pinyin_task_name = "trendline_" + pinyin_task_name
    results = r_trendline.get(r_pinyin_task_name)
    return results
def ajax_get_macro_prediction():
    task_name = request.args.get('task_name', '')
    pinyin_task_name = pinyin.get(task_name.encode('utf-8'), format='strip',
                                  delimiter="_")
    weibo_count, user_count, rank = get_macro_prediction_count(pinyin_task_name)
    return json.dumps([weibo_count, user_count, rank])
def test_get(self):
    """Test function pinyin.get"""
    self.assertEqual(pinyin.get('你好'), u'nihao')
    self.assertEqual(pinyin.get('你好吗?'), u'nihaoma?')
    self.assertEqual(pinyin.get('你好吗？'), u'nihaoma？')
    self.assertEqual(pinyin.get('你好'), u'nihao')
    self.assertEqual(pinyin.get('叶'), u'ye')
    self.assertEqual(pinyin.get('你好', " "), u'ni hao')
    self.assertEqual(pinyin.get('你好吗?', " "), u'ni hao ma ?')
    self.assertEqual(pinyin.get('你好吗？', " "), u'ni hao ma ？')
def insertMongo(cont):
    words = wordslist()
    rank = ()
    sort = ()
    medic = []
    keyword = ()
    for word in words:
        name = word[0].strip('\n')
        name = name.strip(" ")
        datasource = word[1]
        if name in result:
            keyword += ((name, datasource),)
    names = med_name.split(' ')
    name_med = names[0].strip('\n')
    # print(names[0].strip('\n'))
    rank = (("rankone", type1), ("ranktwo", type2), ("rankthree", name_med),)
    rank += (("source", "medicine"),)
    index = {"index": name_med}
    levela = re.compile(r'【(\w+)】([\w。;((.))、:~/,%]+)')
    medic += [{'药品名称': med_name.strip('\n')},]
    if type3:
        medic += [{'药品类别': type3},]
    item = re.finditer(levela, result)
    for i in item:
        medic += [{i.group(1).strip('¥'): i.group(2).strip('¥')},]
    keyword = set(keyword)
    res = []
    for it in keyword:
        keysss = {}
        keysss["name"] = it[0]
        keysss["datasource"] = it[1]
        res += [keysss,]
    key_word = {"keyword": res}
    medic = {"content": medic}
    meds = medic.copy()
    meds.update(key_word)
    meds.update(dict(rank))
    sort = {"sortrankone": pinyin.get(str(type1).strip('\n')),
            "sortranktwo": pinyin.get(str(type2).strip('\n')),
            "sortrankthree": pinyin.get(str(med_name).strip('\n'))}
    meds.update(sort)
    meds.update(index)
    print(meds)
    mediciness.insert(meds)
def pinyin():
    parser = argparse.ArgumentParser()
    parser.add_argument("chars", help="Input chinese words")
    args = parser.parse_args()
    if not args.chars:
        parser.print_help()
        return
    print(get(compat.u(args.chars)))
def import_qingbank_user(filepath=None):
    import pinyin
    import xlrd
    import uuid
    from dxc.services import api_contact, api_department
    book = xlrd.open_workbook('d:/2014.xls')
    sheet = book.sheets()[0]
    max_row = sheet.nrows
    print 'Begin init qingbank contact'
    name_dict = {}
    # max_row = 3
    repeat_id = []
    for i in range(1, max_row):
        print str.format('{0}/{1}', i, max_row)
        employee_id = sheet.row_values(i)[9].strip()
        desc = None
        if employee_id is None or employee_id == '':
            employee_id = str(uuid.uuid1())
            desc = u'非在编人员'  # 'non-regular employee'
        dept_name = sheet.row_values(i)[1].strip()
        duty = sheet.row_values(i)[2].strip()
        name = sheet.row_values(i)[3].strip()
        name_pinyin = pinyin.get(name)
        name_shot = pinyin.get_initial(name, '')
        mobile = sheet.row_values(i)[4]
        if isinstance(mobile, float):
            mobile = str(int(mobile))
        mobile = mobile.strip()
        tel = sheet.row_values(i)[5]
        if isinstance(tel, float):
            tel = str(int(tel))
        tel = tel.strip()
        fax = sheet.row_values(i)[6]
        if isinstance(fax, float):
            fax = str(int(fax))
        fax = fax.strip()
        # persist to the database
        user = user_datastore.get_user(employee_id + '@qingbank.cn')
        if user is not None:
            print 'already exist the same id: ' + user.username
            user.contact.description = 'dept_name'
            repeat_id.append(user.username)
            continue
        user = user_datastore.create_user(username=employee_id,
                                          password=name_pinyin,
                                          email=employee_id + '@qingbank.cn')
        db.session.commit()
        dept = api_department.first(name=dept_name)
        if dept is None:
            dept = api_department.create(name=dept_name)
        api_contact.create(name=name, name_pinyin=name_pinyin,
                           name_shot=name_shot, user_id=user.id,
                           department_id=dept.id, duty=duty, mobile=mobile,
                           telephone=tel, fax=fax, description=desc)
    print repeat_id
def get_cache_file(self, text):
    delimiter = '#'
    pys = pinyin.get(text, delimiter=delimiter)
    if pys:
        pys = pys.split(delimiter)
        pys = [py for py in pys if re.match('[a-zA-Z]', py)]
        pys = ''.join(pys)
    # cap the pinyin prefix at 251 - 6 - 1 = 244 chars so that
    # prefix + '_' + 6-char hash + '.wav' stays within a 255-char filename limit
    pys = pys[:251 - 6 - 1]
    suffix = hashlib.sha1(text + str(self.get_tts_session_params())).hexdigest()[:6]
    filename = os.path.join(self.cache_dir, pys + '_' + suffix + '.wav')
    return filename
def xm_pinyin(filename, newfile):
    with open(filename, 'r') as f:
        for n in f.readlines():
            try:
                name = pinyin.get(n.rstrip())
            except:
                pass
            with open(newfile, 'a') as ff:
                ff.writelines(name + '\n')
def srtHanziToPinyin(self):
    """Convert all hanzi to pinyin."""
    subs_pinyin = {}
    for key in self.srtDB:
        one_sub = self.srtDB[key]
        pin = pinyin.get(one_sub.subText)
        new_sub = _SrtEntry(one_sub.subNumber, one_sub.timeFrame, pin)
        subs_pinyin[key] = new_sub
    return SrtObject(subs_pinyin)
def addPinyinToHanziSrt(self):
    """Append pinyin to hanzi text."""
    subs_merged = {}
    for key in self.srtDB:
        one_sub = self.srtDB[key]
        pin = pinyin.get(one_sub.subText)
        new_sub = _SrtEntry(one_sub.subNumber, one_sub.timeFrame,
                            one_sub.subText + pin)
        subs_merged[key] = new_sub
    return SrtObject(subs_merged)
def szm_pinyin(filename, newfile):
    with open(filename, 'r') as f:
        for n in f.readlines():
            try:
                name = unicode(n.rstrip(), 'utf-8')  # convert to unicode
                szm = ''.join([pinyin.get(l)[0] for l in list(name)])
            except:
                pass
            with open(newfile, 'a') as ff:
                ff.writelines(szm + '\n')
def test_get(self):
    self.assertEqual(pinyin.get('你好'), 'nihao')
    self.assertEqual(pinyin.get(u('你好')), 'nihao')
    self.assertEqual(pinyin.get('你好吗?'), 'nihaoma?')
    self.assertEqual(pinyin.get('你好吗？'), u('nihaoma？'))
    self.assertEqual(pinyin.get('你好'), 'nihao')
    self.assertEqual(pinyin.get('叶'), 'ye')
def insertMongo(cont):
    global key
    global value
    keyword = []
    emergency = []
    if cont is None:
        pass
    emergency = [{"症状名称": type1},]
    flaga = 0
    for item in content:
        words = wordslist()
        for word in words:
            name = word[0].strip('\n')
            name = name.strip(" ")
            datasource = word[1]
            # print('--------------%r-------------' % word)
            if name in item:
                keyword += ((name, datasource),)
        if re.match(level1, item):
            if flaga == 1:
                emergency += [{key: value},]
                # print("key==" + key + "value::" + str(value) + '\n\n\n')
            else:
                flaga = 1
            key = re.match(level1, item).group(1)
            key = key.strip('\n')
            value = []
            continue
        else:
            value.append(item)
    # flush the final section
    emergency += [{key: value},]
    # emergency = dict(emergency)
    keyword = set(keyword)
    result = []
    for i in list(keyword):
        keysss = {}
        keysss["name"] = i[0]
        keysss["datasource"] = i[1]
        result += [keysss,]
    print(result)
    key_word = {"keyword": result}
    emergency = {"content": emergency}
    emergency.update(key_word)
    rank = (("rankone", type1.strip('\n')), ("ranktwo", ""),
            ("rankthree", ""), ("source", "emergency"),)
    emergency.update(dict(rank))
    index = {"index": type1.strip('\n')}
    sort = (("sortrankone", pinyin.get(str(type1).strip('\n'))),
            ("sortranktwo", ""), ("sortrankthree", ""),)
    emergency.update(index)
    emergency.update(dict(sort))
    w.write(str(type1).strip('\n') + "$$" + "emergency" + "\n")
    knowledgeBase.insert(emergency)