Exemplo n.º 1
0
def searchRecursive( node, letter, word, previousRow, results, maxCost ):
    """Extend the edit-distance table by one trie edge and collect matches.

    Computes the Levenshtein row for the prefix ending in ``letter`` from
    ``previousRow`` (the row of the parent trie node), records ``node.word``
    when the full-word distance is within ``maxCost``, and recurses into the
    children while any cell of the row can still stay within budget.

    Two characters count as equal when they are identical or when
    ``pinyin.get`` gives the same reading for both.

    :param node: trie node; must expose ``word`` (``None`` when no word ends
        here) and ``children`` (mapping of letter -> child node).
    :param letter: the character on the edge leading to ``node``.
    :param word: the target word being searched for.
    :param previousRow: edit-distance row of the parent node
        (``len(word) + 1`` entries).
    :param results: list mutated in place; ``[word, cost]`` pairs are appended.
    :param maxCost: largest edit distance that is still accepted.
    """
    columns = len(word) + 1
    currentRow = [previousRow[0] + 1]

    # Build one row for the letter, with a column for each letter in the
    # target word, plus one for the empty string at column 0.
    # ``range`` (not ``xrange``) so the function runs on Python 2 and 3.
    for column in range(1, columns):
        target = word[column - 1]
        insertCost = currentRow[column - 1] + 1
        deleteCost = previousRow[column] + 1
        # The pinyin lookup only runs when the characters differ.
        if target == letter or pinyin.get(target) == pinyin.get(letter):
            replaceCost = previousRow[column - 1]
        else:
            replaceCost = previousRow[column - 1] + 1

        currentRow.append(min(insertCost, deleteCost, replaceCost))

    # The last entry is the distance between the whole target word and the
    # prefix spelled by this node; keep the node's word if it is close enough.
    if currentRow[-1] <= maxCost and node.word is not None:
        results.append([node.word, currentRow[-1]])

    # Only descend when some cell is still within budget; otherwise no
    # longer prefix can get back under maxCost.
    if min(currentRow) <= maxCost:
        for childLetter in node.children:
            searchRecursive(node.children[childLetter], childLetter, word,
                            currentRow, results, maxCost)
Exemplo n.º 2
0
 def test_numbers(self):
     """Check get / get_consonant / get_vowel on Arabic numerals."""
     expect = self.assertEqual
     single_digit = pinyin.get('2')
     expect(single_digit, u'er')
     all_digits = pinyin.get('1234567890')
     expect(all_digits, u'yiersansiwuliuqibajiuling')
     expect(pinyin.get_consonant('1'), u'y')
     expect(pinyin.get_vowel('1'), u'i')
def insertMongo(cont):
    """Build one knowledge-base document from module-level state and insert it.

    Reads the module-level names ``type1``, ``type2``, ``type3`` and
    ``content``, collects keyword hits from ``wordslist()``, groups the lines
    of ``content`` into sections keyed by their ``【...】`` heading, and hands
    the resulting dict to ``knowledgeBase.insert``. The document is also
    printed.

    :param cont: only compared against ``None``; that branch is a no-op, so
        the argument has no effect on the result.

    NOTE(review): ``keyword``, ``key`` and ``value`` are declared global, so
    they persist between calls. If the first line of ``content`` is not a
    heading, ``value.append`` operates on whatever ``value`` an earlier call
    left behind (or raises NameError if none exists) -- confirm this is
    intended.
    """
    print(type1+'#########')
    print(type2+'#########')
    print(type3+'#########')
    global keyword
    global key
    global value
    keyword = []
    tech = []
    if cont == None:
        pass
    # The first section always carries the third-level type as its name.
    tech = [{"名称":[type3]},]
    # flaga becomes 1 once the first heading has been seen.
    flaga = 0
    for item in content:
        # wordslist() is called again for every line of content.
        words = wordslist()
        for word in words:
            name = word[0].strip('\n')
            name = name.strip(" ")
            datasource = word[1]
            # Record every (name, datasource) pair whose name occurs in the line.
            if name in item:
                keyword +=((name,datasource),)
        if '【' in item:
            # A new heading closes the previous section (if there was one).
            if flaga == 1:
                tech += [{key:value},]
            else: flaga=1
            # The heading text, without brackets, spaces and newlines, is the key.
            item = item.replace('【','')
            item = item.replace('】','')
            key = item.replace(' ','')
            key = key.strip('\n')
            value = []
            continue
        else:
            # Non-heading lines belong to the section currently being built.
            value.append(item)
    # Flush the last open section.
    tech += [{key:value},]
    # De-duplicate keyword hits before turning them into dicts.
    keyword = set(keyword)
    result = []
    for i in list(keyword):
        keysss = {}
        keysss["name"] = i[0]
        keysss["datasource"] = i[1]
        result += [keysss,]

    key_word = {"keyword":result}

    # From here on ``tech`` is the document dict rather than the section list.
    tech = {"content":tech}
    tech.update(key_word)
    rank = (("rankone",type1.strip('\n')),("ranktwo",type2.strip('\n')),("rankthree",type3.strip('\n')),("source","diag_rule"),)
    tech.update(dict(rank))
    # Pinyin renderings of the three type names, stored under sortrank* keys.
    sort = (("sortrankone",pinyin.get(str(type1).strip('\n'))),("sortranktwo",pinyin.get(str(type2).strip('\n'))),("sortrankthree",pinyin.get(str(type3).strip('\n'))),)
    index = {"index":type3.strip('\n')}
    tech.update(index)
    # dict.update accepts the tuple of (key, value) pairs directly.
    tech.update(sort)

    knowledgeBase.insert(tech)
    print(tech)
Exemplo n.º 4
0
def _two_char_chunks(path):
    """Return the two-character slices taken every three characters of each line of *path*."""
    chunks = []
    # ``with`` closes the file; the original left both handles open.
    with open(path, encoding='utf-8') as handle:
        for line in handle:
            for start in range(0, len(line), 3):
                piece = line[start:start + 2]
                if piece != "\n":
                    chunks.append(piece)
    return chunks


def _rhyme_tail(syllable, vowels=("a", "e", "i", "o", "u")):
    """Return *syllable* from its first vowel onward (the whole string if it has none)."""
    for position, char in enumerate(syllable):
        if char in vowels:
            return syllable[position:]
    return syllable


def Line5():
    """Print four random noun+verb+noun lines; line 4 rhymes with line 2.

    Words are read from ``../data/freqword_n.txt`` (nouns) and
    ``../data/freqword_v.txt`` (verbs) as two-character chunks. Each printed
    line is ``noun + second character of a verb chunk + noun``. The rhyme is
    the pinyin of the closing noun's second character from its first vowel
    onward: it is captured on the second line, and on the fourth line the
    closing noun is re-drawn until its rhyme matches.

    Returns nothing; output goes to stdout.

    NOTE(review): indices are drawn with ``randint(1, len - 1)``, so the
    first chunk of each list is never selected -- confirm that is intended.
    """
    nounlist = _two_char_chunks('../data/freqword_n.txt')
    verblist = _two_char_chunks('../data/freqword_v.txt')

    rhythm = ""
    for num in range(4):
        # Same draw order as before (noun, closing noun, verb) so a seeded
        # run picks the same words.
        i = random.randint(1, len(nounlist) - 1)
        i1 = random.randint(1, len(nounlist) - 1)
        j = random.randint(1, len(verblist) - 1)

        if num == 1:
            rhythm = _rhyme_tail(pinyin.get(nounlist[i1][1], format="strip"))
        elif num == 3:
            # The rhyme is recomputed from scratch for every candidate; the
            # original could reuse a stale vowel index from the previous one.
            while _rhyme_tail(pinyin.get(nounlist[i1][1], format="strip")) != rhythm:
                i1 = random.randint(1, len(nounlist) - 1)

        print(nounlist[i] + verblist[j][1] + nounlist[i1])
Exemplo n.º 5
0
 def test_init_contact(self):
     """Initialise contact information from the first sheet of 'd:/a.xls'.

     For each processed row, reads department, job, name and four phone
     columns, derives pinyin-based user names, and saves a QBContactInfo
     linked to a User fetched by a fixed username.

     NOTE(review): ``max_row`` is overwritten with 3 right after being read
     from the sheet, so ``range(2, max_row)`` visits row index 2 only --
     looks like a debugging leftover; confirm.
     """
     import xlrd
     book = xlrd.open_workbook('d:/a.xls')
     sheet = book.sheets()[0]
     max_row = sheet.nrows
     print 'begin init database'
     # Maps a name's pinyin to the number of times it has been seen so far.
     name_dict = {}
     max_row = 3
     for i in range(2, max_row):
         print 'row', i
         dept = sheet.row_values(i)[1].encode("utf-8").strip()
         job =  sheet.row_values(i)[2].encode("utf-8").strip()
         name = sheet.row_values(i)[3].encode("utf-8").strip()
         name_pinyin = pinyin.get(name)
         name_shot = pinyin.get_initial(name, '')

         username = name_pinyin

         mobile =  str(sheet.row_values(i)[4])
         telephone = str(sheet.row_values(i)[5])
         innerphone = str(sheet.row_values(i)[6])
         fax = str(sheet.row_values(i)[7])

         # Remove the '.' that float-typed cells introduce
         # (presumably via self.remove_dot; helper not visible here).
         mobile = self.remove_dot(mobile)
         telephone = self.remove_dot(telephone)
         innerphone = self.remove_dot(innerphone)
         fax = self.remove_dot(fax)

         # Count duplicate name pinyin; repeats get the current count appended
         # to the user name.
         if name_pinyin in name_dict:
             username = username + str(name_dict[name_pinyin])
             name_dict[name_pinyin] += 1
             pass
         else:
             name_dict[name_pinyin]=1

         print name, job, mobile, telephone, innerphone, fax, dept, pinyin.get(name), name_shot, username


         # Create the record in the database.
         # NOTE(review): the lookup uses a fixed username, not the derived
         # ``username`` above -- confirm.
         user = User.objects.get(username='******');


         qbinfo = QBContactInfo(duty=job, mobile=mobile, telephone=telephone, innerphone=innerphone, fax=fax)
         qbinfo.user = user
         qbinfo.save()

     print len(name_dict)
     print name_dict
     print 'end'
Exemplo n.º 6
0
    def test_get(self):
        """Verify pinyin.get for the default and for each ``format`` option."""
        expect = self.assertEqual

        # The default format must equal the explicit "diacritical" format.
        default_reading = pinyin.get('你好')
        diacritical_reading = pinyin.get('你好', format="diacritical")
        expect(default_reading, diacritical_reading)

        # One check per supported format.
        expect(pinyin.get(u('你好'), format="strip"), u('nihao'))
        expect(pinyin.get(u('你好'), format="numerical"), u('ni3hao3'))
        expect(pinyin.get(u('你好'), format="diacritical"), u('nǐhǎo'))

        # Punctuation passes through unchanged (checked twice, as before).
        expect(pinyin.get('你好吗?'), u('nǐhǎoma?'))
        expect(pinyin.get('你好吗?'), u('nǐhǎoma?'))

        expect(pinyin.get('你好'), u('nǐhǎo'))
        expect(pinyin.get('叶'), u('yè'))
        expect(pinyin.get('少女'), u('shǎonv̌'))
Exemplo n.º 7
0
    def parse_small_city(self, response):
        """Build one restaurant-list request per (area, category) pair.

        Reads the category links and the region/area panel from ``response``,
        copies ``response.meta['item']`` for every combination, fills in the
        category/region/area fields plus their pinyin codes, and returns the
        de-duplicated list of ``Request`` objects whose callback is
        ``self.parse_restaurant_list``.

        Side effect: increments the module-level ``small_city_xx_cnt`` once
        per combination (before de-duplication) and prints progress.

        :param response: page response; ``response.meta['item']`` is the
            template item.
        :return: list of ``Request`` objects.
        :raises IndexError: if a region block has no title, or the category
            and area lists do not line up with their URL/id lists.
        """
        # Must precede any use of the name: Python 3 rejects a ``global``
        # statement that follows an assignment to the same name.
        global small_city_xx_cnt

        item = response.meta['item']
        request_list = []
        url_set = set()

        # category
        category_anchor = ('//li[@class="term-list-item"]//ul[@class="desc Fix"]'
                           '//li/a[starts-with(@onclick, '
                           '"pageTracker._trackPageview(\'dp_home_food_hotdaohang_fenlei")]')
        category_url_list = response.xpath(category_anchor + '/@href').extract()
        category_list = response.xpath(category_anchor + '/text()').extract()

        # area
        region_data = response.xpath('//div[@class="pop-panel ep_quick-search ep_quick-search-regions Fix"]/div[@class="dp-option-wrap"]/dl').extract()
        for region_markup in region_data:
            region_html = Selector(text=region_markup)
            region = region_html.xpath('//dt/a/strong/text()').extract()[0].strip('[]\n\r\t ')

            area_list = region_html.xpath('//ul/li/a/text()').extract()
            area_id_list = region_html.xpath('//ul/li/a/@data-value').extract()
            for area_index, area in enumerate(area_list):
                for cate_index, category in enumerate(category_list):
                    one_item = deepcopy(item)
                    one_item['category'] = category.strip()
                    one_item['region'] = region.strip()
                    one_item['region_code'] = pinyin.get(one_item['region'])
                    one_item['area'] = area.strip()
                    one_item['area_code'] = pinyin.get(one_item['area'])
                    url = "".join(['http://www.dianping.com', category_url_list[cate_index], 'r', area_id_list[area_index]])

                    small_city_xx_cnt += 1
                    print('大城市商圈分类url数量:\t'+str(small_city_xx_cnt))
                    print(url)
                    print(one_item['city'])
                    print(one_item['category'])
                    print(one_item['area'])

                    # Skip URLs that were already scheduled in this call.
                    if url in url_set:
                        continue
                    url_set.add(url)
                    request_list.append(Request(url,
                                                method='GET',
                                                meta={'item': one_item},
                                                headers=headers,
                                                cookies=None,
                                                body=None,
                                                priority=0,
                                                errback=None,
                                                encoding=response.encoding,
                                                callback=self.parse_restaurant_list))

        return request_list
Exemplo n.º 8
0
    def parse_big_city(self, response):
        """Build one restaurant-list request per (area, category) pair.

        Category links come from the page itself; regions and areas come from
        the HTML embedded in the ``J_auto-load`` script block. For every
        combination a copy of ``response.meta['item']`` is filled in with the
        category/region/area fields plus their pinyin codes, and a
        ``Request`` with callback ``self.parse_restaurant_list`` is created
        for each URL not seen before in this call.

        Side effect: increments the module-level ``big_city_xx_cnt`` once per
        scheduled (non-duplicate) request and prints progress.

        :param response: page response; ``response.meta['item']`` is the
            template item.
        :return: list of ``Request`` objects.
        :raises AttributeError: if the ``J_auto-load`` script block is absent
            (``re.search`` returns ``None``).
        :raises IndexError: if a region block has no title, or the category
            and area lists do not line up with their id/URL lists.
        """
        # Must precede any use of the name: Python 3 rejects a ``global``
        # statement that follows a use of the same name.
        global big_city_xx_cnt

        item = response.meta['item']
        request_list = []
        url_set = set()

        # category
        category_anchor = ('//div[@class="secondary-category J-secondary-category"]'
                           '/a[starts-with(@onclick, '
                           '"pageTracker._trackPageview(\'dp_head_guangzhou_food_fenlei")]')
        category_id_list = response.xpath(category_anchor + '/@href').extract()
        category_list = response.xpath(category_anchor + '/text()').extract()

        # area
        # Raw string so ``\s``/``\S`` reach the regex engine verbatim.
        # NOTE(review): the pattern is text; this assumes response.body is
        # text-compatible (as on Python 2) -- confirm for Python 3.
        js = re.search(r'<script class="J_auto-load" type="text/plain">([\s\S]*?)</script>', response.body).group(1)
        data = Selector(text=js)
        region_data = data.xpath('//div[@class="fpp_business"]//dl').extract()
        for region_markup in region_data:
            region_html = Selector(text=region_markup)
            region = region_html.xpath('//dt/a/text()').extract()[0].strip('[]\n\r\t ')

            area_list = region_html.xpath('//li/a/text()').extract()
            area_url_list = region_html.xpath('//li/a/@href').extract()
            for area_index, area in enumerate(area_list):
                for cate_index, category in enumerate(category_list):
                    one_item = deepcopy(item)
                    one_item['category'] = category.strip()
                    one_item['region'] = region
                    one_item['region_code'] = pinyin.get(one_item['region'])
                    one_item['area'] = area.strip()
                    one_item['area_code'] = pinyin.get(one_item['area'])

                    url = "".join(['http://www.dianping.com', area_url_list[area_index], 'g', category_id_list[cate_index]])
                    print('大城市商圈分类url数量:\t'+str(big_city_xx_cnt))
                    print(one_item['city'])
                    print(one_item['category'])
                    print(one_item['area'])
                    print(url)

                    # Skip URLs that were already scheduled in this call.
                    if url in url_set:
                        continue
                    url_set.add(url)
                    big_city_xx_cnt += 1
                    request_list.append(Request(url,
                                                method='GET',
                                                meta={'item': one_item},
                                                headers=headers,
                                                cookies=None,
                                                body=None,
                                                priority=0,
                                                errback=None,
                                                encoding=response.encoding,
                                                callback=self.parse_restaurant_list))

        return request_list
Exemplo n.º 9
0
def makeVariants(ptxt):
    """Return three spellings of *ptxt*: original, dotted pinyin, capitalised pinyin.

    :param ptxt: text to romanise; surrounding whitespace is stripped first.
    :return: ``[stripped_text, roman, en]`` where ``roman`` is the pinyin of
        each character joined with ``.`` and ``en`` is the pinyin of the whole
        text with only its first letter upper-cased.
    :raises Exception: whatever the capitalisation step raises (e.g.
        ``IndexError`` when the pinyin is empty); the offending text and
        pinyin are printed before the exception is re-raised.
    """
    ptxt = ptxt.strip()
    roman = u'.'.join([pinyin.get(char) for char in ptxt])
    en = pinyin.get(ptxt)
    try:
        en = en[0].upper() + en[1:].lower()
    except Exception:
        # Single-argument print calls behave the same on Python 2 and 3.
        print(ptxt)
        print(en)
        raise
    return [ptxt, roman, en]
Exemplo n.º 10
0
def register(request):
    """Handle the registration form and assign a unique pinyin domain.

    GET renders ``register.html`` with an empty ``UserCreationForm``.
    POST validates the form; on success it saves the user, copies first
    name, last name and e-mail (taken from ``username``) onto it, and creates
    an ``AccountModel`` whose ``aDomain`` is the pinyin of the full name with
    every character separated by ``-``. If that domain is already taken, a
    numeric suffix is appended: ``-1`` after the bare domain, otherwise the
    most recent suffix plus one. Every POST, valid or not, redirects to
    ``/register``.

    :param request: the Django request.
    :return: an HTTP response (render on GET, redirect on POST).
    """
    if request.method != 'POST':
        form = UserCreationForm()
        ctx = {'form': form}
        ctx.update(csrf(request))
        return render(request, "register.html", ctx)

    form = UserCreationForm(request.POST)
    if form.is_valid():
        cd = form.cleaned_data
        new_user = form.save()
        new_user.first_name = cd['first_name']
        new_user.last_name = cd['last_name']
        new_user.email = cd['username']
        new_user.save()

        # str.join over a string puts '-' between every character.
        aDomain = pinyin.get('-'.join(cd['first_name'] + cd['last_name']))

        # Anchored at both ends: without the trailing ``$`` a longer domain
        # sharing this prefix also matched, and the suffix parse below then
        # failed on ``None``.
        same_name = AccountModel.objects.filter(
            aDomain__regex='^' + aDomain + r'(-\d+)?$').order_by('-aUser__id')
        if not same_name:
            domain = aDomain
        else:
            the_last = same_name[0].aDomain
            numbered = re.match('^' + re.escape(aDomain) + r'-(\d+)$', the_last)
            # Bare domain (no numeric suffix) -> start numbering at 1.
            next_index = int(numbered.group(1)) + 1 if numbered else 1
            domain = aDomain + '-' + str(next_index)
        AccountModel(aUser=new_user, aDomain=domain).save()
    return redirect("/register")
Exemplo n.º 11
0
def get_role2feature_info(domain_name, role_name):
    """Fetch the stored feature filters for one role of a domain.

    The document id is ``<pinyin of domain_name>_<English role name>``, where
    the English name comes from ``domain_ch2en_dict``.

    :param domain_name: domain name to romanise for the id.
    :param role_name: role name; must be a key of ``domain_ch2en_dict``.
    :return: ``{'political_side': ..., 'psy_feature': ...}`` decoded from the
        stored JSON strings (``psy_feature`` falls back to ``[]`` when missing
        or undecodable), or ``[]`` when the document cannot be read.
    :raises KeyError: if ``role_name`` is not in ``domain_ch2en_dict``.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_name_en = domain_ch2en_dict[role_name]
    _id = domain_pinyin + '_' + role_name_en
    try:
        feature_info_dict = es.get(index=weibo_role_index_name,
                                   doc_type=weibo_role_index_type,
                                   id=_id)['_source']

        feature_filter_dict = {
            'political_side': json.loads(feature_info_dict['political_side']),
        }
        try:
            feature_filter_dict['psy_feature'] = json.loads(
                feature_info_dict['psy_feature'])
        except Exception:
            feature_filter_dict['psy_feature'] = []

        return feature_filter_dict

    # ``Exception`` rather than a bare except, so KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    except Exception:
        return []
Exemplo n.º 12
0
 def add(self, town):
     """Insert *town* (with its province/city/county ids and pinyin name) and commit."""
     cursor = connection.cursor()
     county = town.county
     statement = ("INSERT INTO town (id, province, city, county, name, "
                  "pinyin) VALUES (?, ?, ?, ?, ?, ?)")
     row = (
         town.id,
         county.city.province.id,
         county.city.id,
         county.id,
         town.name,
         pinyin.get(town.name),
     )
     cursor.execute(statement, row)
     connection.commit()
Exemplo n.º 13
0
 def add(self, county):
     """Insert *county* (with its province/city ids and pinyin name) and commit."""
     cursor = connection.cursor()
     city = county.city
     statement = ("INSERT INTO county (id, province, city, name, pinyin)"
                  " VALUES (?, ?, ?, ?, ?)")
     row = (
         county.id,
         city.province.id,
         city.id,
         county.name,
         pinyin.get(county.name),
     )
     cursor.execute(statement, row)
     connection.commit()
Exemplo n.º 14
0
 def to_city(self, var_str, type):
     """Romanise *var_str* character by character.

     :param var_str: text to convert; anything that is not a ``str``, and the
         literal string ``'None'``, yields ``''``.
     :param type: ``"FULL"`` for the concatenated tone-less pinyin of every
         character, ``"HEAD"`` for the first letter of each character's
         pinyin.
     :return: the requested string, ``''`` for rejected input, or ``None``
         when *type* is neither ``"FULL"`` nor ``"HEAD"``.
     """
     if not isinstance(var_str, str) or var_str == 'None':
         return ''

     # One lookup per character (the original looked each one up twice).
     syllables = [pinyin.get(single, format='strip', delimiter="")
                  for single in var_str]
     if type == "FULL":
         return ''.join(syllables)
     if type == "HEAD":
         # ``[:1]`` tolerates an empty reading instead of raising IndexError.
         return ''.join(syllable[:1] for syllable in syllables)
     return None
Exemplo n.º 15
0
def get_add_opinion_corpus(task_detail):
    """Register a new opinion corpus and queue it for expansion.

    The corpus is stored under the pinyin of its name. If a document with
    that id can already be fetched, nothing is written.

    :param task_detail: mapping with ``'corpus_name'`` and ``'submitter'``.
    :return: ``'exists'`` when the id is already present; otherwise ``True``
        if the document was indexed (even if queueing it afterwards failed)
        and ``False`` if indexing failed.
    :raises KeyError: if *task_detail* lacks a required key.
    """
    corpus_name = task_detail['corpus_name']
    corpus_pinyin = pinyin.get(corpus_name, format='strip', delimiter='_')

    item_dict = {
        'corpus_name': corpus_name,
        'corpus_pinyin': corpus_pinyin,
        'submitter': task_detail['submitter'],
        'status': 0,
    }

    try:
        es.get(index=opinion_corpus_index_name,
               doc_type=opinion_corpus_index_type,
               id=corpus_pinyin)
    except Exception:
        # Any lookup failure is treated as "not there yet", as before.
        pass
    else:
        return 'exists'

    mark = False
    try:
        es.index(index=opinion_corpus_index_name,
                 doc_type=opinion_corpus_index_type,
                 body=item_dict,
                 id=corpus_pinyin)
        mark = True

        r_r.lpush(opinion_expand_task_queue_name, json.dumps(item_dict))
    except Exception:
        # Best effort by design: the outcome is reported through ``mark``.
        pass

    return mark
Exemplo n.º 16
0
def upload():
    """Accept an uploaded image, run it through CloudImage and return JSON.

    The upload is saved under ``UPLOAD_FOLDER`` using a secured, romanised
    file name, handed to ``CloudImage`` as an open binary file, and deleted
    again afterwards.

    :return: ``jsonify(type='result', content=<result>)`` on success,
        ``jsonify(type='mistake', content=None)`` when the image cannot be
        opened or processed or yields no result, and ``None`` for non-POST
        requests or disallowed files (unchanged from the original).
    """
    if request.method != 'POST':
        return None
    file_val = request.files['file']
    if not (file_val and allowed_file(file_val.filename)):
        return None

    ascii_name = pinyin.get(file_val.filename)
    filename = secure_filename(ascii_name)
    file_path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename)
    file_val.save(file_path)

    cloud_img = None
    result = None
    try:
        try:
            image_file = open(file_path, 'rb')
        except Exception:
            return jsonify(type='mistake', content=None)
        # The handle stays open until the result has been fetched, in case
        # CloudImage reads lazily, and is then closed (it used to leak).
        with image_file:
            # cloud_sight
            try:
                cloud_img = CloudImage(file=image_file, locale=LOCALE, lang=LANG)
            except Exception:
                return jsonify(type='mistake', content=None)
            if cloud_img:
                result = cloud_img.result()
    finally:
        # Runs on every path, so a failed upload no longer stays on disk.
        try:
            os.remove(file_path)
        except OSError:
            print('Remove failed. file not found.')

    if not cloud_img:
        print('cloud_img is None')
        return jsonify(type='mistake', content=None)
    if not result:
        print('result is None')
        return jsonify(type='mistake', content=None)
    return jsonify(type='result', content=result)
Exemplo n.º 17
0
def search(query,tag):
	"""Return stored entries ranked by how many query characters hit the index.

	Opens two dbm files chosen by *tag* ('song', 'singer' or 'both'): an
	id -> entry store (``songid``) and an index (``daopai``) mapping a
	character to a space-separated id string. Each character of the UTF-8
	decoded *query* is looked up in ``daopai`` and its ids are passed to
	``mat2hash`` together with ``counthash`` (presumably accumulating a hit
	count per id -- helper not visible here). The ids are then sorted by
	their ``counthash`` value, highest first, and up to 101 entries are
	returned from ``songid``.

	NOTE(review): for any other *tag* neither database is bound; the lookup
	error is swallowed by the bare ``except`` below, so the function returns
	an empty list.
	NOTE(review): the databases are never closed.
	"""
	if tag=='song':
		songid=anydbm.open('id_song.db','r')
		daopai=anydbm.open('daopai_song.db','r')
	elif tag=='singer':
		songid=anydbm.open('id_singer.db','r')
		daopai=anydbm.open('daopai_singer.db','r')
	elif tag=='both':
		songid=anydbm.open('id_both.db','r')
		daopai=anydbm.open('daopai_both.db','r')
	counthash={}
	out=[]
	for i in query.decode('utf-8'):
		try:
			# Characters in the CJK range U+4E00..U+9FA5 are also looked up
			# by their pinyin. NOTE(review): ids2 is never used afterwards,
			# and a missing pinyin key skips the direct lookup below too.
			if i>= u'\u4e00' and i<=u'\u9fa5':
				idstr2=daopai[pinyin.get(i)]
				ids2=idstr2.split(' ')
			idstr=daopai[i.encode('utf-8')]
			ids=idstr.split(' ')
			counthash = mat2hash(ids,counthash)
		except:
			# Any failure for this character (missing key or otherwise)
			# just skips it.
			continue
	# (id, count) pairs, highest count first.
	keys=sorted(counthash.iteritems(),key=lambda counthash:counthash[1],reverse=True)
	loop=0
	for i in keys:

		# ``loop>100`` lets loop values 0..100 through, i.e. at most 101 results.
		if loop>100:break
		newsong=songid[i[0]]

		out.append(newsong)
		loop+=1
	return out
Exemplo n.º 18
0
def domain_create_task(domain_name,create_type,create_time,submitter,description,remark,compute_status=0):
    """Queue a domain-detection task and store its creation record.

    The task id is the pinyin of *domain_name* (tone-less, ``_``-delimited).
    If a document with that id can already be fetched, nothing is written.
    Otherwise the task is pushed onto the detection queue and a creation
    record (same fields, empty ``group_size``, ``compute_status`` 0) is
    indexed under the task id.

    :param domain_name: name of the domain to create.
    :param create_type: JSON-serialisable description of how it is created.
    :param create_time: creation time, stored as given.
    :param submitter: who submitted the task.
    :param description: free-text description.
    :param remark: free-text remark.
    :param compute_status: status placed in the queued task (default 0); the
        indexed record always stores 0.
    :return: ``'domain name exists!'`` when the id is already present,
        otherwise ``True`` on success and ``False`` if queueing or indexing
        failed.
    """
    task_id = pinyin.get(domain_name, format='strip', delimiter='_')

    try:
        es.get(index=weibo_domain_index_name, doc_type=weibo_domain_index_type, id=task_id)['_source']
    except Exception:
        # Any lookup failure is treated as "not there yet", as before.
        pass
    else:
        return 'domain name exists!'

    try:
        domain_task_dict = dict()
        domain_task_dict['domain_pinyin'] = task_id
        domain_task_dict['domain_name'] = domain_name
        domain_task_dict['create_type'] = json.dumps(create_type)
        domain_task_dict['create_time'] = create_time
        domain_task_dict['submitter'] = submitter
        domain_task_dict['description'] = description
        domain_task_dict['remark'] = remark
        domain_task_dict['compute_status'] = compute_status

        r.lpush(weibo_target_domain_detect_queue_name, json.dumps(domain_task_dict))

        # The stored record repeats the task fields, in the same order.
        shared_fields = ('domain_pinyin', 'domain_name', 'create_type',
                         'create_time', 'submitter', 'description', 'remark')
        item_exist = dict()
        for field in shared_fields:
            item_exist[field] = domain_task_dict[field]
        item_exist['group_size'] = ''
        item_exist['compute_status'] = 0  # store the creation record
        es.index(index=weibo_domain_index_name, doc_type=weibo_domain_index_type,
                 id=task_id, body=item_exist)

        mark = True
    except Exception:
        mark = False

    return mark
Exemplo n.º 19
0
def get_show_domain_group_detail_portrait(domain_name):
    """Return a display record for every member of a domain group.

    Loads the domain document (id = pinyin of *domain_name*), reads its
    ``member_uids`` and fetches their portraits in one ``mget``. Each member
    yields a dict with the keys ``uid``, ``nick_name``, ``photo_url``,
    ``domain``, ``sensitive``, ``location``, ``fans_num``, ``friends_num``,
    ``home_page`` and ``influence``; fields that are missing, and members
    whose portrait was not found, are left as empty strings.

    :param domain_name: domain whose members are listed.
    :return: list of dicts, one per entry of the ``mget`` response.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    es_result = es.get(index=tw_domain_index_name, doc_type=tw_domain_index_type,
                       id=domain_pinyin)['_source']
    member_uids = es_result['member_uids']
    es_mget_result = es_user_portrait.mget(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body={'ids': member_uids})['docs']

    # (output key, portrait key) pairs that are copied verbatim.
    # 'friends_num' reads 'friendsnum': the original tested for 'friends_num'
    # but then read 'friendsnum', so the value was lost or raised KeyError.
    # NOTE(review): 'friendsnum' is chosen to mirror 'fansnum' -- confirm
    # against the portrait index mapping.
    copied_fields = (
        ('nick_name', 'uname'),
        ('photo_url', 'photo_url'),
        ('domain', 'domain'),
        ('sensitive', 'sensitive'),
        ('location', 'location'),
        ('fans_num', 'fansnum'),
        ('friends_num', 'friendsnum'),
    )

    result_all = []
    for doc in es_mget_result:
        item = {}
        item['uid'] = ''
        for out_key, _ in copied_fields:
            item[out_key] = ''
        item['home_page'] = ""
        item['influence'] = ''

        if doc['found']:
            item['uid'] = doc['_id']
            source = doc['_source']
            # ``in`` instead of ``has_key`` so this also runs on Python 3.
            for out_key, source_key in copied_fields:
                if source_key in source:
                    item[out_key] = source[source_key]
            if 'screenname' in source:
                item['home_page'] = "https://twitter.com/" + source['screenname']
            if 'influence' in source:
                item['influence'] = get_influence_relative(
                    item['uid'], source['influence'])
        result_all.append(item)
    return result_all
Exemplo n.º 20
0
    def assembly(self, imgs, categorys):
        """Flatten every sheet of the price workbook into a list of row dicts.

        Opens 'sources/data-2019.10.31.xlsx' and, for each sheet, turns every
        row from index 2 onward into a dict carrying a fresh id, a numeration
        built from the sheet name's pinyin initials, name, image path, price,
        unit, specs, category and bookkeeping fields.

        :param imgs: mapping from '<pinyin of the first two characters of the
            sheet name><row number>' to an image path; a failed lookup is
            reported and the image is left as ``None``.
        :param categorys: mapping from sheet name to category value.
        :return: list of row dicts in sheet/row order.
        """
        print('Assembly data begins...')

        workbook = xlrd.open_workbook('sources/data-2019.10.31.xlsx')
        results = []
        total = 0
        for sheet_index in range(workbook.nsheets):
            sheet = workbook.sheet_by_index(sheet_index)
            data_rows = sheet.nrows - 2
            total += data_rows

            category = categorys[sheet.name]
            prefix = pinyin.get_initial(sheet.name, '').upper()

            print('************* %s: %drows *************' % (sheet.name, data_rows))
            for row_index in range(2, sheet.nrows):
                ind = int(sheet.cell_value(row_index, 0))
                name = sheet.cell_value(row_index, 1).strip()
                unit = sheet.cell_value(row_index, 2).strip()

                price = sheet.cell_value(row_index, 3)
                cell_type = sheet.row(row_index)[3].ctype
                # Cell type 0 becomes 0.0; type 1 is parsed as text up to
                # '元' (presumably xlrd's empty/text codes -- confirm).
                # Any other type keeps the raw cell value.
                if cell_type == 0:
                    price = 0.0
                elif cell_type == 1:
                    price = price.strip()
                    yuan_at = price.find('元')
                    price = float(price[:yuan_at]) if yuan_at != -1 else 0.0

                try:
                    image_key = '%s%d' % (pinyin.get(sheet.name[:2],
                                                     format='strip'), ind)
                    imgpath = imgs[image_key]
                except Exception as e:
                    imgpath = None
                    print('Image not found: %s(%s)' % (name, e))

                results.append({
                    'id': uuid.uuid1().hex,
                    'numeration': '%s%06d' % (prefix, ind),
                    'name': name,
                    'img': imgpath,
                    'price': price,
                    'unit': unit,
                    'specs': '%.2f/%s' % (price, unit),
                    'amount': 0,
                    'category': category,
                    'enabled': 1,
                    'create_by': 'admin',
                    'create_time': datetime.datetime.now()
                })

        print('End of assembly data. Total: %d rows.' % total)

        return results
Exemplo n.º 21
0
 def _sent_pyin(self,sent):
     """Return the tone-less pinyin of *sent*, syllables separated by spaces.

     :param sent: text to romanise.
     :return: the value produced by ``pinyin.get``.
     """
     return pinyin.get(sent, delimiter=' ', format='strip')
Exemplo n.º 22
0
def xm_pinyin(name_list):
    """Yield the tone-less pinyin of each name, or ``None`` when conversion fails."""
    for entry in name_list:
        try:
            romanised = pinyin.get(entry, format="strip")
            yield romanised
        except Exception:
            yield None
Exemplo n.º 23
0
def getCityPinYin():
    """Return the tone-less pinyin of every row returned by the city query.

    The query selects ``city_name`` from ``city`` for rows whose
    ``parent_id`` is neither 0 nor 2; each result item is passed to
    ``pinyin.get`` as-is.
    """
    import pinyin
    rows = sqlExecute("select city_name from city where parent_id <> 0 and parent_id <> 2")
    return [pinyin.get(row, format='strip') for row in rows]
Exemplo n.º 24
0
def get_audio():
    """Record one utterance, print its transcription aids, return it lower-cased.

    Listens on the default microphone, recognises the audio as zh-CN speech,
    and prints the recognised text, its spaced tone-less pinyin, its
    numerical pinyin and a zh -> en translation. Any failure during
    recognition, translation or printing is reported on stdout.

    :return: the recognised text lower-cased, or ``''`` if recognition failed.
    """
    recognizer = sr.Recognizer()
    said = ''
    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)
        try:
            said = recognizer.recognize_google(audio, language='zh-CN')
            transText = ts.sogou(said, from_language='zh', to_language='en')
            spaced_pinyin = pinyin.get(said, format="strip", delimiter=" ")
            for line in (said, spaced_pinyin):
                print(line + '\n')
            print(pinyin.get(said, format="numerical") + '\n')
            print(transText + '\n')
        except Exception as e:
            print('L2ChineseBruh :) ' + str(e))

    return said.lower()
Exemplo n.º 25
0
def get_string_with_only_char_in_list(col, char_list=var_name_char_list):
    """Romanise *col* and drop every character that is not in *char_list*.

    :param col: text to convert to tone-less pinyin.
    :param char_list: container of allowed characters.
    :return: the filtered pinyin string.
    """
    romanised = pinyin.get(col, format="strip")
    return u"".join(ch for ch in romanised if ch in char_list)
Exemplo n.º 26
0
def to_pinyin(var_str):
    """Return the tone-less, undelimited pinyin of *var_str*.

    :param var_str: text to convert.
    :return: ``""`` for the literal string ``'None'``, ``'wrong'`` for input
        that is neither ``str`` nor ``unicode``, otherwise the pinyin.
    """
    is_text = isinstance(var_str, str) or isinstance(var_str, unicode)
    if not is_text:
        return 'wrong'
    if var_str == 'None':
        return ""
    return pinyin.get(var_str, format='strip', delimiter="")
Exemplo n.º 27
0
def api_sentence_jieba():
    """Segment the posted ``entry`` text and attach pinyin to Han segments.

    :return: JSON list of ``{"word": segment, "pinyin": reading}`` objects;
        ``pinyin`` is ``""`` for segments without any Han character.
    """
    payload = request.get_json()
    segments = []
    for seg in jieba.cut(payload["entry"]):
        if regex.search(r"\p{IsHan}", seg):
            reading = pinyin.get(seg)
        else:
            reading = ""
        segments.append({"word": seg, "pinyin": reading})
    return jsonify(segments)
Exemplo n.º 28
0
def get_delete_domain(domain_name):
    """Delete the domain document whose id is the pinyin of *domain_name*.

    :param domain_name: name of the domain to delete.
    :return: ``True`` if the delete call succeeded, ``False`` if it raised.
    """
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    try:
        es.delete(index=fb_domain_index_name, doc_type=fb_domain_index_type,
                  id=domain_pinyin)
    # ``Exception`` rather than a bare except, so KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    except Exception:
        return False
    return True
Exemplo n.º 29
0
def get_show_domain_role_info(domain_name, role_name):
    """Return the stored document for one role of a domain.

    The document id is ``<pinyin of domain_name>_<English role name>``, where
    the English name is looked up in ``fb_domain_ch2en_dict``.
    """
    romanised_domain = pinyin.get(domain_name, format='strip', delimiter='_')
    english_role = fb_domain_ch2en_dict[role_name]
    document = es.get(
        index=fb_role_index_name,
        doc_type=fb_role_index_type,
        id=romanised_domain + '_' + english_role,
    )
    return document['_source']
Exemplo n.º 30
0
def to_pinyin(var_str):
    """Return the tone-less, undelimited pinyin of *var_str*.

    :param var_str: text to convert.
    :return: ``""`` for the literal string ``'None'``, the message
        ``'类型不对'`` for non-``str`` input, otherwise the pinyin.
    """
    if not isinstance(var_str, str):
        return '类型不对'
    if var_str == 'None':
        return ""
    return pinyin.get(var_str, format='strip', delimiter="")
Exemplo n.º 31
0
def get_export_example_model(domain_name,role_name):
    """Load the exported example model for one role of a domain.

    The file is ``EXAMPLE_MODEL_PATH`` followed by
    ``fb_<pinyin of domain_name>_<English role name>.json``.

    :return: the decoded JSON content of that file.
    """
    romanised_domain = pinyin.get(domain_name, format='strip', delimiter='_')
    english_role = fb_domain_ch2en_dict[role_name]
    model_path = EXAMPLE_MODEL_PATH + ('fb_' + romanised_domain + '_' + english_role) + '.json'
    with open(model_path, "r") as dump_f:
        return json.load(dump_f)
Exemplo n.º 32
0
def generate_river_entitylist(table):
    """Build the list of river entity dicts from a spreadsheet table.

    Every row is read with ``row_values``; empty-string cells are dropped
    first, so of the remaining values column 0 is used as the river name,
    column 1 as the river code and column 2 as the ``belongs_to`` value.

    Args:
        table: sheet-like object exposing ``nrows`` and ``row_values(i)``.

    Returns:
        A list with one entity dict per row of ``table``.
    """
    riverlist = []
    for i in range(table.nrows):
        # Drop empty cells in one pass; the remaining cells keep their order.
        row = [cell for cell in table.row_values(i) if cell != '']
        UUID = str_uni('null')
        valid_time = "21/11/2017"
        transact_time = time.strftime("%d/%m/%Y")
        tag = []
        member = river_extract_restinfo.river_extract_member(row[0])
        # NOTE(review): the river-code table is rebuilt for every row; hoist
        # it out of the loop if extract_riverCode() has no side effects.
        riverCode_dict = river_extract_restinfo.extract_riverCode()
        member_riverCode = river_extract_restinfo.extract_member_riverCode(
            row[0], riverCode_dict)
        belongs_to = uni_str(row[2])
        function = []
        rule = []
        # Raw string: the backslashes are path separators, not escapes.
        coords = river_extract_coords.sort_river(
            r'H:\spatial entity\EXP_waterway\waterways.shp', uni_str(row[0]))
        cpoint, bbox, ghcode = river_extract_restinfo.adapt_ghash(coords)
        river_len = river_extract_restinfo.length(coords)
        meta = dict(note=str_uni('null'),
                    precision=str_uni('null'),
                    produce_time=str_uni('null'),
                    producer=str_uni('null'),
                    security_level=str_uni('null'))
        properties = dict(name=uni_str(row[0]),
                          type="river",
                          pinyin=pinyin.get(row[0], format='strip'),
                          length=repr(river_len) + "km",
                          riverCode=row[1],
                          ghashCode=ghcode)
        relation = dict(flooded_area=str_uni('null'))
        geometry = dict(Type="MultiLine",
                        coordinates=coords,
                        bbox=bbox,
                        center_point=cpoint,
                        version_time="21/11/2017",
                        SRID="EPSG 4326")
        # Assemble a single river entity.  The key "propertities" is kept
        # as-is because consumers of the output may depend on that spelling.
        single_river_entity = dict(UUID=UUID,
                                   valid_time=valid_time,
                                   transact_time=transact_time,
                                   tag=tag,
                                   member=member,
                                   member_riverCode=member_riverCode,
                                   belongs_to=belongs_to,
                                   function=function,
                                   rule=rule,
                                   meta=meta,
                                   propertities=properties,
                                   relation=relation,
                                   geometry=geometry)
        riverlist.append(single_river_entity)
    print("river entity transform done.")
    return riverlist
Exemplo n.º 33
0
 def _get_unique_slug(self):
     """Return a slug derived from ``self.name`` that no Group uses yet.

     The base slug is the tone-stripped, undelimited pinyin of the name.
     While a Group with the candidate slug exists, '-1', '-2', ... is
     appended to the base until a free candidate is found.
     """
     base = pinyin.get(self.name, format='strip', delimiter='')
     candidate, attempt = base, 0
     while Group.objects.filter(slug=candidate).exists():
         attempt += 1
         candidate = '{}-{}'.format(base, attempt)
     return candidate
Exemplo n.º 34
0
 def next_vocab():
     """Record the entered word with its numerical pinyin, then advance.

     Appends a fresh entry list to ``vocab_list``, fills the entry at the
     global ``index`` with the value of ``var`` and its numerical-format
     pinyin, increments ``index`` and switches the UI to ``SetChar``.
     """
     global index
     vocab_list.append([])
     entry = vocab_list[index]
     entry.append(var.get())
     entry.append(pin.get(entry[0], format='numerical'))
     print(len(vocab_list))
     index += 1
     master.switch_frame(SetChar)
Exemplo n.º 35
0
def weather_month_history(city, year):
    """Fetch the monthly weather history of ``city`` for ``year``.

    The city name is converted to tone-stripped, undelimited pinyin and
    mapped to a city code with ``city2code``; the JSON body of the HTTP
    response is decoded and returned.
    """
    citycode = city2code(pinyin.get(city, format="strip", delimiter=""))
    endpoint = 'http://service.envicloud.cn:8082/v2/monthlymete/AMFJAY1JYWKXNTQYOTCXMJK2NZM0/%s/%s' % (
        citycode, year)
    # NOTE(review): `re` must be bound to an HTTP client here -- the stdlib
    # `re` module has no `request` function; confirm the file's import alias.
    reply = re.request("GET", endpoint, data="",
                       headers={'cache-control': "no-cache"})
    return json.loads(reply.text)
Exemplo n.º 36
0
def process_row(key, value):
    """Format one output line per item of ``value``.

    Each line is ``<item>\\t<pinyin of key>\\t1\\n``, where the pinyin is
    tone-stripped with syllables separated by single spaces.

    Args:
        key: text whose pinyin is written in the second column.
        value: iterable of strings, one per output line.

    Returns:
        The concatenated lines, or '' when ``value`` is empty.
    """
    import pinyin
    items = list(value)
    if not items:
        # Nothing to emit; as before, the key is not converted in this case.
        return ''
    # The key's pinyin is identical for every line, so compute it once.
    suffix = '\t' + pinyin.get(key, format='strip', delimiter=' ') + '\t1\n'
    return ''.join(item + suffix for item in items)
def insertMongo(cont):
    """Assemble one "common_symptom" document and insert it into knowledgeBase.

    Works almost entirely on names that are not defined in this function
    (type1, content, level2, wordslist, w, knowledgeBase, pinyin, re);
    ``cont`` is only compared against None and is otherwise unused.

    The inserted document holds the sections collected from ``content``
    under "content", the matched name/datasource pairs under "keyword",
    and the rank*, sortrank*, source and index fields derived from type1.
    """
    keyword = []
    # key/value hold the section currently being collected.  They are
    # globals, so they also retain whatever an earlier call left in them.
    global key
    global value
    symps = []
    # No-op check: a None argument changes nothing below.
    if cont == None:
        pass
    symps=[{"症状名称":[type1]},]
    # flaga becomes 1 once the first section header has been seen.
    flaga = 0
    for item in content:
        # Record every word-list entry whose name occurs in this item.
        words = wordslist()
        for word in words:
            name = word[0].strip('\n')
            name = name.strip(" ")
            datasource = word[1]
           # print('--------------%r-------------' % word)
            if name in item:
                keyword +=((name,datasource),)
        # An item matching level2 starts a new section; the previous
        # section (if any) is flushed into symps first.
        if re.match(level2,item):
            if flaga == 1:
                symps += [{key:value},]
                #print("key=="+key+"value::"+str(value)+'\n\n\n')
            else: flaga=1
            key = re.match(level2,item).group(1)
            key = key.strip('\n')
            value = []
            continue
        else:
            # NOTE(review): before the first header this appends to whatever
            # the global `value` currently is -- confirm that is intended.
            value.append(item)
    # NOTE(review): the section still held in key/value when the loop ends
    # is not added to symps (the statement doing so is commented out).
    #print("key=="+key+"value::"+str(value)+'\n\n\n')
   # sympton += ((key,value),)
    # De-duplicate the matched (name, datasource) pairs.
    keyword = set(keyword)

    result = []
    for i in list(keyword):
        keysss = {}
        keysss["name"] = i[0]
        keysss["datasource"] = i[1]
        result += [keysss,]
    print(result)
    key_word = {"keyword":result}


    print(symps)
    # Merge content, keywords, rank, index and sort fields into one document.
    sympton = {"content":symps }
    sympton.update(key_word)
    rank = (("rankone",str(type1).strip('\n')),("ranktwo",""),("rankthree",""),("source","common_symptom"),)
    sympton.update(dict(rank))
    sort = (("sortrankone",pinyin.get(str(type1).strip('\n'))),("sortranktwo",""),("sortrankthree",""),)

    index = {"index":str(type1).strip('\n')}
    sympton.update(index)
    sympton.update(dict(sort))

    # Log the entry name and source to w, then store and echo the document.
    w.write(str(type1)+"$$"+"common_symptom"+"\n")
    knowledgeBase.insert(sympton)
    print(sympton)
Exemplo n.º 38
0
def szm_pinyin(name_list):
	"""Yield the pinyin initials of every name in ``name_list``.

	Each name is decoded as UTF-8 and the first letter of each character's
	tone-stripped pinyin is concatenated.  ``None`` is yielded in place of
	a name whose conversion raises.
	"""
	for raw in name_list:
		try:
			text = unicode(raw, 'utf-8')  # decode to unicode
			initials = [pinyin.get(ch, format="strip")[0] for ch in text]
			yield str(''.join(initials))
		except Exception:
			yield None
Exemplo n.º 39
0
 def move_to_pinyin():
     """Record the entered word with its numerical pinyin, then start review.

     Appends a fresh entry list to ``vocab_list``, fills the entry at the
     global ``index`` with the value of ``var`` and its numerical-format
     pinyin, resets ``index`` to 0 and switches the UI to ``CheckPinyin``.
     """
     global index
     vocab_list.append([])
     entry = vocab_list[index]
     entry.append(var.get())
     entry.append(pin.get(entry[0], format='numerical'))
     print(len(vocab_list))
     index = 0
     master.switch_frame(CheckPinyin)
Exemplo n.º 40
0
 def projectx_aliases(self, data_aliases, source):
     """Return transliterated aliases of the entries belonging to ``source``.

     Only entries whose "type" is 'alias' and whose "source_name" equals
     ``source`` are used; each "alias" value is converted with
     ``pinyin.get`` and then passed through ``unidecode.unidecode``.
     """
     return [
         unidecode.unidecode(pinyin.get(entry.get("alias")))
         for entry in data_aliases
         if entry.get("type") == 'alias' and entry.get("source_name") == source
     ]
 def before_import(self, dataset, dry_run, **kwargs):
     """Append the derived import columns to ``dataset``.

     For every row the school class (from '学校' and '班级'), the gender
     (from '性别') and the capitalised tone-stripped pinyin of '名' and
     '姓' are computed.  They are appended as the columns 'schoolClass',
     'gender', 'universalFirstName' and 'universalLastName', in that order.
     """
     derived = [
         ('schoolClass', []),
         ('gender', []),
         ('universalFirstName', []),
         ('universalLastName', []),
     ]
     classes, genders, first_names, last_names = [col for _, col in derived]
     for record in dataset.dict:
         classes.append(self.get_schoolClass(record['学校'], record['班级']))
         genders.append(self.get_gender(record['性别']))
         first_names.append(
             pinyin.get(record['名'], format="strip").capitalize())
         last_names.append(
             pinyin.get(record['姓'], format="strip").capitalize())
     for header, values in derived:
         dataset.append_col(values, header=header)
Exemplo n.º 42
0
def to_pinyin(var_str):
    """Convert ``var_str`` to tone-stripped pinyin without a delimiter.

    Returns '' for the literal text 'None' and the marker string
    '类型不对' when the argument is not a ``str``.
    """
    if isinstance(var_str, str):
        if var_str != 'None':
            return pinyin.get(var_str, format='strip', delimiter="")
        return ""
    return '类型不对'
Exemplo n.º 43
0
def get_en_name(cn_names):
    """Replace the leading part of each name with CamelCase toneless pinyin.

    For every name, everything except its last two characters is converted
    to tone-stripped pinyin; the syllables are capitalised, concatenated
    and substituted back into the name via ``str.replace``.
    """
    def _camel_pinyin(text):
        # format='strip' yields pinyin without tone marks.
        syllables = pinyin.get(text, format='strip', delimiter=' ').split(' ')
        return ''.join(part.capitalize() for part in syllables)

    return [
        name.replace(name[:-2], _camel_pinyin(name[:-2]))
        for name in cn_names
    ]
Exemplo n.º 44
0
def get_city_pinyin():
    """Return the tone-stripped pinyin of every city row with a parent.

    Runs the city query through ``execute_sql`` and converts each returned
    item.  The entry 'zhongqing' is then removed and 'chongqing' appended
    (presumably correcting the reading of 重庆 -- confirm).
    """
    import pinyin
    query = "SELECT name FROM city WHERE parent_id <> 0"
    converted = [pinyin.get(row, format='strip') for row in execute_sql(query)]
    converted.remove('zhongqing')
    converted.append('chongqing')
    return converted
Exemplo n.º 45
0
 def add(self, village):
     """Insert ``village`` into the ``village`` table and commit.

     The row stores the village id, the ids of its province, city, county
     and town, its category and name, and the pinyin of the name.
     """
     cursor = connection.cursor()
     town = village.town
     county = town.county
     city = county.city
     params = (
         village.id,
         city.province.id,
         city.id,
         county.id,
         town.id,
         village.category,
         village.name,
         pinyin.get(village.name),
     )
     cursor.execute(
         "INSERT INTO village (id, province, city, county, "
         "town, category, name, pinyin) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
         params)
     connection.commit()
Exemplo n.º 46
0
 def add(self, village):
     """Store one village row, including the pinyin of its name, and commit.

     Column order: id, province id, city id, county id, town id, category,
     name, pinyin.
     """
     cursor = connection.cursor()
     statement = ("INSERT INTO village (id, province, city, county, "
                  "town, category, name, pinyin) VALUES (?, ?, ?, ?, ?, ?, ?, ?)")
     town = village.town
     county = town.county
     city = county.city
     values = (village.id, city.province.id, city.id, county.id, town.id,
               village.category, village.name, pinyin.get(village.name))
     cursor.execute(statement, values)
     connection.commit()
Exemplo n.º 47
0
 def process_item(self, item, spider):
     """Drop duplicate 'meituan' items and enrich the remaining ones.

     Items from other spiders are returned untouched.  An item is a
     duplicate when its restaurant name was already recorded in
     ``self.filter_dic`` with the same address.  For new items the
     coordinates are converted with ``gaode_to_baidu`` and the
     province/city/region/area codes are set to the pinyin of the
     corresponding fields; an error during enrichment is printed and the
     item is returned as far as it was filled in.

     Raises:
         DropItem: when the item duplicates an already recorded one.
     """
     if spider.name not in ['meituan']:
         return item

     name = item['restaurant_name']
     address = item['address']
     # Test membership explicitly: comparing `.get(name)` with the address
     # would treat a first-seen item whose address is None as a duplicate.
     if name in self.filter_dic and self.filter_dic[name] == address:
         print(name)
         print(address)
         raise DropItem("Duplicate item found: %s" % item)

     self.filter_dic[name] = address
     try:
         item['lng'], item['lat'] = gaode_to_baidu(float(item['lng']), float(item['lat']))
         item['province_code'] = pinyin.get(item['province'])
         item['city_code'] = pinyin.get(item['city'])
         item['region_code'] = pinyin.get(item['region'])
         item['area_code'] = pinyin.get(item['area'])
     except Exception as e:
         # Enrichment is best-effort, but KeyboardInterrupt / SystemExit
         # must still be able to stop the crawl.
         print(e)
     return item
Exemplo n.º 48
0
def ajax_get_macro_trendline():
    """Return the stored trendline for the task named in the request.

    The 'task_name' query argument (default '') is UTF-8 encoded and
    converted to tone-stripped, underscore-delimited pinyin; the value kept
    in ``r_trendline`` under 'trendline_<pinyin>' is returned.
    """
    raw_name = request.args.get('task_name', '')
    lookup_key = "trendline_" + pinyin.get(
        raw_name.encode('utf-8'), format='strip', delimiter="_")
    return r_trendline.get(lookup_key)
Exemplo n.º 49
0
def ajax_get_macro_prediction():
    """Return the prediction counts of the requested task as a JSON list.

    The 'task_name' query argument (default '') is UTF-8 encoded and
    converted to tone-stripped, underscore-delimited pinyin, which is
    passed to ``get_macro_prediction_count``.  The response is the JSON
    encoding of [weibo_count, user_count, rank].
    """
    raw_name = request.args.get('task_name', '')
    task_key = pinyin.get(raw_name.encode('utf-8'),
                          format='strip',
                          delimiter="_")
    counts = get_macro_prediction_count(task_key)
    weibo_count, user_count, rank = counts
    return json.dumps([weibo_count, user_count, rank])
Exemplo n.º 50
0
 def test_get(self):
     """Check pinyin.get with and without an explicit delimiter.

     NOTE(review): the non-ASCII literals below look like Chinese text in a
     multi-byte encoding that was read as single-byte characters (the
     inputs are expected to produce 'nihao', 'ye', ...).  Confirm the
     source encoding of this file before editing them.
     """
     # Default call: syllables are concatenated.
     self.assertEqual(pinyin.get('ÄãºÃ'), u'nihao')
     self.assertEqual(pinyin.get('ÄãºÃÂð?'), u'nihaoma?')
     self.assertEqual(pinyin.get('ÄãºÃÂð£¿'), u'nihaoma£¿')
     self.assertEqual(pinyin.get('ÄãºÃ'), u'nihao')
     self.assertEqual(pinyin.get('Ò¶'), u'ye')
     
     # A space as second positional argument separates the output parts.
     self.assertEqual(pinyin.get('ÄãºÃ', " "), u'ni hao')
     self.assertEqual(pinyin.get('ÄãºÃÂð?', " "), u'ni hao ma ?')
     self.assertEqual(pinyin.get('ÄãºÃÂð£¿', " "), u'ni hao ma £¿')
def insertMongo(cont):
    """Assemble one "medicine" document and insert it into mediciness.

    Reads names that are not defined in this function (result, med_name,
    type1, type2, type3, wordslist, mediciness, pinyin, re); ``cont``
    itself is never used.

    The document holds the parsed sections of ``result`` under "content",
    the matched name/datasource pairs under "keyword", and the rank*,
    sortrank*, source and index fields.
    """
    words = wordslist()
    rank = ()
    sort = ()
    medic = []
    keyword = ()
    # Record every word-list entry whose name occurs in `result`.
    for word in words:
        name = word[0].strip('\n')
        name = name.strip(" ")
        datasource = word[1]
        if name in result:
            keyword +=((name,datasource),)
    # The text before the first space of med_name is used as the third rank
    # and as the index value.
    names = med_name.split(' ')
    name_med = names[0].strip('\n')
    # print(names[0].strip('\n'))
    rank = (("rankone",type1),("ranktwo",type2),("rankthree",name_med,))
    rank += (("source","medicine"),)
    index = {"index":name_med};

    # Each match yields a bracketed heading (group 1) and the run of
    # allowed characters following it (group 2).
    levela = re.compile('【(\w+)】([\w。;((.))、:~/,%]+)')
    medic += [{'药品名称':med_name.strip('\n')},]
    if type3:
        medic += [{'药品类别':type3},]
    item = re.finditer(levela,result)
    for i in item:
        medic += [{i.group(1).strip('¥'):i.group(2).strip('¥')},]
    # De-duplicate the matched (name, datasource) pairs.
    keyword = set(keyword)
    res = []
    for it in keyword:
        keysss = {}
        keysss["name"] = it[0]
        keysss["datasource"] = it[1]
        res += [keysss,]
    key_word = {"keyword":res}
    # Merge content, keywords, rank, sort and index fields into one document.
    medic = {"content":medic}
    meds = medic.copy()
    meds.update(key_word)
    meds.update(dict(rank))
    sort = {"sortrankone":pinyin.get(str(type1).strip('\n')),"sortranktwo":pinyin.get(str(type2).strip('\n')),"sortrankthree":pinyin.get(str(med_name).strip('\n')),}
    meds.update(sort)
    meds.update(index)
    print(meds)
    mediciness.insert(meds)
Exemplo n.º 52
0
def pinyin():
    """Command-line entry point: print the pinyin of the given characters.

    Parses one positional argument, ``chars``.  When it is empty the help
    text is printed; otherwise the argument is passed through ``compat.u``
    and ``get`` and the result is printed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("chars", help="Input chinese words")
    options = cli.parse_args()

    if options.chars:
        print(get(compat.u(options.chars)))
    else:
        cli.print_help()
Exemplo n.º 53
0
def import_qingbank_user(filepath=None):
    """Import the Qingbank contact sheet as users, departments and contacts.

    Every row after the header of the first worksheet becomes a user
    (username = employee id, e-mail = '<employee id>@qingbank.cn') plus a
    contact entry.  Rows without an employee id get a generated UUID and
    the description u'非在编人员'.  Rows whose e-mail already exists are
    skipped and their usernames are printed at the end.

    Args:
        filepath: path of the workbook to read.  When omitted, the
            historical default 'd:/2014.xls' is used.
    """
    import pinyin
    import xlrd
    import uuid
    from dxc.services import api_contact, api_department

    def _cell_text(cell):
        # Numeric cells arrive as floats (e.g. 13800000000.0); turn them
        # into digit strings, then trim surrounding whitespace.
        if isinstance(cell, float):
            cell = str(int(cell))
        return cell.strip()

    book = xlrd.open_workbook(filepath or 'd:/2014.xls')
    sheet = book.sheets()[0]
    max_row = sheet.nrows
    print('Begin init qingbank contact')
    repeat_id = []
    for i in range(1, max_row):
        print('{0}/{1}'.format(i, max_row))
        row = sheet.row_values(i)
        employee_id = row[9].strip()
        desc = None
        if employee_id is None or employee_id == '':
            employee_id = str(uuid.uuid1())
            desc = u'非在编人员'
        dept_name = row[1].strip()
        duty = row[2].strip()
        name = row[3].strip()
        name_pinyin = pinyin.get(name)
        name_shot = pinyin.get_initial(name, '')
        mobile = _cell_text(row[4])
        tel = _cell_text(row[5])
        fax = _cell_text(row[6])

        # Create the records in the database.
        user = user_datastore.get_user(employee_id + '@qingbank.cn')
        if user is not None:
            print('already exist the same id: ' + user.username)
            # NOTE(review): this stores the literal text 'dept_name', not
            # the department name read above -- confirm which is intended.
            user.contact.description = 'dept_name'
            repeat_id.append(user.username)
            continue
        # NOTE(review): the initial password is the pinyin of the name.
        user = user_datastore.create_user(username=employee_id,
                                          password=name_pinyin,
                                          email=employee_id + '@qingbank.cn')
        db.session.commit()
        dept = api_department.first(name=dept_name)
        if dept is None:
            dept = api_department.create(name=dept_name)

        api_contact.create(name=name, name_pinyin=name_pinyin,
                           name_shot=name_shot, user_id=user.id,
                           department_id=dept.id, duty=duty, mobile=mobile,
                           telephone=tel, fax=fax, description=desc)

    print(repeat_id)
Exemplo n.º 54
0
 def get_cache_file(self, text):
     """Return the cache file path of the synthesized audio for ``text``.

     The file name is built from the pinyin parts of ``text`` that start
     with an ASCII letter (cut to 244 characters), an underscore, the first
     six hex digits of the SHA-1 of the text plus the TTS session
     parameters, and the '.wav' extension, placed in ``self.cache_dir``.
     """
     sep = '#'
     spelled = pinyin.get(text, delimiter=sep)
     if spelled:
         spelled = ''.join(
             part for part in spelled.split(sep) if re.match('[a-zA-Z]', part))
     # Leave room for '_' plus the six-character digest.
     spelled = spelled[:251 - 6 - 1]
     digest = hashlib.sha1(text + str(self.get_tts_session_params())).hexdigest()
     return os.path.join(self.cache_dir, spelled + '_' + digest[:6] + '.wav')
Exemplo n.º 55
0
def xm_pinyin(filename,newfile):
	"""Append the pinyin of every line of ``filename`` to ``newfile``.

	Trailing whitespace is stripped from each input line before it is
	converted.  A line whose conversion raises is skipped; previously such
	a line either crashed on an unbound name or wrote the preceding
	result a second time.  Nothing is written, and ``newfile`` is not
	created, when no line could be converted.

	Args:
		filename: path of the input file, one name per line.
		newfile: path of the output file, opened in append mode.
	"""
	converted = []
	with open(filename,'r') as src:
		for line in src:
			try:
				converted.append(pinyin.get(line.rstrip()) + '\n')
			except Exception:
				# Best-effort conversion: ignore lines that fail.
				continue

	if converted:
		# Open the output once instead of once per input line.
		with open(newfile,'a') as dst:
			dst.writelines(converted)
Exemplo n.º 56
0
 def srtHanziToPinyin(self):
     """
     Return a new SrtObject whose subtitle texts are the pinyin of the
     originals; subtitle numbers and time frames are carried over.
     """
     converted = {}
     for key in self.srtDB:
         entry = self.srtDB[key]
         text_pinyin = pinyin.get(entry.subText)
         converted[key] = _SrtEntry(entry.subNumber, entry.timeFrame,
                                    text_pinyin)
     return SrtObject(converted)
Exemplo n.º 57
0
 def addPinyinToHanziSrt(self):
     """
     Return a new SrtObject in which every subtitle text is followed
     directly by its pinyin; numbers and time frames are carried over.
     """
     merged = {}
     for key in self.srtDB:
         entry = self.srtDB[key]
         text_pinyin = pinyin.get(entry.subText)
         combined = entry.subText + text_pinyin
         merged[key] = _SrtEntry(entry.subNumber, entry.timeFrame, combined)
     return SrtObject(merged)
Exemplo n.º 58
0
def szm_pinyin(filename,newfile):
	"""Append the pinyin initials of every line of ``filename`` to ``newfile``.

	For each input line, stripped of trailing whitespace, the first letter
	of every character's pinyin is concatenated and written as one output
	line.  A line whose conversion raises is skipped; previously such a
	line either crashed on an unbound name or wrote the preceding result a
	second time.  Nothing is written, and ``newfile`` is not created, when
	no line could be converted.

	Args:
		filename: path of the input file, one name per line.
		newfile: path of the output file, opened in append mode.
	"""
	converted = []
	with open(filename,'r') as src:
		for line in src:
			try:
				text = line.rstrip()
				if isinstance(text, bytes):
					# Byte strings are decoded so that iteration yields
					# whole characters rather than single bytes.
					text = text.decode('utf-8')
				initials = ''.join([pinyin.get(ch)[0] for ch in text])
			except Exception:
				# Best-effort conversion: ignore lines that fail.
				continue
			converted.append(initials + '\n')

	if converted:
		# Open the output once instead of once per input line.
		with open(newfile,'a') as dst:
			dst.writelines(converted)
Exemplo n.º 59
0
    def test_get(self):
        """Check pinyin.get on plain and u()-wrapped input and output."""
        cases = [
            ('你好', 'nihao'),
            (u('你好'), 'nihao'),
            ('你好吗?', 'nihaoma?'),
            ('你好吗?', u('nihaoma?')),
            ('你好', 'nihao'),
            ('叶', 'ye'),
        ]
        for text, expected in cases:
            self.assertEqual(pinyin.get(text), expected)
def insertMongo(cont):
    global key
    global value
    keyword = []
    emergency = []
    if cont == None:
        pass
    emergency = [{"症状名称":type1},]
    flaga = 0
    for item in content:
        words = wordslist()
        for word in words:
            name = word[0].strip('\n')
            name = name.strip(" ")
            datasource = word[1]
           # print('--------------%r-------------' % word)
            if name in item:
                keyword +=((name,datasource),)
        if re.match(level1,item):
            if flaga == 1:
                emergency += [{key:value},]
                #print("key=="+key+"value::"+str(value)+'\n\n\n')
            else: flaga=1
            key = re.match(level1,item).group(1)
            key = key.strip('\n')
            value = []
            continue
        else:
            value.append(item)
    #print("key=="+key+"value::"+str(value)+'\n\n\n')
    emergency += [{key:value},]
    #emergency = dict(emergency)
    keyword = set(keyword)
    result = []
    for i in list(keyword):
        keysss = {}
        keysss["name"] = i[0]
        keysss["datasource"] = i[1]
        result += [keysss,]
    print(result)
    key_word = {"keyword":result}
    emergency = {"content":emergency}
    emergency.update(key_word)
    rank = (("rankone",type1.strip('\n')),("ranktwo",""),("rankthree",""),("source","emergency"),)
    emergency.update(dict(rank))
    index = {"index":type1.strip('\n')}
    sort = (("sortrankone",pinyin.get(str(type1).strip('\n'))),("sortranktwo",""),("sortrankthree",""),)
    emergency.update(index)
    emergency.update(dict(sort))
    w.write(str(type1).strip('\n')+"$$"+"emergency"+"\n")
    knowledgeBase.insert(emergency)