async def process_send_to_magic(callback_query: types.CallbackQuery, state: FSMContext):
    """Generate a publication title with the Classic model and send it to the user.

    Reads the birth date (``b_date``) and publication date (``pub_date``)
    the user entered earlier from the FSM state, turns them into keywords,
    generates a title and a quantile-based description, and sends the
    combined text back via the bot.
    """
    await callback_query.answer()
    # Fetch the FSM state exactly once (the original awaited get_data()
    # twice and discarded the first result — a wasted round-trip).
    my_dict = await state.get_data()
    # Dates collected earlier in the dialog, passed on as a pair.
    magic_dates = [my_dict['b_date'], my_dict['pub_date']]
    # Feed the dates into the Classic model pipeline.
    keywords = get_keywords(*magic_dates)
    title = generate_text(keywords[0], 10, model, random.randint(8, max_sequence_len - 1))
    describe = predict_quantile(title)
    response = f"Подходящий для вас заголовок для публикации: \n{title.title()} \n \n" + describe
    await bot.send_message(callback_query.from_user.id, response)
def get_graph(word="Дерево", n=3):
    """Recursively expand the definition graph of *word* up to depth *n*.

    Looks up the definition of *word*, extracts candidate keywords from it,
    and recurses on each candidate.  An edge ``(word, word_i)`` gets its
    weight incremented in the module-level ``tree`` dict whenever the child
    call itself examined at least one keyword (i.e. the child expanded).

    Returns the number of keywords examined at this level; 0 when the depth
    budget is exhausted.
    """
    count = 0
    word = word.lower()
    if n > 0:
        definition = get_definition(word)
        # Close the debug-dump file deterministically — the original opened
        # it inline and never closed the handle (leak under CPython refcount
        # luck, a real leak elsewhere).
        with open('/tmp/rez.txt', 'a') as dump:
            dump.write("=\n%s\n=\n" % definition)
        for word_i in unwiki(filter_keywords(get_keywords(definition))):
            if word_i != word and not is_stop_word(word_i):
                count += 1
                # Only count the edge when the child actually expanded
                # (leaf-level children return 0 and are skipped).
                if get_graph(word_i, n - 1):
                    try:
                        tree[(word, word_i)] += 1
                    except KeyError:
                        tree.update({(word, word_i): 1})
    return count
def get_vocabulary_dic():
    """Build per-file keyword frequency tables from livechat JSON logs.

    Returns a dict mapping each ``livechat/*.json`` path to a
    ``{keyword: count}`` vocabulary.  Keywords (nouns, adjectives,
    interjections) come from the module-level analyzer ``m``; stop words
    are skipped, and single-character keywords are accepted only when they
    are CJK unified ideographs — everything else is remembered in an
    ignore set shared across files.
    """
    base = 'livechat'
    json_files = [
        os.path.join(base, name)
        for name in os.listdir(base)
        if os.path.isfile(os.path.join(base, name))
        and os.path.splitext(name)[1] == '.json'
    ]

    dic = {}
    ignore_set = set()
    for file in json_files:
        vocabulary = {}
        for keyword in get_keywords(m, file, ['名詞', '形容詞', '感動詞']):
            # Stop words are excluded outright.
            if keyword in sw_set:
                continue
            if keyword in vocabulary:
                vocabulary[keyword] += 1
                continue
            if keyword in ignore_set:
                continue
            # Single-character tokens: allow kanji only.  Coined words can
            # be split into individual kanji by the tokenizer, and dropping
            # those characters would erase the word entirely, so lone kanji
            # are kept.  (The same issue exists for non-kanji scripts, but
            # allowing everything would be far too noisy — a compromise.)
            if len(keyword) == 1 and not unicodedata.name(keyword, '').startswith('CJK UNIFIED'):
                ignore_set.add(keyword)
                continue
            vocabulary[keyword] = 1
        dic[file] = vocabulary
    return dic
async def process_send_to_magic(callback_query: types.CallbackQuery, state: FSMContext):
    """Generate a publication title with the DDF model and send it to the user.

    Reads the birth date (``b_date``) and publication date (``pub_date``)
    the user entered earlier from the FSM state, turns them into keywords,
    generates a title, rewrites it through the gachi replacement dictionary,
    and sends the title plus a quantile-based description via the bot.
    """
    await callback_query.answer()
    # Fetch the FSM state exactly once (the original awaited get_data()
    # twice and discarded the first result — a wasted round-trip).
    my_dict = await state.get_data()
    # Dates collected earlier in the dialog, passed on as a pair.
    magic_dates = [my_dict['b_date'], my_dict['pub_date']]
    # Feed the dates into the DDF model pipeline.
    keywords = get_keywords(*magic_dates)
    title = generate_text(keywords[0], 10, model, random.randint(8, max_sequence_len - 1))
    title = replace_text(title, dict_gachi)
    describe = predict_quantile_ddf(title)
    response = f"Подходящий для вас заголовок для публикации: \n{title.title()} \n \n" + describe
    await bot.send_message(callback_query.from_user.id, response)
def main():
    """Count keyword occurrences in the pretreated text and report timings.

    Pretreats the corpus via ``TP.main_pretreat()``, counts how many times
    each keyword from ``gk.get_keywords()`` occurs in it, hands the counts
    to ``data_output`` and prints the elapsed time of each phase.

    Returns:
        True on completion.
    """
    time_TP_start = time.time()
    list_txt = TP.main_pretreat()
    time_TP_end = time.time()

    list_keywords = gk.get_keywords()

    time_match_start = time.time()
    # Single O(len(list_txt)) counting pass instead of the original
    # O(keywords * words) nested scan with an extra O(n) .index() lookup per
    # match.  This also fixes duplicate keywords: .index() always returned
    # the first position, leaving later duplicates stuck at 0.
    occurrences = {}
    for token in list_txt:
        occurrences[token] = occurrences.get(token, 0) + 1
    list_count = [occurrences.get(kw, 0) for kw in list_keywords]
    time_match_end = time.time()

    data_output(list_keywords, list_count)
    print('预处理时间为:' + str(time_TP_end - time_TP_start) + '\n')
    print('匹配的时间为:' + str(time_match_end - time_match_start) + '\n')
    print('共用时:' + str(time_match_end + time_TP_end - time_match_start - time_TP_start) + '\n')
    return True
def cluster_score(user_desc, user_skills):
    """Assign a user to the nearest k-means cluster and return its payload.

    Builds a binary token vector for the user from description keywords and
    lowercased skills, finds the centroid with the smallest Euclidean
    distance, and returns ``(recommendations, job_title)`` for that cluster.
    """
    user_keywords = list(get_keywords.get_keywords(user_desc))
    user_keywords.extend([x.lower() for x in user_skills])

    cwd = os.getcwd()
    os.chdir('../scripts')
    cluster_file = 'cluster_dump.txt'
    try:
        # Close the dump file deterministically (the original leaked the
        # handle opened inline in cPickle.load) and restore the working
        # directory even if loading fails.
        with open('data/' + cluster_file, 'r') as fh:
            clusterDump = cPickle.load(fh)
    finally:
        os.chdir(cwd)

    token_lists = clusterDump['tokens']
    token_vector = array([0.0] * len(token_lists))

    # Binary token vector for the current user.  A set makes each
    # membership test O(1) instead of scanning the keyword list per token.
    keyword_set = set(user_keywords)
    for n, token in enumerate(token_lists):
        if token in keyword_set:
            token_vector[n] = 1.0

    centroids = clusterDump['centroids']
    titles = clusterDump['job_titles']
    recommendations = clusterDump['recos']

    # Nearest centroid by plain Euclidean distance.
    # NOTE(review): the original comment claimed the distance is normalized
    # by centroid magnitude, but no normalization is performed — confirm
    # which behavior is intended.
    minV = centroids[0] - token_vector
    minMag = sqrt(minV.dot(minV))
    cluster_value = 0
    for i in range(1, len(centroids)):
        diff = centroids[i] - token_vector
        mag = sqrt(diff.dot(diff))
        if mag < minMag:
            minMag = mag
            cluster_value = i

    Result = recommendations[cluster_value], titles[cluster_value]
    return Result
from collections import defaultdict, Counter
import operator as op
import cPickle

# Number of k-means clusters to build downstream.
kc = 8  # k-means no: of clusters

tokenList = []
vectorList = []
profileVector = []

# Scraped profiles, keyed by profile id (see scrapy_reader).
profileList = scrapy_reader.profile_dump

# Pass 1: collect the global token universe — every lowercased skill plus
# keywords extracted from the first experience entry's description
# (when an experience exists and its description is not None).
for profile in profileList.itervalues():
    tokenList.extend([x.lower() for x in profile['skills']])
    if len(profile['experience']) != 0:
        exp = profile['experience'][0]
        if type(exp.desc) is not types.NoneType:
            descTokens = get_keywords.get_keywords(exp.desc)
            tokenList.extend(descTokens)

# Deduplicate, then go back to list form for stable indexing later.
tokenS = set(tokenList)
tokenList = list(tokenS)

''' Forming token vectors and profiling of users '''
# Pass 2: per-profile token lists plus (company, title) profile records.
for profile in profileList.itervalues():
    tokens = []
    tokens.extend([x.lower() for x in profile['skills']])
    if len(profile['experience']) != 0:
        exp = profile['experience'][0]
        if type(exp.desc) is not types.NoneType:
            descTokens = get_keywords.get_keywords(exp.desc)
            tokens.extend(descTokens)
    # NOTE(review): `exp` here is whatever the last `if` body assigned — for
    # a profile with no experience this reuses the previous iteration's exp
    # (or raises NameError on the very first such profile).  Confirm this
    # append is meant to be unconditional.
    profileVector.append([exp.company.lower(), exp.postitle.lower()])
def Auto_PushReply(getvalue_dict,pushper_dict,threadcount):
    """Push auto-reply keyword tests for one appid and collect statistics.

    Fetches the keyword/reply configuration for the appid in *getvalue_dict*;
    for every configured keyword it checks the customer-service state,
    optionally validates the referenced material, and pushes the reply in a
    worker thread (throttled so fewer than *threadcount* threads are live).
    Failures are logged via ``writelog`` and reported by email; per-key
    results are accumulated in ``SubData``.  Returns ``SubData`` statistics.
    """
    appid = getvalue_dict['appid']
    authtype = getvalue_dict['authtype']
    # SubData.initData(appid)  ### per-appid stats are initialized centrally in Autoreply-Monitor
    try:
        reply_dict, get_keywords_url = get_keywords.get_keywords(getvalue_dict)
        if reply_dict=='NULL':
            # No keyword configuration for this appid: nothing to test.
            print 'totalCount=0,无关键词配置!'
            writelog.errorlog(get_keywords_url,'查看该appid是否有关键词配置','totalCount=0,无关键词配置!')
            static_result = SubData.data
        elif reply_dict=='FAIL':
            # The keyword-fetch API itself failed: mail out and mark 500.
            send_failEmail('获取关键词失败,请检查接口参数或接口请求是否正常:\n%s'%get_keywords_url)
            writelog.errorlog(get_keywords_url,'获取该appid的关键词', '获取关键词失败,请检查接口参数或接口请求是否正常!')
            static_result=SubData.Key_static(appid, '500')
        else:
            threads=[]
            for key in reply_dict.keys():
                reply_relation = reply_dict[key]
                openid = getvalue_dict['openid']
                to_xml = MesType.runtext_reply(key, openid)
                MENUNAME = reply_relation['MENUNAME']
                inputword = '%s,\t菜单名称:%s' % (str(key),str(MENUNAME))
                getvalue_dict['type'] = 1
                getvalue_dict['content'] = key
                # Check whether sending this keyword puts the session into
                # manual customer-service mode.
                service_result, service_url = get_service.get_service(getvalue_dict)
                # print service_result
                if service_result == '进入客服':
                    # Keyword triggered customer-service mode: try to exit
                    # it before moving on to the next keyword.
                    close_result, close_url = get_service.exitservice(getvalue_dict)
                    print '\n%s\n该关键词触发了客服状态'%service_url
                    writelog.errorlog(appid, reply_relation['MENUNAME'], '此为人工客服独立功能,暂不校验')
                    # NOTE(review): the unconditional break means this loop
                    # retries the exit at most once — confirm intended.
                    while close_result == '进入客服':
                        close_result, close_url = get_service.exitservice(getvalue_dict)
                        break
                    if close_result == '请求失败':
                        # Exit request failed: retry once, log, mark 500.
                        close_result, close_url = get_service.exitservice(getvalue_dict)
                        MENUNAME = reply_relation['MENUNAME']
                        # print "key:%s" % str(key)
                        inputword = '%s,\t菜单名称:%s' % (str(key), str(MENUNAME))
                        print '\n该关键词判断客服状态失败,判断是否进入客服状态:%s\n判断是否退出成功:%s' % (close_url, close_result)
                        send_emails.send_failEmail('该关键词退出客服状态失败:\n%s' % close_url)
                        writelog.errorlog(str(service_url), '关键词名称:%s--该菜单退出客服状态失败,请检查该接口请求是否正常\n' % str(inputword), str(reply_relation['reply_content']))
                        SubData.Menu_static(appid, '500')
                    else:
                        SubData.Key_static(appid, '200')
                elif service_result == '请求失败':
                    # Could not determine the customer-service state at all.
                    MENUNAME = reply_relation['MENUNAME']
                    # print "key:%s" % str(key)
                    inputword = '%s,\t菜单名称:%s' % (str(key), str(MENUNAME))
                    print '\n%s\n该关键词判断客服状态失败' % (service_url)
                    send_emails.send_failEmail('该关键词判断是否进入客服状态:%s\n' % service_url)
                    writelog.errorlog(str(service_url), '关键词名称:%s--该菜单判断是否进入客服状态失败,请检查该接口请求是否正常' % str(inputword), str(reply_relation['reply_content']))
                    SubData.Key_static(appid, '500')
                else:
                    # Reply content types 2/3/4 reference stored material;
                    # verify the material id resolves before pushing.
                    if reply_relation['reply_content_type'] in ['2', '3', '4']:
                        material_result, material_url = get_material.get_material(getvalue_dict,reply_relation['reply_content'])
                        if material_result == 'NULL':
                            SubData.Key_static(appid, '200')
                            print '该material_id获取不到对应的素材:%s' % str(material_url)
                            # sendEmail(request_url, request_xml, result_decrypt, str(inputword), str(reply_relation['reply_content']), '该material_id获取不到对应的素材!\n%s' %str( material_url))
                            writelog.errorlog(str(material_url), '关键词名称:%s--该material_id获取不到对应的素材' % str(inputword),str(reply_relation['reply_content']))
                            continue
                        elif material_result == 'FAIL':
                            SubData.Key_static(appid, '500')
                            print '该material_id获取素材失败,请检查该接口请求是否正常:%s' % str(material_url)
                            print reply_relation
                            send_emails.send_failEmail('获取素材失败,请检查该接口请求是否正常%s\n' % str(material_url))
                            writelog.errorlog(str(material_url), '菜单名称:%s--获取素材失败,请检查该接口请求是否正常' % str(inputword),str(reply_relation['reply_content']))
                            continue
                        else:
                            pass
                    else:
                        pass
                    # print '\n授权方式', authtype
                    # Build push credentials according to the auth type.
                    if authtype == '3':
                        pushper_dict['General'] = {}
                        pushper_dict['token']='irPNHoJN'
                    elif authtype == '1':
                        pushper_dict['token'] = getvalue_dict['authtoken']
                        General = {}
                        General['uniqueid'] = getvalue_dict['uniqueid']
                        General['appid'] = getvalue_dict['appid']
                        pushper_dict['General'] = General
                    # Pushdata.pushdata(pushper_dict, getvalue_dict, key, reply_relation, to_xml)
                    obj = Pushdata.pushdata
                    t=threading.Thread(target=obj,args=(pushper_dict, getvalue_dict, key,reply_relation, to_xml))
                    threads.append(t)
            # Start the workers as daemons, spinning until the live thread
            # count drops below the throttle, then wait for all to finish.
            for t in threads:
                t.setDaemon(True)
                t.start()
                while True:
                    if(threading.activeCount() < threadcount):
                        break
                # print 'threading.activeCount:',threading.activeCount()
            for t in threads:
                t.join()
            # time.sleep(0.02)
            static_result=SubData.data
            # print static_result
        return static_result
    except Exception, e:
        # Any unexpected failure: report by mail and return current stats.
        print Exception, ":", e
        static_result = SubData.data
        send_emails.send_failEmail('\n%s :%s' % (str(Exception), str(e)))
        return static_result
from collections import defaultdict, Counter
import operator as op
import cPickle

# Number of k-means clusters to build downstream.
kc = 8  # k-means no: of clusters

tokenList = []
vectorList = []
profileVector = []

# Scraped profiles, keyed by profile id (see scrapy_reader).
profileList = scrapy_reader.profile_dump

# Pass 1: collect the global token universe — every lowercased skill plus
# keywords extracted from the first experience entry's description
# (when an experience exists and its description is not None).
for profile in profileList.itervalues():
    tokenList.extend([x.lower() for x in profile['skills']])
    if len(profile['experience']) != 0:
        exp = profile['experience'][0]
        if type(exp.desc) is not types.NoneType:
            descTokens = get_keywords.get_keywords(exp.desc)
            tokenList.extend(descTokens)

# Deduplicate, then go back to list form for stable indexing later.
tokenS = set(tokenList)
tokenList = list(tokenS)

''' Forming token vectors and profiling of users '''
# Pass 2: build the per-profile token list the same way as pass 1.
for profile in profileList.itervalues():
    tokens = []
    tokens.extend([x.lower() for x in profile['skills']])
    if len(profile['experience']) != 0:
        exp = profile['experience'][0]
        if type(exp.desc) is not types.NoneType:
            descTokens = get_keywords.get_keywords(exp.desc)
            tokens.extend(descTokens)