def sensitive_word_filter(n, e_id, is_extend):
    """Filter event messages through the sensitive-word DFAs.

    Stage 1 drops messages matching any "not sensitive" word; stage 2
    splits the survivors into exact sensitive hits (stored immediately
    when ``is_extend == 0``) and the rest (returned for fuzzy analysis).

    :param n: 0 = newly added event (use the event's own date range),
              otherwise a daily run over the last 24 hours.
    :param e_id: event id.
    :param is_extend: 0 = persist the exact-hit messages right away.
    :return: dict keyed by mid of messages still needing sensitivity
             calculation.
    """
    # Stage 1: load the "not sensitive" word list into the DFA.
    get_sensitive_word(e_id, 1, 'not_sensitive_word.txt')
    dfa = DFA()
    weibo_utils = Weibo_utils()
    dfa.change_words('not_sensitive_word.txt')
    key_words, start_date, end_date, e_index = get_event_info(e_id)
    if n == 0:
        start_ts = date2ts(str(start_date))
        if end_date is None:
            end_ts = time.time()
        else:
            end_ts = date2ts(str(end_date))
    else:
        # Daily run: the 24-hour window ending at today's midnight.
        end_ts = int(time.mktime(datetime.date.today().timetuple()))
        start_ts = end_ts - 86400

    # Keep only messages that contain no "not sensitive" word.
    data_dict = {}
    for data_list in querry(e_index, start_ts, end_ts):
        for message in data_list:
            if not dfa.exists(message['text']):
                data_dict[message['mid']] = {
                    'uid': message['uid'],
                    'text': message['text'],
                    'root_uid': message['root_uid'],
                    'root_mid': message['root_mid'],
                    'timestamp': message['timestamp'],
                    'send_ip': message['ip'],
                    'geo': message['geo'],
                    'message_type': message['message_type'],
                    'source': message['source'],
                }

    # Stage 2: reload the DFA with the real sensitive-word list.
    new_data_dict = {}
    save_data_dict = {}
    get_sensitive_word(e_id, 2, 'sensitive_word.txt')
    dfa.change_words('sensitive_word.txt')
    for mid in data_dict:
        text = weibo_utils.remove_nochn(
            weibo_utils.remove_c_t(data_dict[mid]['text']))
        if dfa.exists(text):
            # filter_all returns the filtered text followed by the hit
            # words, tab-separated.
            line_list = dfa.filter_all(text).split('\t')
            # BUGFIX: the original looped over the hit words and did one
            # dict-assignment per word, so the same mid could land in
            # BOTH save_data_dict and new_data_dict. Decide once per
            # message: any truly sensitive hit word means "save".
            if len(line_list) != 1 and any(
                    is_sexsitive_word(word, line_list[0])
                    for word in line_list[1:]):
                save_data_dict[mid] = data_dict[mid]
            else:
                new_data_dict[mid] = data_dict[mid]
        else:
            new_data_dict[mid] = data_dict[mid]

    if is_extend == 0:
        # Persist exact hits and their authors, linked to the event.
        sensitivity_store(save_data_dict)
        event_sensitivity(e_id, save_data_dict)
        figure_add(save_data_dict, e_id)
    return new_data_dict
def _get_para_or_init(e_id, name, default):
    """Fetch event parameter *name*; on failure store it and return *default*.

    NOTE(review): get_event_para's failure mode is not visible here, so
    this keeps the original's broad catch, narrowed from a bare
    ``except:`` to ``Exception`` so SystemExit/KeyboardInterrupt still
    propagate.
    """
    try:
        return get_event_para(e_id, name)
    except Exception:
        store_event_para(e_id, name)
        return default


def event_cal_main(info, n, start_date, end_date):
    """Event main calculation.

    :param info: event info dict (e_id, event_name, es_index_name, ...).
    :param n: 1 = routine daily run, 0 = newly added event.
    :param start_date: calculation start date.
    :param end_date: calculation end date.
    :return: the largest per-date message count seen during analysis.
    """
    e_id = info['e_id']
    e_name = info['event_name']
    e_index = info['es_index_name']

    # Per-event tuning parameters, initialized to defaults on first use.
    SENTIMENT_NEG = _get_para_or_init(e_id, 'sentiment_neg', 0.2)
    SENTIMENT_POS = _get_para_or_init(e_id, 'sentiment_pos', 0.7)
    POS_NEG = _get_para_or_init(e_id, 'pos_neg', 50)
    WEIBO_NUM = _get_para_or_init(e_id, 'weibo_num', 100000)
    # stop_percent / EXTEND_SCALE are unused below but kept so the
    # parameters still get initialized in the store, as before.
    stop_percent = _get_para_or_init(e_id, 'stop_percent', 0.05)
    EXTEND_SCALE = _get_para_or_init(e_id, 'extend_scale', 10)

    print('获取事件相关微博')
    # Fetch event-related weibo, compute sentiment polarity, and store
    # them into the event index (created on demand).
    save_event_data(e_id, n, SENTIMENT_POS, SENTIMENT_NEG)

    print('敏感词过滤,精确敏感信息入库')
    # Filter the new messages; exact sensitive hits are stored inside.
    data_dict = sensitive_word_filter(n, e_id, 0)
    print(len(data_dict))

    print('敏感计算')
    # Fuzzy sensitivity scoring on whatever survived the filter.
    if data_dict:
        data_dict = sensitivity(e_id, data_dict, e_index, POS_NEG, 0)
        print(len(data_dict))
    if data_dict:
        print('敏感信息入库')
        # Store sensitive messages and their event association.
        sensitivity_store(data_dict)
        event_sensitivity(e_id, data_dict)
        print('敏感人物入库')
        # Store sensitive figures and their event association.
        figure_add(data_dict, e_id)

    print('事件计算')
    # Per-date situation analysis; track the busiest date's volume.
    data_dict = get_event_data(e_index, start_date, end_date)
    max_num = 0
    for date in data_dict:
        print(date)
        info_num = len(data_dict[date])
        print(info_num)
        if info_num > max_num:
            max_num = info_num
        event_analyze(e_id, data_dict[date], date)
    # Special analysis: hashtags and sensitive-word distribution.
    event_hashtag_senwords(e_id, data_dict, n)

    # Semantic analysis on the latest day's data.
    data_dict, date = get_semantic_data(e_index, end_date)
    event_semantic(e_id, e_name, data_dict, date, WEIBO_NUM)
    return max_num