Example #1
import datetime
import time

# DFA, Weibo_utils, and helpers such as get_sensitive_word, get_event_info,
# date2ts and querry are project-internal modules (not shown here).


def sensitive_word_filter(n, e_id, is_extend):
    # Fetch the event's non-sensitive word list (type 1) into a local file,
    # then load it into the DFA matcher.
    get_sensitive_word(e_id, 1, 'not_sensitive_word.txt')
    dfa = DFA()
    weibo_utils = Weibo_utils()
    dfa.change_words('not_sensitive_word.txt')
    key_words, start_date, end_date, e_index = get_event_info(e_id)
    # Choose the time window: the full event span for a newly added event
    # (n == 0), otherwise the last 24 hours.
    if n == 0:
        start_ts = date2ts(str(start_date))
        if end_date is None:
            end_ts = time.time()
        else:
            end_ts = date2ts(str(end_date))
    else:
        end_ts = int(time.mktime(datetime.date.today().timetuple()))
        start_ts = end_ts - 86400
    data_dict = {}
    for data_list in querry(e_index, start_ts, end_ts):
        for message in data_list:
            # Keep only messages that hit none of the exclusion words.
            if not dfa.exists(message['text']):
                data_dict[message['mid']] = {
                    'uid': message['uid'],
                    'text': message['text'],
                    'root_uid': message['root_uid'],
                    'root_mid': message['root_mid'],
                    'timestamp': message['timestamp'],
                    'send_ip': message['ip'],
                    'geo': message['geo'],
                    'message_type': message['message_type'],
                    'source': message['source'],
                }
    new_data_dict = {}
    save_data_dict = {}
    # Reload the DFA with the event's precise sensitive word list (type 2).
    get_sensitive_word(e_id, 2, 'sensitive_word.txt')
    dfa.change_words('sensitive_word.txt')
    for mid in data_dict:
        text = weibo_utils.remove_nochn(
            weibo_utils.remove_c_t(data_dict[mid]['text']))
        if dfa.exists(text):
            # filter_all() returns the masked text followed by the matched
            # words, tab-separated.
            line_list = dfa.filter_all(text).split('\t')
            matched_words = line_list[1:]
            if matched_words and any(
                    is_sexsitive_word(word, line_list[0])
                    for word in matched_words):
                # At least one precise sensitive word: set aside for storage.
                save_data_dict[mid] = data_dict[mid]
            else:
                new_data_dict[mid] = data_dict[mid]
        else:
            new_data_dict[mid] = data_dict[mid]
    if is_extend == 0:
        # Base (non-extended) pass: persist the precise-sensitive messages,
        # link them to the event, and register the involved accounts.
        sensitivity_store(save_data_dict)
        event_sensitivity(e_id, save_data_dict)
        figure_add(save_data_dict, e_id)
    return new_data_dict
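
The DFA class used above is project-internal and not shown. As a rough sketch of the interface the filter depends on (change_words, exists, filter_all), a minimal trie-based matcher could look like the following; the tab-separated return format of filter_all is inferred from how sensitive_word_filter consumes it, and the internals here are an assumption, not the project's actual implementation.

# Minimal sketch of a DFA-style keyword matcher (assumed interface: one word
# per line in the word file; filter_all() returns the text with hits masked
# by '*', followed by each matched word, all tab-separated).
class DFA:
    def __init__(self):
        self.tree = {}

    def change_words(self, path):
        """Rebuild the word trie from a newline-delimited word file."""
        self.tree = {}
        with open(path, encoding='utf-8') as f:
            for word in (line.strip() for line in f):
                if not word:
                    continue
                node = self.tree
                for ch in word:
                    node = node.setdefault(ch, {})
                node['end'] = True

    def _match_at(self, text, start):
        """Length of the longest listed word starting at `start`, or 0."""
        node, length, best = self.tree, 0, 0
        for ch in text[start:]:
            if ch not in node:
                break
            node = node[ch]
            length += 1
            if node.get('end'):
                best = length
        return best

    def exists(self, text):
        return any(self._match_at(text, i) for i in range(len(text)))

    def filter_all(self, text):
        masked, words, i = [], [], 0
        while i < len(text):
            n = self._match_at(text, i)
            if n:
                words.append(text[i:i + n])
                masked.append('*' * n)
                i += n
            else:
                masked.append(text[i])
                i += 1
        return '\t'.join([''.join(masked)] + words)
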
Example #2
def event_cal_main(info, n, start_date, end_date):
    """
    Main per-event computation routine.
    :param info: event info, including id, name, index_name, start/end time, etc.
    :param n: whether the event is newly added. 1 = routine daily run, 0 = newly added
    :param start_date: start of the computation window
    :param end_date: end of the computation window
    :return: max_num, the largest single-day message count seen
    """
    e_id = info['e_id']
    e_name = info['event_name']
    e_index = info['es_index_name']

    # Fetch the event's tuning parameters, falling back to (and persisting)
    # defaults for any that are missing.
    defaults = {
        'sentiment_neg': 0.2,
        'sentiment_pos': 0.7,
        'pos_neg': 50,
        'weibo_num': 100000,
        'stop_percent': 0.05,
        'extend_scale': 10,
    }
    params = {}
    for key, default in defaults.items():
        try:
            params[key] = get_event_para(e_id, key)
        except Exception:
            params[key] = default
            store_event_para(e_id, key)
    SENTIMENT_NEG = params['sentiment_neg']
    SENTIMENT_POS = params['sentiment_pos']
    POS_NEG = params['pos_neg']
    WEIBO_NUM = params['weibo_num']
    stop_percent = params['stop_percent']  # not used in this function
    EXTEND_SCALE = params['extend_scale']  # not used in this function

    print('Fetching event-related weibo posts')
    # Fetch the event's related posts, compute sentiment polarity, and write
    # them into the event index (creating the index if it does not exist).
    save_event_data(e_id, n, SENTIMENT_POS, SENTIMENT_NEG)

    print('Sensitive-word filtering; storing precise sensitive messages')
    # Filter the newly fetched posts against the sensitive word lists and
    # store the messages containing precise sensitive words.
    data_dict = sensitive_word_filter(n, e_id, 0)
    print(len(data_dict))

    print('Sensitivity computation')
    # Run the sensitivity computation over the filtered results.
    if data_dict:
        data_dict = sensitivity(e_id, data_dict, e_index, POS_NEG, 0)
    print(len(data_dict))

    if data_dict:
        print('Storing sensitive messages')
        # Store the sensitive messages and link them to the event.
        sensitivity_store(data_dict)
        event_sensitivity(e_id, data_dict)

        print('Storing sensitive figures')
        # Store the sensitive accounts and link them to the event.
        figure_add(data_dict, e_id)

    print('Event computation')
    # Pull the event's data for the window and analyze it day by day.
    data_dict = get_event_data(e_index, start_date, end_date)

    max_num = 0
    for date in data_dict:
        print(date)
        info_num = len(data_dict[date])
        print(info_num)
        max_num = max(max_num, info_num)
        # Daily situation analysis for the event.
        event_analyze(e_id, data_dict[date], date)

    # Special analyses (hashtag and sensitive-word distributions).
    event_hashtag_senwords(e_id, data_dict, n)

    data_dict, date = get_semantic_data(e_index, end_date)
    # Semantic analysis of the event.
    event_semantic(e_id, e_name, data_dict, date, WEIBO_NUM)

    return max_num
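
For context, a driver that runs the daily computation over every registered event might look like the sketch below. get_all_events() is a hypothetical stand-in for however the project enumerates events; the call signature of event_cal_main and the meaning of n follow the docstring above.

# Hypothetical daily driver for event_cal_main(). get_all_events() is an
# assumed helper, not part of the code shown above.
import datetime

def daily_run():
    today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)
    for info in get_all_events():  # assumed: yields dicts with e_id, event_name, es_index_name
        # n=1 marks a routine daily run (see event_cal_main's docstring)
        max_num = event_cal_main(info, 1, str(yesterday), str(today))
        print('%s: peak daily volume %d' % (info['event_name'], max_num))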