Example #1
def compare_theme_k_label(theme_name1, theme_name2, submit_user, flag):
    topic_id1 = p.get_pinyin(theme_name1)
    eid_string1 = es_event.get(index=special_event_name,
                               doc_type=special_event_type,
                               id=topic_id1,
                               fields=['k_label'])
    label_list1 = eid_string1['fields']['k_label'][0].split('&')

    topic_id2 = p.get_pinyin(theme_name2)
    eid_string2 = es_event.get(index=special_event_name,
                               doc_type=special_event_type,
                               id=topic_id2,
                               fields=['k_label'])
    label_list2 = eid_string2['fields']['k_label'][0].split('&')
    if flag == 'all':
        new_label_list1 = [i for i in set(label_list1)]
        new_label_list2 = [i for i in set(label_list2)]

    if flag == 'same':
        same_u = set(label_list1) & set(label_list2)
        same_u = [i for i in same_u]
        new_label_list1 = same_u
        new_label_list2 = same_u

    if flag == 'diff':
        diff_u1 = set(label_list1) - (set(label_list1) & set(label_list2))
        new_label_list1 = [i for i in diff_u1]

        diff_u2 = set(label_list2) - (set(label_list1) & set(label_list2))
        new_label_list2 = [i for i in diff_u2]

    return [new_label_list1, new_label_list2]
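A minimal usage sketch for the helper above, assuming the same module-level p and es_event clients are configured and that both themes already exist in the special-event index with a '&'-joined k_label field; the theme names and user are placeholders.

# Placeholder arguments; 'theme_a', 'theme_b' and 'user1' are not real ids.
both_sets = compare_theme_k_label(u'theme_a', u'theme_b', 'user1', 'all')   # deduplicated labels of each theme
shared = compare_theme_k_label(u'theme_a', u'theme_b', 'user1', 'same')     # intersection, returned twice
distinct = compare_theme_k_label(u'theme_a', u'theme_b', 'user1', 'diff')   # labels unique to each theme
labels1, labels2 = distinct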
Example #2
def compare_theme(theme_name1, theme_name2, submit_user, flag):
    if flag == 'all':
        detail_result1 = query_detail_theme(theme_name1, submit_user)
        detail_result2 = query_detail_theme(theme_name2, submit_user)
        return {'detail_result1':detail_result1,'detail_result2':detail_result2}
    else:
        topic_id1 = p.get_pinyin(theme_name1)
        topic_id1 = topic_id1.lower()
        eid_string1 = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id1,  fields=['event'])
        event_list1 = eid_string1['fields']['event'][0].split('&')
        topic_id2 = p.get_pinyin(theme_name2)
        topic_id2 = topic_id2.lower()
        eid_string2 = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id2,  fields=['event'])
        event_list2 = eid_string2['fields']['event'][0].split('&')
        if flag == 'same':
            same_e = set(event_list1)&set(event_list2)
            same_e = [i for i in same_e]
            detail_result1 = event_detail_search(same_e,submit_user)
            detail_result2 = event_detail_search(same_e,submit_user)
        if flag == 'diff':
            diff_e1 = set(event_list1) - (set(event_list1)&set(event_list2))
            diff_e1 = [i for i in diff_e1]
            diff_e2 = set(event_list2) - (set(event_list1)&set(event_list2))
            diff_e2 = [i for i in diff_e2]
            detail_result1 = event_detail_search(diff_e1,submit_user)
            detail_result2 = event_detail_search(diff_e2,submit_user)
        return {'detail_result1':detail_result1,'detail_result2':detail_result2}
Example #3
def compare_theme_user(theme_name1, theme_name2, submit_user, flag):
    topic_id1 = p.get_pinyin(theme_name1)
    topic_id1 = topic_id1.lower()
    eid_string1 = es_event.get(index=special_event_name,
                               doc_type=special_event_type,
                               id=topic_id1,
                               fields=['event'])
    event_list1 = eid_string1['fields']['event'][0].split('&')

    topic_id2 = p.get_pinyin(theme_name2)
    topic_id2 = topic_id2.lower()
    eid_string2 = es_event.get(index=special_event_name,
                               doc_type=special_event_type,
                               id=topic_id2,
                               fields=['event'])
    event_list2 = eid_string2['fields']['event'][0].split('&')

    event_list_all = [event_list1, event_list2]
    uid_list = []
    for event_result in event_list_all:
        uid_list1 = []
        print event_result
        for event in event_result:
            event_value = event
            # event_list.append(event_value)
            c_string = 'START s0 = node:event_index(event_id="' + str(
                event_value) + '") '
            c_string += 'MATCH (s0)-[r]-(s1:User) return s1 LIMIT 50'
            print c_string
            result = graph.run(c_string)
            for i in list(result):
                end_id = dict(i['s1'])
                uid_list1.append(end_id['uid'])
        uid_list.append(uid_list1)
    # return uid_list
    ## How should real-world persons be handled here?
    if flag == 'all':
        uid_list1 = [i for i in set(uid_list[0])]
        uid_list2 = [i for i in set(uid_list[1])]
        detail_result1 = user_detail_search(uid_list1, submit_user)
        detail_result2 = user_detail_search(uid_list2, submit_user)

    if flag == 'same':
        same_u = set(uid_list[0]) & set(uid_list[1])
        same_u = [i for i in same_u]
        detail_result1 = user_detail_search(same_u, submit_user)
        detail_result2 = user_detail_search(same_u, submit_user)

    if flag == 'diff':
        diff_u1 = set(uid_list[0]) - (set(uid_list[0]) & set(uid_list[1]))
        diff_u1 = [i for i in diff_u1]
        diff_u2 = set(uid_list[1]) - (set(uid_list[0]) & set(uid_list[1]))
        diff_u2 = [i for i in diff_u2]
        detail_result1 = user_detail_search(diff_u1, submit_user)
        detail_result2 = user_detail_search(diff_u2, submit_user)
    return {'detail_result1': detail_result1, 'detail_result2': detail_result2}
Example #4
def select_people_es(result):
    list = []
    for ls in result:
        # build one portrait dict per returned uid
        people_dict = {}
        uid = dict(ls)["n.uid"]
        item = es.get(index=portrait_name, doc_type=portrait_type, id=uid)
        people_dict["id"] = item["_id"]
        item = item["_source"]
        people_dict["domain"] = item["domain"]
        people_dict["influence"] = item["influence"]
        people_dict["uname"] = item["uname"]
        people_dict["sensitive_string"] = item["sensitive_string"]
        people_dict["activity_geo_aggs"] = item["activity_geo_aggs"]
        people_dict["importnace"] = item["importnace"]
        people_dict["activeness"] = item["activeness"]
        people_dict["location"] = item["location"]
        people_dict["importance"] = item["importance"]
        people_dict["hashtag"] = item["hashtag"]
        people_dict["photo_url"] = item["photo_url"]
        people_dict["topic_string"] = item["topic_string"]
        people_dict["friendsnum"] = item["friendsnum"]
        people_dict["create_time"] = item["create_time"]
        people_dict["description"] = item["description"]
        people_dict["create_user"] = item["create_user"]
        people_dict["tag"] = item["tag"]
        list.append(people_dict)
    return list
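A hedged usage sketch: any Cypher query whose rows expose an n.uid column can feed select_people_es; the :User label, the LIMIT and the printed fields below are assumptions rather than part of the original code.

# Assumed query; only the 'n.uid' return column matters to select_people_es.
rows = graph.run('MATCH (n:User) RETURN n.uid LIMIT 20')
people = select_people_es(rows)
for person in people:
    print person['uname'], person['influence']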
Example #5
def get_theme_flow(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id,  fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    query_body = {
        'query':{
            'terms':{'en_name':event_list}
            },
        "sort": [{'start_ts':'asc'}]
    }
    name_list = es_event.search(index=event_analysis_name, doc_type=event_text_type, \
                body=query_body,  fields=['name', 'en_name'])['hits']['hits']
    query_body2 = {
        'query':{"match_all":{}},
        "sort": [{'retweeted':'desc'}],
        'size':1
    }
    event_name_list = []
    for i in name_list:
        event_name_list.append(i['fields']['en_name'][0])
    print event_name_list
    result_list = []
    for i in event_name_list:
        max_retweet = es_event.search(index=i, doc_type='text', body=query_body2, \
            fields=['text', 'timestamp'])['hits']['hits']
        print max_retweet,'00000000000'
        text = max_retweet[0]['fields']['text'][0]
        t_datetime = ts2date(max_retweet[0]['fields']['timestamp'][0])
        result_list.append([i, text, t_datetime])
    return result_list
Example #6
def submit_event(input_data):
    if not input_data.has_key('name'):
        input_data['name'] = input_data['keywords']

    if input_data.has_key('mid'):
        # event_id = mid
        input_data['en_name'] = input_data['mid']
        del input_data['mid']
    else:
        e_name = input_data['name']
        e_name_string = ''.join(e_name.split('&'))
        event_id = p.get_pinyin(e_name_string) + '-' + str(
            input_data['event_ts'])  #+str(int(time.time()))
        input_data['en_name'] = event_id

    if not input_data.has_key('start_ts'):
        start_ts = input_data['event_ts'] - 2 * DAY
        input_data['start_ts'] = start_ts
    if not input_data.has_key('end_ts'):
        end_ts = input_data['event_ts'] + 5 * DAY
        input_data['end_ts'] = end_ts
    input_data['submit_ts'] = int(time.time())
    del input_data['event_ts']
    try:
        result = es_event.get(index=event_task_name,
                              doc_type=event_task_type,
                              id=input_data['en_name'])['_source']
        return 'already in'
    except:
        es_event.index(index=event_task_name,
                       doc_type=event_task_type,
                       id=input_data['en_name'],
                       body=input_data)
    return True
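A sketch of the payload this function expects; all values below are placeholders, while the '&'-separated keywords convention and the Unix-timestamp event_ts (consumed and then removed by the function) come from the code above.

# Illustrative payload only.
input_data = {
    'keywords': u'keyword1&keyword2&keyword3',  # '&'-separated keyword string
    'event_ts': int(time.time()),               # event time; start_ts/end_ts are derived from it
    # 'mid': '...',                             # optional: reuse a weibo mid as en_name
    # 'name': u'...',                           # optional: defaults to keywords when absent
}
status = submit_event(input_data)               # True on insert, 'already in' if the id exists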
Example #7
def del_e_theme_rel(theme_name, event_id):
    en_name = p.get_pinyin(theme_name)
    s_string = 'START s0 = node:special_event_index(event="%s"),s3 = node:event_index(event_id="%s")\
                MATCH (s0)-[r:special_event]-(s3) DELETE r' % (en_name,
                                                               event_id)
    print s_string
    graph.run(s_string)

    event_list_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=en_name,\
                            fields=['event'])
    eid_list = event_list_string['fields']['event'][0].split('&')
    new_eid_list = set(eid_list) - set([event_id])
    new_eid_list = [i for i in new_eid_list]
    eid_string = '&'.join(new_eid_list)
    if len(new_eid_list) == 0:
        s_string = 'START s0 = node:special_event_index(event="%s") DELETE s0' % (
            en_name)
        graph.run(s_string)
        es_event.delete(index=special_event_name,
                        doc_type=special_event_type,
                        id=en_name)
    else:
        es_event.update(index=special_event_name,doc_type=special_event_type,id=en_name,\
            body={'doc':{'event':eid_string, 'event_count':len(new_eid_list)}})
    return 'true'
Example #8
def query_detail_theme(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    # topic_id = topic_id + '_' + submit_user
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id,  fields=['event'])
    eid_list = eid_string['fields']['event'][0].split('&')
    result = event_detail_search(eid_list, submit_user)
    return result
Example #9
def get_theme_user_tag(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id,  fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    user_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['user_results','name'])['docs']
    user_list =[]
    for i in user_result:
        event_name = i['fields']['name'][0]
        user_dict = json.loads(i['fields']['user_results'][0])
        for k,v in user_dict.iteritems():
            user_list.append(k)
    user_list_set = [i for i in set(user_list)]

    tag_result = es.mget(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'ids':user_list_set}, fields=['function_mark', 'keywords'])['docs']
    keywords_dict = {}
    mark_dict = {}
    print len(tag_result)
    for i in tag_result:
        i_keywords = json.loads(i['fields']['keywords'][0])
        try:
            i_mark = i['fields']['function_mark'][0]
        except:
            i_mark = ''
        for key in i_keywords:
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        if i_mark:
            user_mark = deal_user_tag(i_mark, submit_user)[0]
            for mark in user_mark:
                try:
                    mark_dict[mark] += 1
                except:
                    mark_dict[mark] = 1
    sorted_keywords_dict = sorted(keywords_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    sorted_mark_dict = sorted(mark_dict.iteritems(), key=lambda x:x[1], reverse=True)[:100]
    
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append([words[0], float(words[1])/max_keywords_value])

    try:
        max_mark_value = sorted_mark_dict[0][1]
    except:
        max_mark_value = 1.0
    normal_mark_list = []
    for words in sorted_mark_dict:
        normal_mark_list.append([words[0], float(words[1])/max_mark_value])

    return {'keywords':normal_keywords_list, 'mark':normal_mark_list}
Example #10
def get_theme_net(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['en_name', 'name'])['docs']
    event_name_dict = {}
    for i in event_result:
        event_en_name = i['fields']['en_name'][0]
        event_name = i['fields']['name'][0]
        event_name_dict[event_en_name] = event_name
    event_graph_id = []
    for i in event_list:
        a = graph.run('start n=node:' + event_index_name + '("' +
                      event_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    # print event_graph_id
    event_id_string = ','.join(event_graph_id)
    query = 'start d=node(' + event_id_string + '),e=node(' + event_id_string + ') match (d)-[r]->(e) return d,type(r),e'
    result = graph.run(query)
    exist_relation = []
    exist_relation_string = []
    for i in result:
        # print i
        dict_i = dict(i)
        start_id = dict_i['d']['event_id']
        end_id = dict_i['e']['event_id']
        exist_relation.append([event_name_dict[start_id], relation_dict[dict_i['type(r)']], \
                    event_name_dict[end_id]])
        # print exist_relation
        relation_string = start_id + '-' + end_id
        exist_relation_string.append(relation_string)
    set_exist_relation = set(exist_relation_string)
    relation_set_count = len(list(set_exist_relation))
    node_count = len(event_list)
    total_count = node_count * (node_count - 1) / 2
    try:
        relation_degree = float(relation_set_count) / total_count
    except:
        relation_degree = 0
    if relation_degree == 0:
        conclusion = u'无关联'  # "no relation"
    elif relation_degree < 0.33 and relation_degree > 0:
        conclusion = u'关联度较低'  # "weakly related"
    elif relation_degree >= 0.33 and relation_degree < 0.66:
        conclusion = u'关联度适中'  # "moderately related"
    elif relation_degree >= 0.66:
        conclusion = u'联系紧密'  # "closely related"  ## undefined!!
    return {'relation_table':exist_relation, 'relation_count':relation_set_count,\
        'conclusion':conclusion, 'relation_degree':relation_degree}
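The relation_degree above is the number of distinct linked event pairs divided by the n*(n-1)/2 possible pairs; a small worked example with assumed counts:

# Worked example of the density score (no graph backend needed).
node_count = 4                                      # four events in the theme
relation_set_count = 3                              # three distinct event-event links found
total_count = node_count * (node_count - 1) / 2     # 6 possible pairs
relation_degree = float(relation_set_count) / total_count  # 0.5, i.e. the "moderately related" band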
Example #11
def get_theme_geo(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['geo_results', 'name'])['docs']
    city_dict = {}
    event_city = {}
    event_name_list = []
    for i in event_result:
        if not i['found']:
            continue
        event_name = i['fields']['name'][0]
        event_city[event_name] = {}
        event_name_list.append(event_name)
        geo_event = json.loads(i['fields']['geo_results'][0])
        # print geo_event
        for k, v in geo_event.iteritems():
            for province_k, city_v in v.iteritems():
                for city_name, city_count in city_v.iteritems():
                    if city_name == 'total' or city_name == 'unknown':
                        continue
                    try:
                        city_dict[city_name] += city_count
                    except:
                        city_dict[city_name] = city_count
                    try:
                        event_city[event_name][city_name] += city_count
                    except:
                        event_city[event_name][city_name] = city_count

    sorted_city_dict = sorted(city_dict.iteritems(),
                              key=lambda x: x[1],
                              reverse=True)[:10]
    top_city = [i[0] for i in sorted_city_dict]
    final_city_count = {}
    for city in event_name_list:
        final_city_count[city] = []
        print final_city_count, 'final_city_count'
        for i in top_city:
            if event_city[city].has_key(i):
                final_city_count[city].append(event_city[city][i])
            else:
                final_city_count[city].append(0)

    return {'top_city': top_city, 'event_city': final_city_count}
Example #12
def show_theme_file_link(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id,  fields=['event','wiki_link', 'file_link'])
    event_list = eid_string['fields']['event'][0].split('&')
    origin_event = event_list
    try:
        file_link = eid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    return final_file
Example #13
def get_theme_keywords(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['keywords_list', 'hashtag_dict'])['docs']
    keywords_dict = {}
    hash_dict = {}
    for i in event_result:
        i_keywords = json.loads(i['fields']['keywords_list'][0])
        i_hashtag = json.loads(i['fields']['hashtag_dict'][0])
        for key in i_keywords:
            # print key,'====='
            try:
                keywords_dict[key[0]] += key[1]
            except:
                keywords_dict[key[0]] = key[1]
        for k, v in i_hashtag.iteritems():
            try:
                hash_dict[k] += v
            except:
                hash_dict[k] = v
    sorted_keywords_dict = sorted(keywords_dict.iteritems(),
                                  key=lambda x: x[1],
                                  reverse=True)[:100]
    try:
        max_keywords_value = sorted_keywords_dict[0][1]
    except:
        max_keywords_value = 1.0
    normal_keywords_list = []
    for words in sorted_keywords_dict:
        normal_keywords_list.append(
            [words[0], float(words[1]) / max_keywords_value])

    sorted_hash_dict = sorted(hash_dict.iteritems(),
                              key=lambda x: x[1],
                              reverse=True)[:100]
    try:
        max_hash_value = sorted_hash_dict[0][1]
    except:
        max_hash_value = 1.0
    normal_hash_list = []
    for words in sorted_hash_dict:
        normal_hash_list.append([words[0], float(words[1]) / max_hash_value])
    return {'keywords': normal_keywords_list, 'hashtag': normal_hash_list}
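The same sort-then-divide-by-max normalization is repeated for keywords, hashtags and user marks across these examples; a standalone sketch of that step (normalize_counts is not a name from the original code):

def normalize_counts(count_dict, top_n=100):
    # Keep the top_n heaviest items and scale their weights to [0, 1].
    ranked = sorted(count_dict.items(), key=lambda x: x[1], reverse=True)[:top_n]
    max_value = ranked[0][1] if ranked else 1.0
    return [[k, float(v) / max_value] for k, v in ranked]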
Example #14
def create_theme_relation(node_key1, node1_list, node1_index_name, rel, node_key2, node2_id, node2_index_name, submit_user):
    node2_id_pinyin = p.get_pinyin(node2_id)
    node2_id_pinyin = node2_id_pinyin.lower()
    event_list_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=node2_id_pinyin,\
                                fields=['event'])
    eid_list = []
    eid_list = event_list_string['fields']['event'][0].split('&')
    eid_list.extend(node1_list)
    eid_list = [i for i in set(eid_list)]
    eid_string = '&'.join(eid_list)
    # print eid_string
    es_event.update(index=special_event_name,doc_type=special_event_type,id=node2_id_pinyin,\
            body={'doc':{'event':eid_string, 'event_count':len(eid_list)}})
    flag = create_rel(node_key1, node1_list, node1_index_name, rel, node_key2, node2_id_pinyin, node2_index_name, submit_user)
    return flag
Example #15
def deal_event_tag(item, submit_user):
    tag = es_event.get(index=event_analysis_name,
                       doc_type=event_text_type,
                       id=item)['_source']['work_tag'][0]
    # return result
    # tag = tag_value
    print tag, '=============!!==='
    tag_list = tag.split('&')
    left_tag = []
    keep_tag = []
    for i in tag_list:
        user_tag = i.split('_')
        if user_tag[0] == submit_user:
            keep_tag.append(user_tag[1])
        else:
            left_tag.append(i)
    return [keep_tag, left_tag]
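A sketch of the work_tag string convention the function above parses, using placeholder values instead of a live es_event lookup: entries are joined by '&' and each entry is '<submitting user>_<tag>'.

# Placeholder tag string in the assumed '<user>_<tag>' format.
tag_string = 'user1_finance&user2_sports&user1_tech'
keep_tag = [t.split('_')[1] for t in tag_string.split('&') if t.split('_')[0] == 'user1']
left_tag = [t for t in tag_string.split('&') if t.split('_')[0] != 'user1']
# keep_tag -> ['finance', 'tech'], left_tag -> ['user2_sports']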
Example #16
def get_theme_user_rank(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    user_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':event_list}, fields=['user_results','name'])['docs']
    user_influence = {}
    for i in user_result:
        # print i
        event_name = i['fields']['name'][0]
        user_dict = json.loads(i['fields']['user_results'][0])
        for k, v in user_dict.iteritems():
            if user_influence.has_key(k):
                continue
            user_influence[k] = {}
            user_influence[k]['id'] = k
            user_influence[k]['name'] = user_name_search(k)
            user_influence[k]['node_type'] = search_type(k)

    for i in user_result:
        event_name = i['fields']['name'][0]
        user_dict = json.loads(i['fields']['user_results'][0])
        for k, v in user_dict.iteritems():
            try:
                user_influence[k]['related_event'].append(event_name)
            except:
                user_influence[k]['related_event'] = []
                user_influence[k]['related_event'].append(event_name)
            try:
                user_influence[k]['influ'] += v['influ']
            except:
                user_influence[k]['influ'] = v['influ']
    user_influence_list = []
    for k, v in user_influence.iteritems():
        user_influence_list.append(v)
    sorted_user_influ = sorted(user_influence_list,
                               key=lambda x: x['influ'],
                               reverse=True)
    max_importance = sorted_user_influ[0]['influ']
    for i in sorted_user_influ:
        i['influ'] = float(i['influ']) / max_importance
    return sorted_user_influ
Example #17
def update_event(event_id):
    result = es_event.get(index=event_task_name,
                          doc_type=event_task_type,
                          id=event_id)['_source']
    # print result
    now_ts = int(time.time())
    if result['end_ts'] < now_ts:
        es_event.update(index=event_task_name,
                        doc_type=event_task_type,
                        id=event_id,
                        body={'doc': {
                            'end_ts': now_ts
                        }})

    os.system(
        "nohup python ./knowledge/cron/event_analysis/event_compute.py imme %s &"
        % event_id)
Example #18
def search_related_event(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    topic_id = topic_id.lower()
    eid_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=topic_id,  fields=['event'])
    event_list = eid_string['fields']['event'][0].split('&')
    related_list = []
    for en_name in event_list:
        s_string = 'START s0 = node:event_index(event_id="%s") \
                MATCH (s0)-[r]-(s3:Event) return s3' %(en_name)
        print s_string
        result = graph.run(s_string)
        for item in result:
            item_dict = dict(item)
            related_list.append(item_dict['s3']['event_id'])
    related_list = set(related_list) - set(event_list)
    related_list = [i for i in related_list]
    result = event_detail_search(related_list, submit_user)
    return result
Example #19
def submit_event(input_data):
    print input_data, '555555555555555'
    if not input_data.has_key('name'):
        name_s = input_data['keywords'].split('&')[:3]
        print name_s, '==='
        name_string = '&'.join(name_s)
        input_data['name'] = name_string
    if input_data.has_key('mid'):
        # event_id = mid
        input_data['en_name'] = input_data['mid']
        del input_data['mid']
    else:
        e_name = input_data['name']
        e_name_string = ''.join(e_name.split('&'))
        event_id = p.get_pinyin(e_name_string) + '-' + str(
            input_data['event_ts'])  #+str(int(time.time()))
        input_data['en_name'] = event_id

    if not input_data.has_key('start_ts'):
        start_ts = input_data['event_ts'] - 2 * DAY
        input_data['start_ts'] = start_ts
    if not input_data.has_key('end_ts'):
        end_ts = input_data['event_ts'] + 5 * DAY
        input_data['end_ts'] = end_ts
    input_data['submit_ts'] = int(time.time())
    del input_data['event_ts']
    # result = es_event.delete(index=event_task_name, doc_type=event_task_type, id=input_data['en_name'])
    try:
        result = es_event.get(index=event_task_name,
                              doc_type=event_task_type,
                              id=input_data['en_name'])['_source']
        return '0'
    except:
        es_event.index(index=event_task_name,
                       doc_type=event_task_type,
                       id=input_data['en_name'],
                       body=input_data)
        if input_data['immediate_compute'] == '1':
            os.system(
                "nohup python ./knowledge/cron/event_analysis/event_compute.py imme %s &"
                % event_id)
    return '1'
Example #20
def add_theme_file_link(theme_name, file_name, operation):
    new_label = file_name.split('+')
    en_name = p.get_pinyin(theme_name)
    print en_name
    theme_label = es_event.get(index=special_event_name, doc_type=special_event_type, id=en_name,\
            fields=['file_link'])
    print theme_label, '------------'
    try:
        theme_label_list = theme_label['fields']['file_link'][0].split('+')
    except:
        theme_label_list = []
    if operation == 'add':
        theme_label_list.extend(new_label)
    elif operation == 'del':
        theme_label_list = set(theme_label_list) - set(new_label)
    theme_label_list = [i for i in set(theme_label_list)]
    theme_label_string = '+'.join(theme_label_list)
    es_event.update(index=special_event_name,doc_type=special_event_type,id=en_name,\
            body={'doc':{'file_link':theme_label_string}})
    return True
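A hedged usage sketch; judging from show_theme_file_link in Example #12, each '+'-separated file_link entry appears to be a comma-separated '<name>,<link>' pair, but that format and the values below are assumptions.

# Placeholder theme and link; the theme must already exist in the special-event index.
add_theme_file_link(u'some theme', 'report.pdf,http://example.com/report.pdf', 'add')
add_theme_file_link(u'some theme', 'report.pdf,http://example.com/report.pdf', 'del')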
Example #21
def select_event_es(result):
    print "111"
    list = []
    for ls in result:
        event_dict = {}
        uid = dict(ls)["n"]["event_id"]
        item = es.get(index=event_analysis_name, doc_type=event_type, id=uid)
        event_dict["id"] = item["_id"]
        item = item["_source"]
        event_dict["name"] = item["name"]
        event_dict["weibo_counts"] = item["weibo_counts"]
        event_dict["uid_counts"] = item["uid_counts"]
        event_dict["start_ts"] = item["start_ts"]
        event_dict["location"] = item["location"]
        event_dict["tag"] = item["tag"]
        event_dict["description"] = item["description"]
        event_dict["submit_ts"] = item["submit_ts"]
        event_dict["end_ts"] = item["end_ts"]
        print event_dict
        list.append(event_dict)
    return list
Example #22
def get_theme_related(theme_name, submit_user):
    topic_id = p.get_pinyin(theme_name)
    eid_string = es_event.get(index=special_event_name,
                              doc_type=special_event_type,
                              id=topic_id,
                              fields=['event', 'wiki_link', 'file_link'])
    event_list = eid_string['fields']['event'][0].split('&')
    try:
        file_link = eid_string['fields']['file_link'][0].split('+')
    except:
        file_link = []
    final_file = []
    for i in file_link:
        final_file.append(i.split(','))
    try:
        final_wiki = json.loads(eid_string['fields']['wiki_link'][0])
    except:
        final_wiki = []
    event_graph_id = []
    for i in event_list:
        a = graph.run('start n=node:' + event_index_name + '("' +
                      event_primary + ':' + str(i) + '") return id(n)')
        for j in a:
            event_graph_id.append(str(dict(j)['id(n)']))
    print event_graph_id
    event_id_string = ','.join(event_graph_id)
    query = 'start d=node(' + event_id_string + ') match (d)-[r]-(e) return labels(e), e'
    result = graph.run(query)
    node_dict = {}
    for i in result:
        dict_i = dict(i)
        node_type = dict_i['labels(e)'][0]

        if node_type == people_node:
            node_id = dict_i['e']['uid']
            try:
                node_dict['user'].append(node_id)
            except:
                node_dict['user'] = []
                node_dict['user'].append(node_id)
        elif node_type == org_node:
            node_id = dict_i['e']['org_id']
            try:
                node_dict['org'].append(node_id)
            except:
                node_dict['org'] = []
                node_dict['org'].append(node_id)

        elif node_type == event_node:
            node_id = dict_i['e']['event_id']
            if node_id in event_list:  # skip events already inside the theme
                continue
            try:
                node_dict['event'].append(node_id)
            except:
                node_dict['event'] = []
                node_dict['event'].append(node_id)
    # use .get() so a theme with no related users/orgs/events does not raise KeyError
    uid_list = [i for i in set(node_dict.get('user', []))]
    org_list = [i for i in set(node_dict.get('org', []))]
    event_list = [i for i in set(node_dict.get('event', []))]
    user_result = es.mget(index=portrait_index_name,
                          doc_type=portrait_index_type,
                          body={'ids': uid_list},
                          fields=['uname', 'uid'])['docs']
    org_result = es.mget(index=portrait_index_name,
                         doc_type=portrait_index_type,
                         body={'ids': org_list},
                         fields=['uname', 'uid'])['docs']
    event_result = es_event.mget(index=event_analysis_name,
                                 doc_type=event_text_type,
                                 body={'ids': event_list},
                                 fields=['en_name', 'name'])['docs']
    final_user = []
    for i in user_result:
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_user.append([i['fields']['uid'][0], uname_s])
        else:
            final_user.append([i['_id'], i['_id']])

    final_org = []
    for i in org_result:
        if i['found'] == True:
            if i['fields']['uname'][0] == '':
                uname_s = i['fields']['uid'][0]
            else:
                uname_s = i['fields']['uname'][0]
            final_org.append([i['fields']['uid'][0], uname_s])
        else:
            final_org.append([i['_id'], i['_id']])

    final_event = []
    for i in event_result:
        if i['found'] == True:
            final_event.append(
                [i['fields']['en_name'][0], i['fields']['name'][0]])
        else:
            final_event.append([i['_id'], i['_id']])
    return [final_user, final_org, final_event, final_file, final_wiki]
Example #23
def search_related_e_card(item, submit_user, theme_name):
    if theme_name:
        theme_name = theme_name + '_' + submit_user
        theme_name_pinyin = p.get_pinyin(theme_name)
        event_list_string = es_event.get(index=special_event_name, doc_type=special_event_type, id=theme_name_pinyin,\
                            fields=['event'])
        eid_list = []
        eid_list = event_list_string['fields']['event'][0].split('&')
    else:
        eid_list = []

    query_body = {
        "query": {
            'bool': {
                'should': [{
                    "wildcard": {
                        'keywords': '*' + str(item.encode('utf-8')) + '*'
                    }
                }, {
                    "wildcard": {
                        'en_name': '*' + str(item.encode('utf-8')) + '*'
                    }
                }, {
                    "wildcard": {
                        'name': '*' + str(item.encode('utf-8')) + '*'
                    }
                }]
            }
        },
        'size': 1000
    }
    fields_list = [
        'en_name', 'name', 'event_type', 'real_time', 'real_geo', 'uid_counts',
        'weibo_counts', 'keywords', 'work_tag'
    ]
    only_eid = []
    event_id_list = []
    u_nodes_list = {}
    e_nodes_list = {}
    event_relation = []
    try:
        event_result = es_event.search(index=event_analysis_name, doc_type=event_text_type, \
                body=query_body, fields=['en_name'])['hits']['hits']
    except:
        return 'node does not exist'
    # print event_result
    search_eid = []
    result = []
    for i in event_result:
        i_fields = i['fields']
        search_eid.append(i_fields['en_name'][0])
    show_id_set = set(search_eid) - set(eid_list)
    show_id = [i for i in show_id_set]
    if not show_id:
        return []
    event_result = es_event.mget(index=event_analysis_name, doc_type=event_text_type, \
                body={'ids':show_id}, fields=fields_list)['docs']
    for i in event_result:
        event = []
        i_fields = i['fields']
        for j in fields_list:
            if not i_fields.has_key(j):
                event.append('')
                continue
            if j == 'keywords':
                keywords = i_fields[j][0].split('&')
                keywords = keywords[:5]
                event.append(keywords)
            elif j == 'work_tag':
                tag = deal_event_tag(i_fields[j][0], submit_user)[0]
                event.append(tag)
            else:
                event.append(i_fields[j][0])
        result.append(event)
    return result
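A usage sketch with placeholder arguments; an empty theme_name skips the exclusion of events already attached to a theme, and each returned row follows fields_list order.

# Placeholder keyword and user.
cards = search_related_e_card(u'placeholder keyword', 'user1', '')
# Row layout: [en_name, name, event_type, real_time, real_geo,
#              uid_counts, weibo_counts, keywords, work_tag]
for card in cards:
    print card[1], card[7]   # event name and its top keywords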
Example #24
def group_event_rank(g_name, submit_user):
    group_id = p.get_pinyin(g_name)
    group_id = group_id.lower()
    uid_string = es_group.get(index=group_name,
                              doc_type=group_type,
                              id=group_id,
                              fields=['people'])
    uid_list = uid_string['fields']['people'][0].split('&')
    related_event_list = []
    event_user_dict = {}
    for uid in uid_list:
        c_string = 'start n=node:' + node_index_name + '("' + people_primary + ':' + str(
            uid) + '") match (n)-[r]-(e:Event) return e'
        result = graph.run(c_string)
        for event in result:
            print event, '---------'
            # if event:
            event_dict = dict(event)
            event_id = event_dict['e']['event_id']
            related_event_list.append(event_id)
            try:
                event_user_dict[event_id].append(uid)
            except:
                event_user_dict[event_id] = []
                event_user_dict[event_id].append(uid)
    event_rank_list = []
    for k, v in event_user_dict.iteritems():
        k_dict = {}
        event_result = es_event.get(index=event_analysis_name,
                                    doc_type=event_text_type,
                                    id=k,
                                    fields=['user_results', 'name'])
        event_rank = event_result['fields']['user_results'][0]
        event_name = event_result['fields']['name'][0]
        user_results = json.loads(event_rank)
        k_dict['event_id'] = k
        k_dict['event_name'] = event_name
        k_dict['user'] = v
        k_dict['influ'] = 0
        print k
        for u in v:
            print u
            # if not user_results.has_key(u):
            #     continue
            try:
                influ_val = user_results[u]['influ']
            except:
                print u, '00000'
                influ_val = 10.0
            k_dict['influ'] += influ_val
        event_rank_list.append(k_dict)
    # print event_rank_list,'event_rank_list'
    sorted_event = sorted(event_rank_list,
                          key=lambda x: x['influ'],
                          reverse=True)
    try:
        max_value = sorted_event[0]['influ']
    except:
        return []
    final_event_rank = []
    for ii in sorted_event:
        ii['influ'] = float(ii['influ']) / max_value
        final_event_rank.append(ii)
    return final_event_rank