示例#1
0
文件: utils.py 项目: yuanhuiru/xnr2
def get_save_step_three_2(task_detail):
    """Save the fan list of a Facebook virtual account and queue follow tasks.

    Reads ``task_detail['task_id']`` and the comma-separated string
    ``task_detail['followers_uids']``.  The de-duplicated uids are indexed
    as ``fans_list`` under the task id, then one ``add`` operation per uid
    is handed to ``add_operate2redis``.

    Returns True when the document was indexed and every operation was
    queued; False when queueing an operation fails or any step raises.
    """
    task_id = task_detail['task_id']
    try:
        # De-duplicate the uids; ordering of the resulting list is arbitrary.
        uid_list = list(set(task_detail['followers_uids'].split(',')))
        fans_doc = {
            'fans_list': uid_list,
            'xnr_user_no': task_id,
        }
        index_response = es.index(index=fb_xnr_fans_followers_index_name,
                                  doc_type=fb_xnr_fans_followers_index_type,
                                  id=task_id,
                                  body=fans_doc)
        print(index_response)

        # Add one follow task per uid to the redis queue; stop at the
        # first one that cannot be queued.
        for uid in uid_list:
            operation = {
                'channel': 'facebook',
                'operate_type': 'add',
                'content': {
                    'xnr_user_no': task_id,
                    'uid': uid
                }
            }
            if not add_operate2redis(operation):
                return False
        return True
    except:
        return False
示例#2
0
文件: utils.py 项目: yuanhuiru/xnr2
def get_save_step_one(task_detail):
    """Persist the step-one settings of a new virtual account.

    The next ``user_no`` is one more than the highest ``user_no`` currently
    stored in the index (or 1 when the index has no documents).  It is
    written back into ``task_detail['user_no']`` and converted to the
    document id with ``user_no2fb_id``.

    Returns True when the document was indexed, False when building or
    indexing it raised.  Errors from the initial search are not caught.
    """
    newest_first_query = {
        'query': {'match_all': {}},
        'sort': {'user_no': {'order': 'desc'}}
    }
    hits = es.search(index=fb_xnr_index_name,
                     doc_type=fb_xnr_index_type,
                     body=newest_first_query)['hits']['hits']
    user_no_current = hits[0]['_source']['user_no'] + 1 if hits else 1

    task_detail['user_no'] = user_no_current
    # Original note: five-digit id, e.g. WXNR0001.
    task_id = user_no2fb_id(user_no_current)
    print('task_id')
    print(task_id)

    def _comma_to_amp(field):
        # UTF-8 encode the value and turn its comma separators into '&'.
        return '&'.join(task_detail[field].encode('utf-8').split(','))

    try:
        record = {
            'user_no': task_detail['user_no'],
            'domain_name': task_detail['domain_name'],
            'role_name': task_detail['role_name'],
            'psy_feature': _comma_to_amp('psy_feature'),
            'political_side': task_detail['political_side'],
            'business_goal': _comma_to_amp('business_goal'),
            'monitor_keywords': _comma_to_amp('monitor_keywords'),
            'create_status': 0,  # step one finished
        }
        print(es.index(index=fb_xnr_index_name,
                       doc_type=fb_xnr_index_type,
                       id=task_id,
                       body=record))
        mark = True
    except:
        mark = False
    return mark
示例#3
0
def get_save_step_two(task_detail):
    """Persist the step-two settings of a new virtual account.

    Increments the counter returned by ``get_fb_xnr_no`` and stores the new
    value in redis under ``fb_xnr_max_no`` before anything else, then
    indexes the account document under the id derived from that number.
    ``task_detail['user_no']`` is overwritten with the new number.

    Returns the tuple ``(True, task_id)``.  Nothing is caught here, so a
    missing key or a storage error propagates to the caller.
    """
    # Reserve the next account number.
    user_no_current = get_fb_xnr_no() + 1
    r.set(fb_xnr_max_no, user_no_current)

    task_detail['user_no'] = user_no_current
    # Original note: five-digit id, e.g. FXNR0001.
    task_id = user_no2fb_id(user_no_current)

    record = {
        'submitter': task_detail['submitter'],
        'user_no': task_detail['user_no'],
        'domain_name': task_detail['domain_name'],
        'role_name': task_detail['role_name'],
        'psy_feature': '&'.join(
            task_detail['psy_feature'].encode('utf-8').split(',')),
        'political_side': task_detail['political_side'],
        'business_goal': '&'.join(
            task_detail['business_goal'].encode('utf-8').split(',')),
        # Keywords keep their comma separator (unlike the '&' fields above).
        'monitor_keywords': ','.join(
            task_detail['monitor_keywords'].encode('utf-8').split(',')),
        'active_time': '&'.join(task_detail['active_time'].split('-')),
        'day_post_average': json.dumps(
            task_detail['day_post_average'].split('-')),
        'create_status': 1,  # step two finished
        'xnr_user_no': task_id,  # virtual account number
        'create_time': int(time.time()),
    }
    print(es.index(index=fb_xnr_index_name,
                   doc_type=fb_xnr_index_type,
                   id=task_id,
                   body=record))
    return True, task_id
示例#4
0
文件: utils.py 项目: feifanhanmc/xnr2
def addto_twitter_corpus(task_detail):
    """Enrich a tweet description and store it in the Twitter corpus index.

    ``task_detail`` must contain ``timestamp`` and ``tid``.  The function
    tries to add the tweet ``text``, the ``comment``/``share``/``favorite``
    counters and the author's ``nick_name`` to ``task_detail`` (in place).
    Enrichment is best-effort: if it fails, whatever was gathered so far is
    still indexed.

    Returns True when the corpus document was indexed, False otherwise.
    """
    flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        corpus_result = es_xnr.get(index=flow_text_index_name,
                                   doc_type=twitter_flow_text_index_type,
                                   id=task_detail['tid'])['_source']
        task_detail['text'] = corpus_result['text']

        # Look up the three interaction counters; default to 0 when the
        # lookup returns nothing.
        tid_result = lookup_tid_attend_index(task_detail['tid'],
                                             task_detail['timestamp'],
                                             task_detail['timestamp'])
        if tid_result:
            task_detail['comment'] = tid_result['comment']
            task_detail['share'] = tid_result['share']
            task_detail['favorite'] = tid_result['favorite']
        else:
            task_detail['comment'] = 0
            task_detail['share'] = 0
            task_detail['favorite'] = 0

        # Look up the author's nickname.  The uid comes from the fetched
        # flow-text document (the previous code read an undefined name
        # here, so the nickname was never stored).
        task_detail['nick_name'] = get_user_nickname(corpus_result['uid'])
    except Exception as e:
        # Best-effort enrichment: report the problem and index what we have.
        print(e)

    try:
        es_xnr.index(index=twitter_xnr_corpus_index_name,
                     doc_type=twitter_xnr_corpus_index_type,
                     id=task_detail['tid'],
                     body=task_detail)
        mark = True
    except Exception:
        mark = False
    return mark
示例#5
0
def addto_warning_corpus(task_detail):
    """Copy a tweet into the warning corpus with extra warning metadata.

    The flow-text document identified by ``task_detail['tid']`` is fetched,
    extended with fields taken from ``task_detail`` (``xnr_user_no``,
    ``warning_source``, ``create_time``), the author's nickname, the
    interaction counters and a ``content_type`` of ``'friends'`` or
    ``'unfriends'``, and then indexed under the same tid.

    Returns True on success and False if any step raises.
    """
    flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(task_detail['timestamp'])
    try:
        doc = es_xnr_2.get(index=flow_text_index_name,
                           doc_type=twitter_flow_text_index_type,
                           id=task_detail['tid'])['_source']
        doc.update({
            'xnr_user_no': task_detail['xnr_user_no'],
            'warning_source': task_detail['warning_source'],
            'create_time': task_detail['create_time'],
            'validity': 1,
        })
        doc['nick_name'] = get_user_nickname(task_detail['uid'])

        # Interaction counters fall back to 0 when the lookup is empty.
        counters = lookup_tid_attend_index(task_detail['tid'], task_detail['timestamp'])
        for counter_name in ('comment', 'share', 'favorite'):
            doc[counter_name] = counters[counter_name] if counters else 0

        # Look up the account's fan list to classify the author.
        friends_list = lookup_xnr_fans_followers(task_detail['xnr_user_no'], 'fans_list')
        overlap = set_intersection(task_detail['uid'], friends_list)
        doc['content_type'] = 'friends' if overlap > 0 else 'unfriends'

        es_xnr_2.index(index=twitter_warning_corpus_index_name,
                       doc_type=twitter_warning_corpus_index_type,
                       id=task_detail['tid'],
                       body=doc)
    except:
        return False
    return True
示例#6
0
def write_envent_warming(today_datetime,event_warming_content,task_id):
    """Index an event-warning document into the index for the given day.

    The index name is the event-warning prefix followed by
    ``ts2datetime(today_datetime)``; the document id is ``task_id``.

    Always returns True.  There is no error handling here, so any exception
    raised while indexing propagates to the caller.
    """
    day_suffix = ts2datetime(today_datetime)
    target_index = twitter_event_warning_index_name_pre + day_suffix
    es_xnr_2.index(index=target_index,
                   doc_type=twitter_event_warning_index_type,
                   body=event_warming_content,
                   id=task_id)
    return True
示例#7
0
文件: utils.py 项目: SDsonghuiui/xnr2
def domain_update_task(domain_name,
                       create_type,
                       create_time,
                       submitter,
                       description,
                       remark,
                       compute_status=0):
    """Queue a Twitter domain-detection task and (re)write its index record.

    The task description is pushed onto the redis list
    ``tw_target_domain_detect_queue_name`` as JSON, then a record with the
    same fields (plus an empty ``group_size`` and ``compute_status`` 0) is
    indexed under the pinyin form of ``domain_name``.  Unlike
    ``domain_create_task`` there is no check for an existing record.

    Returns True on success, False if queueing or indexing raised.
    (Previously the result was computed but never returned, so callers
    always received None.)
    """
    # Computed outside the try block on purpose: a failure to derive the id
    # is not swallowed.
    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')

    try:
        domain_task_dict = {
            'domain_pinyin': domain_pinyin,
            'domain_name': domain_name,
            'create_type': json.dumps(create_type),
            'create_time': create_time,
            'submitter': submitter,
            'description': description,
            'remark': remark,
            'compute_status': compute_status,
        }

        print('create_type')
        print(create_type)

        r.lpush(tw_target_domain_detect_queue_name,
                json.dumps(domain_task_dict))

        item_exist = {
            'domain_pinyin': domain_task_dict['domain_pinyin'],
            'domain_name': domain_task_dict['domain_name'],
            'create_type': domain_task_dict['create_type'],
            'create_time': domain_task_dict['create_time'],
            'submitter': domain_task_dict['submitter'],
            'description': domain_task_dict['description'],
            'remark': domain_task_dict['remark'],
            'group_size': '',
            # The stored record always starts at 0 (creation info saved),
            # regardless of the compute_status argument.
            'compute_status': 0,
        }
        es.index(index=tw_domain_index_name,
                 doc_type=tw_domain_index_type,
                 id=item_exist['domain_pinyin'],
                 body=item_exist)

        mark = True
    except Exception as e:
        print(e)
        mark = False
    return mark
示例#8
0
def domain_create_task(domain_name,
                       create_type,
                       create_time,
                       submitter,
                       description,
                       remark,
                       compute_status=0):
    """Create a Facebook domain-detection task unless the domain exists.

    The domain id is the pinyin form of ``domain_name``.  If a record with
    that id can be fetched, the string ``'domain name exists!'`` is
    returned.  Otherwise (including when the fetch fails for any reason)
    the task is pushed onto ``fb_target_domain_detect_queue_name`` and a
    record is indexed.

    Returns ``'domain name exists!'``, or True/False for the outcome of
    queueing and indexing.
    """
    task_id = pinyin.get(domain_name, format='strip', delimiter='_')
    try:
        es.get(index=fb_domain_index_name,
               doc_type=fb_domain_index_type,
               id=task_id)['_source']
    except:
        # Any failure to fetch is treated as "no such domain yet".
        pass
    else:
        return 'domain name exists!'

    try:
        queued_task = {
            'domain_pinyin': pinyin.get(domain_name,
                                        format='strip',
                                        delimiter='_'),
            'domain_name': domain_name,
            'create_type': json.dumps(create_type),
            'create_time': create_time,
            'submitter': submitter,
            'description': description,
            'remark': remark,
            'compute_status': compute_status,
        }
        r.lpush(fb_target_domain_detect_queue_name,
                json.dumps(queued_task))

        copied_fields = ('domain_pinyin', 'domain_name', 'create_type',
                         'create_time', 'submitter', 'description', 'remark')
        record = dict((field, queued_task[field]) for field in copied_fields)
        record['group_size'] = ''
        record['compute_status'] = 0  # creation info stored
        print(es.index(index=fb_domain_index_name,
                       doc_type=fb_domain_index_type,
                       id=record['domain_pinyin'],
                       body=record))
        return True
    except Exception as e:
        print(e)
        return False
示例#9
0
def get_save_step_three_2(task_detail):
    """Save the follower list of a Twitter virtual account.

    ``task_detail['followers_uids']`` is a comma-separated uid string; the
    de-duplicated uids are indexed as ``followers_list`` under
    ``task_detail['task_id']``.

    Returns True when the document was indexed, False if that step raised.
    A missing ``task_id`` or ``nick_name`` key raises KeyError instead.
    """
    task_id = task_detail['task_id']
    # The value is not used, but the lookup is kept: callers get a KeyError
    # when 'nick_name' is absent.
    _nick_name = task_detail['nick_name']
    try:
        unique_uids = list(set(task_detail['followers_uids'].split(',')))
        followers_doc = {
            'followers_list': unique_uids,
            'xnr_user_no': task_id,
        }
        print(es.index(index=tw_xnr_fans_followers_index_name,
                       doc_type=tw_xnr_fans_followers_index_type,
                       id=task_id,
                       body=followers_doc))
    except:
        return False
    return True
示例#10
0
def addto_facebook_corpus(task_detail):
    """Enrich a Facebook post description and store it in the corpus index.

    ``task_detail`` must contain ``timestamp`` and ``fid``.  The post text,
    the ``comment``/``share``/``favorite`` counters and the author's
    nickname are added to ``task_detail`` in place.  A failure during this
    enrichment is ignored and the (possibly partial) dict is indexed anyway.

    Returns True when indexing succeeded, False otherwise.
    """
    day_suffix = ts2datetime(task_detail['timestamp'])
    flow_text_index_name = facebook_flow_text_index_name_pre + day_suffix
    try:
        source_doc = es_xnr.get(index=flow_text_index_name,
                                doc_type=facebook_flow_text_index_type,
                                id=task_detail['fid'])['_source']
        task_detail['text'] = source_doc['text']

        # Fetch the three interaction counters; use 0 when nothing is found.
        counters = lookup_fid_attend_index(task_detail['fid'],
                                           task_detail['timestamp'],
                                           task_detail['timestamp'])
        for counter_name in ('comment', 'share', 'favorite'):
            task_detail[counter_name] = counters[counter_name] if counters else 0

        # Fetch the author's nickname.
        task_detail['nick_name'] = get_user_nickname(source_doc['uid'])
    except:
        # Enrichment is best-effort; indexing below still runs.
        pass

    try:
        es_xnr.index(index=facebook_xnr_corpus_index_name,
                     doc_type=facebook_xnr_corpus_index_type,
                     id=task_detail['fid'],
                     body=task_detail)
    except:
        return False
    return True
示例#11
0
文件: utils.py 项目: yuanhuiru/xnr2
def create_fans_info(xnr_user_no):
    """Index a document with an empty ``fans_list`` under ``xnr_user_no``.

    The response of the index call is printed; nothing is returned.
    """
    empty_fans_doc = {'fans_list': []}
    response = es.index(fb_xnr_fans_followers_index_name,
                        fb_xnr_fans_followers_index_type,
                        body=empty_fans_doc,
                        id=xnr_user_no)
    print(response)
示例#12
0
文件: utils.py 项目: SDsonghuiui/xnr2
def get_generate_example_model(domain_name, role_name, mail):
    """Build an example account model for a domain role and save it.

    Steps, in order:
      1. ``export_group_info(domain_name, mail)`` is called.
      2. The role record ``<domain_pinyin>_<role_en>`` is fetched and then
         rewritten in place: political side label, top psychological
         features, monitor keywords extracted from the members' posts,
         profile fields of the members, fixed defaults, top active-time
         slots and mean daily post count.
      3. The record is dumped as JSON to
         ``EXAMPLE_MODEL_PATH + 'tw_<domain_pinyin>_<role_en>.json'`` and a
         small index entry is written.

    Returns True when step 3 succeeded and False when it raised.  Errors in
    steps 1 and 2 are not caught.
    """
    export_group_info(domain_name, mail)

    domain_pinyin = pinyin.get(domain_name, format='strip', delimiter='_')
    role_en = tw_domain_ch2en_dict[role_name]
    item = es.get(index=tw_role_index_name,
                  doc_type=tw_role_index_type,
                  id=domain_pinyin + '_' + role_en)['_source']

    # Political leaning: take the first entry of the stored list.
    top_side = json.loads(item['political_side'])[0][0]
    if top_side == 'mid':
        side_label = u'中立'
    elif top_side == 'left':
        side_label = u'左倾'
    else:
        side_label = u'右倾'
    item['political_side'] = side_label

    # Psychological features: names of the first TOP_PSY_FEATURE entries.
    ranked_features = json.loads(item['psy_feature'])
    item['psy_feature'] = '&'.join(
        [ranked_features[rank][0] for rank in range(TOP_PSY_FEATURE)])
    role_group_uids = json.loads(item['member_uids'])

    current_time = datetime2ts(S_DATE) if S_TYPE == 'test' else int(time.time())

    index_name_list = get_flow_text_index_list(current_time)
    keyword_query = {
        'query': {
            'filtered': {
                'filter': {
                    'terms': {
                        'uid': role_group_uids
                    }
                }
            }
        },
        'size': MAX_VALUE,
        '_source': ['keywords_string']
    }
    keyword_hits = es_flow_text.search(index=index_name_list,
                                       doc_type=flow_text_index_type,
                                       body=keyword_query)['hits']['hits']

    # Every hit contributes '&' followed by its keyword string.
    keywords_string = ''.join(
        ['&' + hit['_source']['keywords_string'] for hit in keyword_hits])
    extracted = extract_keywords(keywords_string)
    item['monitor_keywords'] = ','.join(
        [keyword.word.encode('utf-8') for keyword in extracted])

    profile_docs = es_user_portrait.mget(index=profile_index_name,
                                         doc_type=profile_index_type,
                                         body={'ids':
                                               role_group_uids})['docs']
    # Stays an empty list when no profile is found; otherwise the fields of
    # the last found profile win.
    item['nick_name'] = []
    for doc in profile_docs:
        if not doc['found']:
            continue
        profile = doc['_source']
        item['nick_name'] = profile.get('username', '')
        item['location'] = profile.get('location', '')
        item['description'] = profile.get('description', '')

    item['business_goal'] = u'渗透'
    item['age'] = 30
    item['career'] = u'自由职业'

    # Indices of the largest active-time values, highest first.
    activity = np.array(json.loads(item['active_time']))
    item['active_time'] = np.argsort(-activity)[:TOP_ACTIVE_TIME].tolist()

    daily_posts = np.array(json.loads(item['day_post_num']))
    item['day_post_num'] = np.mean(daily_posts).tolist()
    item['role_name'] = role_name

    task_id_new = 'tw_' + domain_pinyin + '_' + role_en
    example_model_file_name = EXAMPLE_MODEL_PATH + task_id_new + '.json'
    try:
        with open(example_model_file_name, "w") as model_file:
            json.dump(item, model_file)
        index_entry = {
            'domain_name': domain_name,
            'role_name': role_name,
        }
        es.index(index=tw_example_model_index_name,
                 doc_type=tw_example_model_index_type,
                 body=index_entry,
                 id=task_id_new)
    except:
        return False
    return True
示例#13
0
def _load_json_field(value):
    """Return ``value`` decoded from JSON when it is a string, else as-is."""
    # (str, unicode) on Python 2; (str, str) on Python 3.
    if isinstance(value, (str, type(u''))):
        return json.loads(value)
    return value


def report_warming_content(task_detail):
    """Assemble a warning report and store it in the report-management index.

    ``task_detail`` supplies ``report_type``, ``report_time``,
    ``xnr_user_no``, ``event_name``, ``uid``, ``tw_info`` (a list of dicts
    with ``tid`` and ``timestamp``) and ``user_info`` (an iterable of uids,
    possibly empty).  ``date_time`` is additionally read for time reports.

    For every entry of ``tw_info`` the matching content is looked up in the
    warning index that corresponds to the report type; when nothing
    matches, the raw flow-text document is used instead.  Lookup failures
    are printed and skipped.

    Returns True when a report id could be derived, at least one tweet was
    collected and the report was indexed; False otherwise.

    Fixes relative to the previous version:
      * stored content lists were passed to ``json.dumps`` and then iterated
        character by character, so user/event lookups always failed;
      * the time lookup used a non-existent ``query_body`` keyword and read
        the hit without ``_source``;
      * undefined names (``fb_result``, ``user_dict``, ``weibo_info``) and a
        stale ``item['_id']`` were referenced, the latter two crashing the
        function whenever ``user_info`` was non-empty or the report type
        was speech.
    """
    report_type = task_detail['report_type']

    report_dict = dict()
    report_dict['report_type'] = report_type
    report_dict['report_time'] = task_detail['report_time']
    report_dict['xnr_user_no'] = task_detail['xnr_user_no']
    report_dict['event_name'] = task_detail['event_name']
    report_dict['uid'] = task_detail['uid']
    report_dict['nick_name'] = get_user_nickname(task_detail['uid'])

    tw_list = []
    user_list = []
    tw_info = task_detail['tw_info']
    for item in tw_info:
        lookup_mark = False
        item['timestamp'] = int(item['timestamp'])

        if report_type == u'人物':
            # User report: search the user's warning content for this tid.
            twitter_user_warning_index_name = twitter_user_warning_index_name_pre + ts2datetime(item['timestamp'])
            twitter_user_warming_id = task_detail['xnr_user_no'] + '_' + task_detail['uid']
            try:
                twitter_user_result = es_xnr_2.get(index=twitter_user_warning_index_name,
                                                   doc_type=twitter_user_warning_index_type,
                                                   id=twitter_user_warming_id)['_source']
                # NOTE(review): 'content' is presumably stored as a JSON
                # string; a plain list is accepted too -- confirm schema.
                for content in _load_json_field(twitter_user_result['content']):
                    if content['tid'] == item['tid']:
                        lookup_mark = True
                        tw_list.append(content)
            except Exception:
                print('user_error!')

        elif report_type == u'言论':
            # Speech report: one warning document per tweet.
            twitter_speech_warning_index_name = twitter_speech_warning_index_name_pre + ts2datetime(item['timestamp'])
            try:
                twitter_speech_result = es_xnr_2.get(index=twitter_speech_warning_index_name,
                                                     doc_type=twitter_speech_warning_index_type,
                                                     id=task_detail['xnr_user_no'] + '_' + item['tid'])['_source']
                report_dict['uid'] = twitter_speech_result['uid']
                lookup_mark = True
                tw_list.append(twitter_speech_result)
            except Exception:
                print('speech_error!')

        elif report_type == u'事件':
            # Event report: search the event's main tweets for this tid.
            twitter_event_warning_index_name = twitter_event_warning_index_name_pre + ts2datetime(item['timestamp'])
            event_warning_id = task_detail['xnr_user_no'] + '_' + task_detail['event_name']
            try:
                event_result = es_xnr_2.get(index=twitter_event_warning_index_name,
                                            doc_type=twitter_event_warning_index_type,
                                            id=event_warning_id)['_source']
                # NOTE(review): same storage assumption as 'content' above.
                for event in _load_json_field(event_result['main_twitter_info']):
                    if event['tid'] == item['tid']:
                        lookup_mark = True
                        tw_list.append(event)
            except Exception:
                print('event_error!')

        elif report_type == u'时间':
            # Time report: scan every document of the per-date index.
            year = ts2yeartime(item['timestamp'])
            twitter_timing_warning_index_name = twitter_timing_warning_index_name_pre + year + '_' + task_detail['date_time']
            try:
                time_result = es_xnr_2.search(index=twitter_timing_warning_index_name,
                                              doc_type=twitter_timing_warning_index_type,
                                              body={'query': {'match_all': {}}})['hits']['hits']
                for timedata in time_result:
                    # Document fields of a search hit live under '_source'.
                    warning_content = _load_json_field(
                        timedata['_source']['twitter_date_warming_content'])
                    for data in warning_content:
                        if data['tid'] == item['tid']:
                            lookup_mark = True
                            tw_list.append(data)
            except Exception:
                print('time_error!')

        if not lookup_mark:
            # Nothing matched in the warning index: fall back to the raw
            # flow-text document and add nickname and counters to it.
            flow_text_index_name = twitter_flow_text_index_name_pre + ts2datetime(item['timestamp'])
            try:
                tw_result = es_xnr_2.get(index=flow_text_index_name,
                                         doc_type=twitter_flow_text_index_type,
                                         id=item['tid'])['_source']
                tw_result['nick_name'] = get_user_nickname(tw_result['uid'])
                tid_result = lookup_tid_attend_index(item['tid'], item['timestamp'])
                if tid_result:
                    tw_result['comment'] = tid_result['comment']
                    tw_result['share'] = tid_result['share']
                    tw_result['favorite'] = tid_result['favorite']
                else:
                    tw_result['comment'] = 0
                    tw_result['share'] = 0
                    tw_result['favorite'] = 0
                tw_list.append(tw_result)
            except Exception:
                print('flow_text error!')

    user_info = task_detail['user_info']
    if user_info:
        for uid in user_info:
            try:
                user_result = es_xnr_2.get(index=twitter_user_index_name,
                                           doc_type=twitter_user_index_type,
                                           id=uid)['_source']
                user_dict = {
                    'uid': uid,
                    'username': user_result['username'],
                    'talking_about_count': user_result.get('talking_about_count', 0),
                    'likes': user_result.get('likes', 0),
                    'category': user_result.get('category', ''),
                }
            except Exception:
                # Unknown user: keep a placeholder entry so the uid still
                # shows up in the report.
                user_dict = {
                    'uid': uid,
                    'username': '',
                    'talking_about_count': 0,
                    'likes': 0,
                    'category': '',
                }
                print('user_list error!')
            user_list.append(user_dict)

    report_content = dict()
    report_content['user_list'] = user_list
    report_content['tw_list'] = tw_list
    report_dict['report_content'] = json.dumps(report_content)

    # Derive the document id from the report type.
    report_id = ''
    if report_type == u'言论':
        if tw_info:
            report_id = tw_info[0]['tid']
    elif report_type == u'人物':
        report_id = task_detail['xnr_user_no'] + '_' + task_detail['uid']
    elif report_type == u'事件':
        report_id = task_detail['xnr_user_no'] + '_' + task_detail['event_name']
    elif report_type == u'时间':
        if tw_info:
            report_id = tw_info[0]['tid']
        else:
            report_id = str(task_detail['report_time'])

    report_mark = bool(tw_list)
    # Original note: known issue -- a warning is no longer displayed once it
    # has been reported.

    now_time = int(time.time())
    twitter_report_management_index_name = twitter_report_management_index_name_pre + ts2datetime(now_time)
    if not es_xnr_2.indices.exists(index=twitter_report_management_index_name):
        twitter_report_management_mappings()

    if report_id and report_mark:
        try:
            es_xnr_2.index(index=twitter_report_management_index_name,
                           doc_type=twitter_report_management_index_type,
                           id=report_id,
                           body=report_dict)
            mark = True
        except Exception:
            mark = False
    else:
        mark = False
    return mark