def create_date_warning(today_datetime):
    query_body = {
        'query': {
            'match_all': {}
        },
        'size': MAX_VALUE,
        'sort': {'date_time': {'order': 'asc'}}
    }
    result = es_xnr.search(index=weibo_date_remind_index_name,
                           doc_type=weibo_date_remind_index_type,
                           body=query_body)['hits']['hits']
    date_result = []
    for item in result:
        # Compute how many days remain until the reminder date
        date_time = item['_source']['date_time']
        year = ts2yeartime(today_datetime)
        warming_date = year + '-' + date_time
        today_date = ts2datetime(today_datetime)
        countdown_num = (datetime2ts(warming_date) - datetime2ts(today_date)) / DAY
        if abs(countdown_num) < WARMING_DAY:
            # Look up warning weibos matching the configured keywords
            keywords = item['_source']['keywords']
            date_warming = lookup_weibo_date_warming(keywords, today_datetime)
            item['_source']['weibo_date_warming_content'] = json.dumps(date_warming)
            item['_source']['validity'] = 0
            item['_source']['timestamp'] = today_datetime
            now_time = int(time.time())
            task_id = str(item['_source']['create_time']) + '_' + str(now_time)
            # Save to ES, one index per reminder date
            weibo_timing_warning_index_name = weibo_timing_warning_index_name_pre + warming_date
            mark = False
            if date_warming:
                try:
                    es_xnr.index(index=weibo_timing_warning_index_name,
                                 doc_type=weibo_timing_warning_index_type,
                                 body=item['_source'], id=task_id)
                    mark = True
                except:
                    mark = False
            date_result.append(mark)
    return date_result
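# A minimal, self-contained sketch (not part of the original module) of the
# countdown check above, assuming illustrative values DAY = 86400 and
# WARMING_DAY = 3; the real constants come from the project's configuration.
import datetime

def days_until(warming_date, today_date):
    # Signed whole-day distance between two 'YYYY-MM-DD' strings
    fmt = '%Y-%m-%d'
    delta = (datetime.datetime.strptime(warming_date, fmt) -
             datetime.datetime.strptime(today_date, fmt))
    return delta.days

# A reminder fires only when today is within WARMING_DAY days of the date:
# abs(days_until('2018-03-05', '2018-03-03')) == 2, which is < 3, so it fires.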
def create_personal_warning(xnr_user_no, today_datetime):
    # Look up the followers list of this virtual persona (XNR)
    lookup_type = 'followers_list'
    followers_list = lookup_xnr_fans_followers(xnr_user_no, lookup_type)
    # Look up the XNR's own uid
    xnr_uid = lookup_xnr_uid(xnr_user_no)
    # Aggregate per-user sensitivity sums to find the most sensitive users
    query_body = {
        'aggs': {
            'followers_sensitive_num': {
                'terms': {'field': 'uid'},
                'aggs': {
                    'sensitive_num': {
                        'sum': {'field': 'sensitive'}
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE
    }
    flow_text_index_name = get_day_flow_text_index_list(today_datetime)
    try:
        first_sum_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,
                                               body=query_body)['aggregations']['followers_sensitive_num']['buckets']
    except:
        first_sum_result = []
    top_userlist = []
    for i in xrange(0, len(first_sum_result)):
        user_sensitive = first_sum_result[i]['sensitive_num']['value']
        if user_sensitive > 0:
            user_dict = dict()
            user_dict['uid'] = first_sum_result[i]['key']
            # judge_user_type boosts the score when the user is in the followers list
            followers_mark = judge_user_type(user_dict['uid'], followers_list)
            user_dict['sensitive'] = user_sensitive * followers_mark
            top_userlist.append(user_dict)
    # Fetch each sensitive user's sensitive weibos
    results = []
    for user in top_userlist:
        user_detail = dict()
        user_detail['uid'] = user['uid']
        user_detail['user_sensitive'] = user['sensitive']
        user_detail['user_name'] = get_user_nickname(user['uid'])
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'uid': user['uid']}},
                                {'range': {'sensitive': {'gte': 1}}}
                            ]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {'sensitive': {'order': 'desc'}}
        }
        try:
            second_result = es_flow_text.search(index=flow_text_index_name,
                                                doc_type=flow_text_index_type,
                                                body=query_body)['hits']['hits']
        except:
            second_result = []
        s_result = []
        for item in second_result:
            # Attach the author's nickname
            item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
            s_result.append(item['_source'])
        s_result.sort(key=lambda k: (k.get('sensitive', 0)), reverse=True)
        user_detail['content'] = json.dumps(s_result)
        user_detail['xnr_user_no'] = xnr_user_no
        user_detail['validity'] = 0
        user_detail['timestamp'] = today_datetime
        # Save to ES, one index per day
        today_date = ts2datetime(today_datetime)
        weibo_user_warning_index_name = weibo_user_warning_index_name_pre + today_date
        task_id = xnr_user_no + '_' + user_detail['uid']
        mark = False  # initialized so the append below never sees an unbound name
        if s_result:
            try:
                es_xnr.index(index=weibo_user_warning_index_name,
                             doc_type=weibo_user_warning_index_type,
                             body=user_detail, id=task_id)
                mark = True
            except:
                mark = False
        results.append(mark)
    return results
def create_speech_warning(xnr_user_no, today_datetime):
    # Look up the followers list of this virtual persona (XNR)
    lookup_type = 'followers_list'
    followers_list = lookup_xnr_fans_followers(xnr_user_no, lookup_type)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': {'range': {'sensitive': {'gte': 1}}}
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    flow_text_index_name = get_day_flow_text_index_list(today_datetime)
    results = es_flow_text.search(index=flow_text_index_name,
                                  doc_type=flow_text_index_type,
                                  body=query_body)['hits']['hits']
    result = []
    for item in results:
        item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
        # Flag whether the author is followed by the XNR
        if item['_source']['uid'] in followers_list:
            item['_source']['content_type'] = 'follow'
        else:
            item['_source']['content_type'] = 'unfollow'
        item['_source']['validity'] = 0
        item['_source']['xnr_user_no'] = xnr_user_no
        task_id = xnr_user_no + '_' + item['_source']['mid']
        # Save to ES, one index per day
        today_date = ts2datetime(today_datetime)
        weibo_speech_warning_index_name = weibo_speech_warning_index_name_pre + today_date
        try:
            es_xnr.index(index=weibo_speech_warning_index_name,
                         doc_type=weibo_speech_warning_index_type,
                         body=item['_source'], id=task_id)
            mark = True
        except:
            mark = False
        result.append(mark)
    return result
    wxxnr_data = {
        'wx_id': wx_id,
        'puid': self.self.puid,
        'user_no': wxbot_id2user_no(self.wxbot_id),
        'xnr_user_no': self.wxbot_id,
        'wxbot_port': wxbot_port,
        'create_ts': int(time.time()),
        'nickname': self.self.name,
        'remark': remark,
        'submitter': submitter,
        'mail': mail,
        'access_id': access_id
    }
    es_xnr.index(index=wx_xnr_index_name, doc_type=wx_xnr_index_type,
                 id=self.wxbot_id, body=wxxnr_data)

def set_default_groups(self):
    try:
        d = r.get(self.wxbot_id)
        if d:
            data = eval(d)
            create_flag = data['create_flag']
            if create_flag:
                group_list = []
                groups = self.groups(update=True)
                for group in groups:
                    # Load member details for each group
                    group.update_group(members_details=True)
                    group_list.append(group.puid)
def proc_msg(self, msg):
    group_puid = msg.sender.puid
    if group_puid in self.groups_list:
        msg_type = msg.type
        save_flag = 0
        data = {}
        if msg_type in ['Text', 'Picture', 'Recording']:
            save_flag = 1
            data = {
                'xnr_id': self.self.puid,
                'xnr_name': self.self.name,
                'group_id': group_puid,
                'group_name': msg.sender.name,
                'timestamp': msg.raw['CreateTime'],
                'speaker_id': self.load_member_id(msg.member),
                'speaker_name': msg.member.name,
                'msg_type': msg_type
            }
        nowDate = datetime.datetime.now().strftime('%Y-%m-%d')
        index_name = wx_group_message_index_name_pre + str(nowDate)
        if msg_type == 'Text':
            text = msg.text
            data['text'] = text
            try:
                sen_value, sen_words = sensitive_check(text.encode('utf8'))
                if sen_value != 0:
                    sen_flag = 1  # the message is sensitive
                else:
                    sen_flag = 0
                if msg.is_at:
                    at_flag = 1  # the bot was @-mentioned
                else:
                    at_flag = 0
                data['at_flag'] = at_flag
                data['sensitive_flag'] = sen_flag
                data['sensitive_value'] = sen_value
                data['sensitive_words_string'] = sen_words['sensitive_words_string']
            except Exception, e:
                print e
        elif msg_type == 'Picture':
            # Uploading to qiniu.com was deprecated (2018-1-2, hanmc);
            # pictures are now saved locally instead.
            # Keep the original extension (.png / .jpg / .gif)
            filename = str(msg.id) + str(msg.file_name)
            filepath = os.path.join(WX_IMAGE_ABS_PATH, ts2datetime(time.time()))
            if not os.path.isdir(filepath):
                os.mkdir(filepath)
            # Periodic cleanup lives in timed_python_files/wx_regular_cleaning.py
            save_path = os.path.join(filepath, filename)
            msg.get_file(save_path)
            # Compress the image: leave .gif alone, optimize .png and .jpg
            image_type = filename.split('.')[-1]
            if image_type == 'png':
                os.popen("optipng " + save_path + " -snip")
            elif image_type == 'jpg':
                os.popen("jpegoptim " + save_path)
            data['text'] = os.path.join(filepath, filename)
        elif msg_type == 'Recording':
            filename = str(msg.id) + '.mp3'
            filepath = os.path.join(WX_VOICE_ABS_PATH, ts2datetime(time.time()))
            if not os.path.isdir(filepath):
                os.mkdir(filepath)
            # Periodic cleanup lives in timed_python_files/wx_regular_cleaning.py
            msg.get_file(save_path=os.path.join(filepath, filename))
            data['text'] = os.path.join(filepath, filename)
        # Index the message into ES, creating the daily index on first use
        if save_flag:
            if not es_xnr.indices.exists(index=index_name):
                wx_group_message_mappings(index_name)
            es_xnr.index(index=index_name,
                         doc_type=wx_group_message_index_type, body=data)
        # Auto-reply when the bot is @-mentioned in a monitored group
        if msg.is_at:
            time.sleep(random.random())
            m = msg.reply(u'知道啦~')  # "Got it~"
            self.save_sent_msg(m=m, to_puid=msg.sender.puid,
                               to_name=msg.sender.name)
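# The compression step above shells out via os.popen with string
# concatenation. A hedged alternative sketch using subprocess, which avoids
# shell parsing of the file name and surfaces the exit code; it assumes
# optipng and jpegoptim are installed and on PATH, as the handler already does.
import subprocess

def compress_image(save_path):
    image_type = save_path.split('.')[-1].lower()
    if image_type == 'png':
        subprocess.call(['optipng', save_path, '-snip'])
    elif image_type == 'jpg':
        subprocess.call(['jpegoptim', save_path])
    # .gif files are deliberately left untouched, as in proc_msg above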
def save_to_fans_follow_ES(xnr_user_no, uid, save_type, follow_type,
                           trace_type='ordinary_follow'):
    if save_type == 'followers':
        try:
            results = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                                 doc_type=weibo_xnr_fans_followers_index_type,
                                 id=xnr_user_no)
            results = results["_source"]
            if follow_type == 'follow':
                if trace_type == 'trace_follow':
                    # Add to the trace-follow list
                    try:
                        trace_follow_uids = results['trace_follow_list']
                        trace_follow_uids_set = set(trace_follow_uids)
                        trace_follow_uids_set.add(uid)
                        trace_follow_uids = list(trace_follow_uids_set)
                    except:
                        trace_follow_uids = [uid]
                    # Also add to the ordinary followers list
                    try:
                        followers_uids = results['followers_list']
                        followers_uids_set = set(followers_uids)
                        followers_uids_set.add(uid)
                        followers_uids = list(followers_uids_set)
                    except:
                        followers_uids = [uid]
                    results['followers_list'] = followers_uids
                    results['trace_follow_list'] = trace_follow_uids
                    es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                                  doc_type=weibo_xnr_fans_followers_index_type,
                                  id=xnr_user_no, body={'doc': results})
                else:
                    # Ordinary follow: add to the followers list only
                    try:
                        followers_uids = results['followers_list']
                        followers_uids_set = set(followers_uids)
                        followers_uids_set.add(uid)
                        followers_uids = list(followers_uids_set)
                    except:
                        followers_uids = [uid]
                    results['followers_list'] = followers_uids
                    es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                                  doc_type=weibo_xnr_fans_followers_index_type,
                                  id=xnr_user_no, body={'doc': results})
            elif follow_type == 'unfollow':
                try:
                    followers_uids = results['followers_list']
                    followers_uids = list(set(followers_uids).difference(set([uid])))
                    results['followers_list'] = followers_uids
                    es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                                  doc_type=weibo_xnr_fans_followers_index_type,
                                  id=xnr_user_no, body={'doc': results})
                except:
                    return False
        except:
            # No record for this XNR yet: create one
            body_info = {}
            body_info['followers_list'] = [uid]
            body_info['xnr_user_no'] = xnr_user_no
            es_xnr.index(index=weibo_xnr_fans_followers_index_name,
                         doc_type=weibo_xnr_fans_followers_index_type,
                         id=xnr_user_no, body=body_info)
    elif save_type == 'fans':
        try:
            results = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                                 doc_type=weibo_xnr_fans_followers_index_type,
                                 id=xnr_user_no)
            results = results["_source"]
            try:
                fans_uids = results['fans_list']
                fans_uids_set = set(fans_uids)
                fans_uids_set.add(uid)
                fans_uids = list(fans_uids_set)
                results['fans_list'] = fans_uids
            except:
                results['fans_list'] = [uid]
            es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                          doc_type=weibo_xnr_fans_followers_index_type,
                          id=xnr_user_no, body={'doc': results})
        except:
            body_info = {}
            body_info['fans_list'] = [uid]
            body_info['xnr_user_no'] = xnr_user_no
            es_xnr.index(index=weibo_xnr_fans_followers_index_name,
                         doc_type=weibo_xnr_fans_followers_index_type,
                         id=xnr_user_no, body=body_info)
    return True
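# The try/except blocks above all repeat one pattern: "add a uid to a list
# field that may not exist yet, keeping entries unique". A hypothetical helper
# (not in the original module) capturing that pattern:
def add_unique_uid(source, field, uid):
    uids = set(source.get(field, []))
    uids.add(uid)
    source[field] = list(uids)
    return source

# e.g. results = add_unique_uid(results, 'followers_list', uid)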
def cron_compute_mark_qq(current_time):
    current_date = ts2datetime(current_time)
    current_time_new = datetime2ts(current_date)
    xnr_results = es.search(index=qq_xnr_index_name, doc_type=qq_xnr_index_type,
                            body={'query': {'match_all': {}}, 'size': MAX_SEARCH_SIZE})['hits']['hits']
    if S_TYPE == 'test':
        xnr_results = [{'_source': {'xnr_user_no': 'QXNR0007', 'qq_number': '1039598173'}}]
    mark = False  # initialized so an empty result set returns False
    for result in xnr_results:
        print 'result....', result
        xnr_user_no = result['_source']['xnr_user_no']
        qq_number = result['_source']['qq_number']
        influence_dict = get_influence_at_num(xnr_user_no, qq_number, current_time)
        penetration_dict = get_penetration_num(xnr_user_no, qq_number, current_time)
        safe_dict = qq_history_count(xnr_user_no, qq_number, current_time)
        _id = xnr_user_no
        xnr_user_detail = {}
        xnr_user_detail['influence'] = influence_dict['mark']
        xnr_user_detail['penetration'] = penetration_dict['mark']
        xnr_user_detail['safe'] = safe_dict['mark']
        xnr_user_detail['daily_be_at_num'] = influence_dict['daily_be_at_num']
        xnr_user_detail['total_be_at_num'] = influence_dict['total_be_at_num']
        xnr_user_detail['daily_sensitive_num'] = penetration_dict['sensitive_info']
        xnr_user_detail['total_post_num'] = safe_dict['total_post_num']
        xnr_user_detail['daily_post_num'] = safe_dict['daily_post_num']
        xnr_user_detail['date_time'] = current_date
        xnr_user_detail['timestamp'] = current_time_new
        xnr_user_detail['xnr_user_no'] = xnr_user_no
        xnr_user_detail['qq_number'] = qq_number
        qq_xnr_history_count_index_name = qq_xnr_history_count_index_name_pre + current_date
        try:
            print 'qq_xnr_history_count_index_name...', qq_xnr_history_count_index_name
            qq_xnr_history_count_mappings(qq_xnr_history_count_index_name)
            es.index(index=qq_xnr_history_count_index_name,
                     doc_type=qq_xnr_history_count_index_type,
                     id=_id, body=xnr_user_detail)
            mark = True
        except:
            mark = False
    return mark
def create_personal_warning(xnr_user_no, today_datetime):
    # Look up the friends list of this virtual persona (XNR)
    friends_list = lookup_xnr_friends(xnr_user_no)
    # Look up the XNR's own uid
    xnr_uid = lookup_xnr_uid(xnr_user_no)
    # Aggregate per-user sensitivity sums to find the most sensitive users
    query_body = {
        'aggs': {
            'friends_sensitive_num': {
                'terms': {'field': 'uid'},
                'aggs': {
                    'sensitive_num': {
                        'sum': {'field': 'sensitive'}
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE
    }
    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    try:
        first_sum_result = es_xnr.search(index=facebook_flow_text_index_name,
                                         doc_type=facebook_flow_text_index_type,
                                         body=query_body)['aggregations']['friends_sensitive_num']['buckets']
    except:
        first_sum_result = []
    top_userlist = []
    for i in xrange(0, len(first_sum_result)):
        user_sensitive = first_sum_result[i]['sensitive_num']['value']
        if user_sensitive > 0:
            user_dict = dict()
            user_dict['uid'] = first_sum_result[i]['key']
            # judge_user_type boosts the score (1.5x) when the user is a friend
            friends_mark = judge_user_type(user_dict['uid'], friends_list)
            user_dict['sensitive'] = user_sensitive * friends_mark
            top_userlist.append(user_dict)
    # Fetch each sensitive user's sensitive posts
    results = []
    for user in top_userlist:
        user_detail = dict()
        user_detail['uid'] = user['uid']
        user_detail['user_sensitive'] = user['sensitive']
        user_detail['user_name'] = get_user_nickname(user['uid'])
        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [
                                {'term': {'uid': user['uid']}},
                                {'range': {'sensitive': {'gte': 1}}}
                            ]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {'sensitive': {'order': 'desc'}}
        }
        try:
            second_result = es_xnr.search(index=facebook_flow_text_index_name,
                                          doc_type=facebook_flow_text_index_type,
                                          body=query_body)['hits']['hits']
        except:
            second_result = []
        s_result = []
        for item in second_result:
            # Attach the three engagement metrics for the post
            fid_result = lookup_fid_attend_index(item['_source']['fid'], today_datetime)
            if fid_result:
                item['_source']['comment'] = fid_result['comment']
                item['_source']['share'] = fid_result['share']
                item['_source']['favorite'] = fid_result['favorite']
            else:
                item['_source']['comment'] = 0
                item['_source']['share'] = 0
                item['_source']['favorite'] = 0
            # Attach the author's nickname
            item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
            s_result.append(item['_source'])
        s_result.sort(key=lambda k: (k.get('sensitive', 0)), reverse=True)
        user_detail['content'] = json.dumps(s_result)
        user_detail['xnr_user_no'] = xnr_user_no
        user_detail['validity'] = 0
        user_detail['timestamp'] = today_datetime
        # Save to ES, one index per day
        today_date = ts2datetime(today_datetime)
        facebook_user_warning_index_name = facebook_user_warning_index_name_pre + today_date
        task_id = xnr_user_no + '_' + user_detail['uid']
        mark = False  # initialized so the append below never sees an unbound name
        if s_result:
            try:
                es_xnr.index(index=facebook_user_warning_index_name,
                             doc_type=facebook_user_warning_index_type,
                             body=user_detail, id=task_id)
                mark = True
            except:
                mark = False
        results.append(mark)
    return results
    u'451016634935094', u'359574464219603', u'100000353049421', u'717234834',
    u'100011204707611', u'100000065704494', u'780790723', u'100005960898332',
    u'206986566009728', u'100003481030289', u'100008144074564', u'135252119870284',
    u'100007426740391', u'1517589828', u'100014335805805', u'100018206347610',
    u'100018794590981', u'100012225906969', u'1466849490028320', u'100005004039054',
    u'1196435997092687', u'100010967027774', u'152100711485335', u'1768200884',
    u'100000960373995', u'100004783215425', u'100014321793964', u'100002433998672',
    u'100000042158598', u'1359383878', u'100006736002878', u'100001469904363',
    u'100011257748826', u'100021891726122', u'706676622729838', u'100003491408719',
    u'812623535531819', u'852067068172077', u'100012258524129', u'1140849537',
    u'100010739386824', u'100006590973401', u'100009377185598', u'1478123819',
    u'100010559224139', u'100000657330094', u'100006740970861', u'1640482902830291',
    u'100017177435135', u'767067873371162', u'100004017334041', u'366243453719070',
    u'100004666743754', u'115631625122669', u'317012365084676', u'1302631509',
    u'100011911669425', u'100001359256884'
]
user = "******"
task_detail = dict()
task_detail["task_name"] = fb_id_sensing
task_detail["remark"] = "感知热门事件"  # "sense trending events"
task_detail["social_sensors"] = json.dumps(list(social_sensors))
task_detail["history_status"] = json.dumps([])
print es.index(index=fb_id_sensing, doc_type=fb_type_sensing,
               id=fb_id_sensing, body=task_detail)
def save_event_warning(xnr_user_no, start_time, end_time):
    # Create the daily warning index if it does not exist yet
    today_date = ts2datetime(end_time)
    today_datetime = datetime2ts(today_date)
    weibo_event_warning_index_name = weibo_event_warning_index_name_pre + today_date
    if not es_xnr.indices.exists(index=weibo_event_warning_index_name):
        weibo_event_warning_mappings(weibo_event_warning_index_name)
    new_event_warning = create_event_warning(xnr_user_no, start_time, end_time)
    today_history_event_warning, old_name_list = lookup_history_event_warming(
        xnr_user_no, today_datetime, end_time)
    print 'warning!!!', len(new_event_warning)
    results = []
    if new_event_warning:
        for item in new_event_warning:
            event_mark = set_intersection(item['event_name'], old_name_list)
            if event_mark == 1:
                # The event already exists today: merge users and weibos into it
                task_id = xnr_user_no + '_' + item['event_name']
                old_event = es_xnr.get(index=weibo_event_warning_index_name,
                                       doc_type=weibo_event_warning_index_type,
                                       id=task_id)['_source']
                # Merge user lists, adding only uids not already present
                old_event_main_info = json.loads(old_event['main_user_info'])
                old_event_uid_list = [user['uid'] for user in old_event_main_info]
                new_event_main_info = json.loads(item['main_user_info'])
                new_event_uid_list = [user['uid'] for user in new_event_main_info]
                add_uid_list = list(set(new_event_uid_list) - set(old_event_uid_list))
                new_main_user_info = []
                for uid in add_uid_list:
                    uid_info = [u for u in new_event_main_info if u['uid'] == uid]
                    if uid_info:
                        new_main_user_info.append(uid_info[0])
                old_event_main_info.extend(new_main_user_info)
                old_event['main_user_info'] = json.dumps(old_event_main_info)
                # Merge weibo lists the same way, keyed by mid
                old_event_weibo_info = json.loads(old_event['main_weibo_info'])
                old_event_mid_list = [content['mid'] for content in old_event_weibo_info]
                new_event_weibo_info = json.loads(item['main_weibo_info'])
                new_event_mid_list = [content['mid'] for content in new_event_weibo_info]
                add_weibo_list = list(set(new_event_mid_list) - set(old_event_mid_list))
                new_main_weibo_info = []
                for mid in add_weibo_list:
                    mid_info = [t for t in new_event_weibo_info if t['mid'] == mid]
                    if mid_info:
                        new_main_weibo_info.append(mid_info[0])
                old_event_weibo_info.extend(new_main_weibo_info)
                old_event['main_weibo_info'] = json.dumps(old_event_weibo_info)
                old_event['event_influence'] = old_event['event_influence'] + item['event_influence']
                try:
                    es_xnr.update(index=weibo_event_warning_index_name,
                                  doc_type=weibo_event_warning_index_type,
                                  id=task_id, body={'doc': old_event})
                    mark = True
                except:
                    mark = False
            else:
                # New event: store it directly
                task_id = xnr_user_no + '_' + item['event_name']
                try:
                    es_xnr.index(index=weibo_event_warning_index_name,
                                 doc_type=weibo_event_warning_index_type,
                                 body=item, id=task_id)
                    mark = True
                except:
                    mark = False
            results.append(mark)
    print 'event_warning::', results
    return results
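# Worked example (hypothetical data) of the de-duplicating merge above. Note
# that Python's set "-" binds tighter than "&", so the original expression
# set(new) - set(old) & set(new) evaluated as (new - old) & new == new - old,
# while the mid variant set(new) - set(new) & set(old) collapsed to the empty
# set; both are therefore written as a plain difference above.
old_uids = ['u1', 'u2']
new_uids = ['u2', 'u3']
add_uids = list(set(new_uids) - set(old_uids))  # ['u3']: only genuinely new uids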
def create_speech_warning(xnr_user_no, today_datetime):
    # Look up the friends list of this virtual persona (XNR)
    friends_list = lookup_xnr_friends(xnr_user_no)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': {'range': {'sensitive': {'gte': 1}}}
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }
    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    results = es_xnr.search(index=facebook_flow_text_index_name,
                            doc_type=facebook_flow_text_index_type,
                            body=query_body)['hits']['hits']
    result = []
    for item in results:
        # Flag whether the author is a friend of the XNR
        if item['_source']['uid'] in friends_list:
            item['_source']['content_type'] = 'friends'
        else:
            item['_source']['content_type'] = 'unfriends'
        item['_source']['validity'] = 0
        item['_source']['xnr_user_no'] = xnr_user_no
        # Attach the three engagement metrics for the post
        fid_result = lookup_fid_attend_index(item['_source']['fid'], today_datetime)
        if fid_result:
            item['_source']['comment'] = fid_result['comment']
            item['_source']['share'] = fid_result['share']
            item['_source']['favorite'] = fid_result['favorite']
        else:
            item['_source']['comment'] = 0
            item['_source']['share'] = 0
            item['_source']['favorite'] = 0
        # Attach the author's nickname
        item['_source']['nick_name'] = get_user_nickname(item['_source']['uid'])
        task_id = xnr_user_no + '_' + item['_source']['fid']
        # Save to ES, one index per day
        today_date = ts2datetime(today_datetime)
        facebook_speech_warning_index_name = facebook_speech_warning_index_name_pre + today_date
        try:
            es_xnr.index(index=facebook_speech_warning_index_name,
                         doc_type=facebook_speech_warning_index_type,
                         body=item['_source'], id=task_id)
            mark = True
        except:
            mark = False
        result.append(mark)
    return result
def read_flow_text(flow_text_index_name, current_date):
    i = 0
    label_count_dict = {}
    content_dict = {}
    while True:
        # Page through original (message_type=1), non-sensitive weibos,
        # most-followed authors first
        query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'message_type': 1}},
                        {'term': {'sensitive': 0}}
                    ]
                }
            },
            'size': 1000,
            'from': i * 1000,
            'sort': {'user_fansnum': {'order': 'desc'}}
        }
        search_results = es_flow_text.search(index=flow_text_index_name,
                                             doc_type=flow_text_index_type,
                                             body=query_body)['hits']['hits']
        print es_flow_text, flow_text_index_name
        print 'len..', len(search_results)
        if not search_results:
            # No more documents to page through; stop even if some labels
            # never reached 20 items (avoids looping forever)
            break
        weibo_list = []
        for result in search_results:
            result = result['_source']
            weibo_list.append(result['text'].encode('utf-8'))
        label_list = triple_classifier_new(weibo_list)
        # Keep at most 20 weibos per topic label
        for j in range(len(search_results)):
            label = label_list[j]
            search_results[j]['_source']['label'] = label
            try:
                if label_count_dict[label] < 20:
                    content_dict[label].append(search_results[j]['_source'])
                    label_count_dict[label] += 1
            except:
                content_dict[label] = [search_results[j]['_source']]
                label_count_dict[label] = 1
        i += 1
        print 'i..', i
        # Stop once every label has collected at least 20 weibos
        min_label = min(label_count_dict, key=label_count_dict.get)
        if label_count_dict[min_label] >= 20:
            break
    print 'label_count_dict::', label_count_dict
    for content_label, content_weibo in content_dict.iteritems():
        index_name = daily_interest_index_name_pre + '_' + current_date
        daily_inerests_flow_text_mappings(index_name)
        for daily_weibo in content_weibo:
            mid = daily_weibo['mid']
            print es_xnr.index(index=index_name, doc_type=daily_interest_index_type,
                               id=mid, body=daily_weibo)
        print content_label, '====', len(content_weibo)
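# A minimal sketch (hypothetical names, not part of the original module) of
# the per-label capping loop used by the read_flow_text variant above and the
# facebook variant below: bucket documents by classifier label, keeping at
# most cap items per label.
from collections import defaultdict

def bucket_by_label(docs, labels, cap=20):
    buckets = defaultdict(list)
    for doc, label in zip(docs, labels):
        if len(buckets[label]) < cap:
            buckets[label].append(doc)
    return buckets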
def read_flow_text(flow_text_index_name, current_date):
    i = 0
    label_count_dict = {}
    content_dict = {}
    while True:
        # Page through non-sensitive facebook posts
        query_body = {
            'query': {
                'bool': {
                    'must': [
                        {'term': {'sensitive': 0}}
                    ]
                }
            },
            'size': 1000,
            'from': i * 1000
        }
        search_results = es_xnr.search(index=flow_text_index_name,
                                       doc_type=facebook_flow_text_index_type,
                                       body=query_body)['hits']['hits']
        if not search_results:
            # No more documents to page through; stop even if some labels
            # never reached 20 items (avoids looping forever)
            break
        weibo_list = []
        for result in search_results:
            result = result['_source']
            weibo_list.append(result['text'].encode('utf-8'))
        label_list = triple_classifier_new(weibo_list)
        # Keep at most 20 posts per topic label
        for j in range(len(search_results)):
            label = label_list[j]
            try:
                if label_count_dict[label] < 20:
                    content_dict[label].append(search_results[j]['_source'])
                    label_count_dict[label] += 1
            except:
                content_dict[label] = [search_results[j]['_source']]
                label_count_dict[label] = 1
        i += 1
        if i % 1000 == 0:
            print 'i...', i
            print 'label_count_dict...', label_count_dict
        # Stop once every label has collected at least 20 posts
        min_label = min(label_count_dict, key=label_count_dict.get)
        if label_count_dict[min_label] >= 20:
            break
    print 'label_count_dict::', label_count_dict
    for content_label, content_weibo in content_dict.iteritems():
        _id = content_label
        index_name = fb_daily_interest_index_name_pre + '_' + current_date
        fb_daily_inerests_flow_text_mappings(index_name)
        item_dict = {}
        item_dict['timestamp'] = datetime2ts(current_date)
        item_dict['content'] = json.dumps(content_weibo)
        print es_xnr.index(index=index_name, doc_type=fb_daily_interest_index_type,
                           id=_id, body=item_dict)
        print content_label, '====', len(content_weibo)
from elasticsearch import Elasticsearch

social_sensors = [
    "1738004582", "1784473157", "2286908003", "1717833412", "1314608344", "1644114654",
    "1686546714", "1656737654", "2028810631", "1677991972", "3881380517", "1847582585", "1651428902",
    "1420157965", "1913382117", "1884334303", "1734530730", "1893278624", "1720962692", "1700648435",
    "3288875501", "1672519561", "2034347300", "1688864597", "2615417307", "1191965271", "1643971635",
    "1778758223", "1216431741", "1698823241", "1977460817", "1644729004", "1231759973", "1231759973",
    "1315591982", "1656831930", "1926909715", "1699432410", "1660452532", "1722628512", "1267454277",
    "1640601392", "2443459455", "3921730119", "1867571077", "1718493627", "1653460650", "1737737970",
    "2616293707", "3271121353", "1642591402", "1326410461", "1645705403", "1985593262", "1654164742",
    "1638781994", "2993049293", "1653944045", "5977555696", "1992613670", "1726393244", "1216431741",
    "1724367710", "1880087643", "2827102952", "1974808274", "1700720163", "3164957712", "3266943013",
    "2127460165", "2083844833", "5305757517", "2803301701", "2656274875", "1618051664", "1974576991",
    "1642512402", "1649173367", "1658388624", "1697601814", "1703371307", "1638782947", "1402977920",
    "1893801487", "2108053230", "1649469284", "1975995305", "2810373291", "1749990115", "1663937380",
    "1497087080", "1652484947", "2162541102", "2462605080", "1650111241", "1265998927", "1698857957",
    "1887790981", "1698233740", "3712035812", "5044281310", "1701401324", "1571497285", "1635764393"
]
user = "******"
task_detail = dict()
task_detail["task_name"] = id_sensing
task_detail["remark"] = "感知热门事件"  # "sense trending events"
task_detail["social_sensors"] = json.dumps(list(social_sensors))
task_detail["history_status"] = json.dumps([])
print es.index(index=index_sensing, doc_type=type_sensing,
               id=id_sensing, body=task_detail)
def read_tracing_followers_tweet():
    if S_TYPE == 'test':
        query_body = {
            'query': {'term': {'xnr_user_no': 'WXNR0004'}},
            'size': MAX_SEARCH_SIZE
        }
    else:
        query_body = {'query': {'match_all': {}}, 'size': MAX_SEARCH_SIZE}
    results = es_xnr.search(index=weibo_xnr_fans_followers_index_name,
                            doc_type=weibo_xnr_fans_followers_index_type,
                            body=query_body)['hits']['hits']
    if results:
        for result in results:
            result = result['_source']
            try:
                xnr_user_no = result['xnr_user_no']
                print xnr_user_no
                trace_follow_list = result['trace_follow_list']
            except:
                continue
            print 'trace_follow_list...', trace_follow_list
            if S_TYPE == 'test':
                current_time = datetime2ts(S_DATE)
            else:
                current_time = int(time.time())
            current_date = ts2datetime(current_time)
            flow_text_index_name = flow_text_index_name_pre + current_date
            print flow_text_index_name
            query_body_flow = {
                'query': {
                    'filtered': {
                        'filter': {'terms': {'uid': trace_follow_list}}
                    }
                },
                'size': MAX_SEARCH_SIZE
            }
            results_flow = es_flow_text.search(index=flow_text_index_name,
                                               doc_type=flow_text_index_type,
                                               body=query_body_flow)['hits']['hits']
            print 'results_flow..', results_flow
            if results_flow:
                for result_flow in results_flow:
                    result_flow = result_flow['_source']
                    mid = result_flow['mid']
                    # Check whether this mid has already been saved
                    task_id = xnr_user_no + '_' + mid
                    try:
                        # Skip if it was added before
                        es_xnr.get(index=weibo_xnr_retweet_timing_list_index_name,
                                   doc_type=weibo_xnr_retweet_timing_list_index_type,
                                   id=task_id)['_source']
                        continue
                    except:
                        # Not added yet: put it on the timed-retweet list
                        task_detail = {}
                        task_detail['xnr_user_no'] = xnr_user_no
                        task_detail['mid'] = mid
                        task_detail['text'] = result_flow['text']
                        task_detail['uid'] = result_flow['uid']
                        task_detail['nick_name'], task_detail['photo_url'] = \
                            uid2nick_name_photo(result_flow['uid'])
                        task_detail['timestamp'] = result_flow['timestamp']
                        # Schedule the retweet at a random offset after the original post
                        task_detail['timestamp_set'] = result_flow['timestamp'] + \
                            random.randint(RETWEET_START_TS, RETWEET_END_TS)
                        task_detail['compute_status'] = 0
                        print 'insert new!!!!'
                        print 'es_xnr...', es_xnr
                        print es_xnr.index(index=weibo_xnr_retweet_timing_list_index_name,
                                           doc_type=weibo_xnr_retweet_timing_list_index_type,
                                           body=task_detail, id=task_id)
def lookup_timestamp_posts(start_time, end_time):
    start_date = ts2datetime(start_time)
    end_date = ts2datetime(end_time)
    flow_text_index_name_list = []
    if start_date == end_date:
        # Query window falls within a single day
        index_name = flow_text_index_name_pre + end_date
        flow_text_index_name_list.append(index_name)
        sensitive_index_name = weibo_sensitive_post_index_name_pre + end_date
        if not es_xnr.indices.exists(index=sensitive_index_name):
            weibo_sensitive_post_mappings(sensitive_index_name)
    else:
        # Query window spans two days: search both daily indices
        start_index_name = flow_text_index_name_pre + start_date
        end_index_name = flow_text_index_name_pre + end_date
        flow_text_index_name_list.append(start_index_name)
        flow_text_index_name_list.append(end_index_name)
        sensitive_start_index = weibo_sensitive_post_index_name_pre + start_date
        sensitive_end_index = weibo_sensitive_post_index_name_pre + end_date
        if not es_xnr.indices.exists(index=sensitive_start_index):
            weibo_sensitive_post_mappings(sensitive_start_index)
        if not es_xnr.indices.exists(index=sensitive_end_index):
            weibo_sensitive_post_mappings(sensitive_end_index)
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'range': {'timestamp': {'gte': start_time, 'lte': end_time}}}
                        ]
                    }
                }
            }
        },
        'sort': {'timestamp': {'order': 'desc'}},
        'size': 50
    }
    print 'start search!!!'
    print flow_text_index_name_list
    try:
        es_result = es_flow_text.search(index=flow_text_index_name_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)['hits']['hits']
        warning_type = 'user'
        print 'repeat!!!'
        hot_result = remove_repeat(es_result, warning_type)
        print 'save!!!'
        for item in hot_result:
            task_id = item['mid']
            post_index_name = weibo_sensitive_post_index_name_pre + ts2datetime(item['timestamp'])
            es_xnr.index(index=post_index_name,
                         doc_type=weibo_sensitive_post_index_type,
                         body=item, id=task_id)
        mark_result = True
        print 'finish!'
    except:
        mark_result = False
    return mark_result
def save_role_feature_analysis(role_results, role_label, domain, role_id, task_id):
    # Build the role document once; the original duplicated this block in both
    # branches, which only differ in updating an existing doc vs indexing a new one
    role_item = dict()
    role_item['role_pinyin'] = role_id
    role_item['role_name'] = role_label
    role_item['domains'] = domain
    role_item['personality'] = json.dumps(role_results['personality'])
    role_item['political_side'] = json.dumps(role_results['political_side'])
    role_item['geo'] = json.dumps(role_results['geo'])
    role_item['active_time'] = json.dumps(list(role_results['active_time']))
    role_item['day_post_num'] = json.dumps(list(role_results['day_post_num']))
    role_item['psy_feature'] = json.dumps(role_results['psy_feature'])
    role_item['member_uids'] = json.dumps(role_results['member_uids'])
    try:
        # Raises if the role does not exist yet
        es_xnr.get(index=weibo_role_index_name, doc_type=weibo_role_index_type,
                   id=role_id)['_source']
        es_xnr.update(index=weibo_role_index_name, doc_type=weibo_role_index_type,
                      id=role_id, body={'doc': role_item})
    except Exception, e:
        es_xnr.index(index=weibo_role_index_name, doc_type=weibo_role_index_type,
                     id=role_id, body=role_item)
    # Mark the role analysis result as stored on the domain task
    item_domain = dict()
    item_domain['compute_status'] = 3
    es_xnr.update(index=weibo_domain_index_name, doc_type=weibo_domain_index_type,
                  id=task_id, body={'doc': item_domain})
def onQQMessage(bot, contact, member, content):
    # Called whenever a QQ message is received.
    # bot     : QQBot object; provides List/SendTo/GroupXXX/Stop/Restart etc.
    #           (see section 5 of the docs)
    # contact : QContact object, the sender of the message
    # member  : QContact object; only valid for group / discussion-group
    #           messages, the member who actually sent the message
    # content : str object, the message text
    INFO('test groups %s', bot.List('group'))
    INFO('bot.conf %s', bot.conf)
    print 'contact.============.', contact
    if contact.ctype == 'group':
        INFO('group QQ number.. %s', contact.qq)              # may be #NULL
        INFO('group nickname.. %s', contact.nick)
        INFO('member QQ number.. %s', member.qq)              # may be #NULL
        INFO('member nickname.. %s', member.nick)
        INFO('last speak time.. %s', member.last_speak_time)  # may be -1
        INFO('message.. %s', content)
        last_speak_time = int(time.time())
        print 'last_speak_time..', last_speak_time
        if content == '':
            INFO('you sent a picture or an empty message... %s', content)
        else:
            # sen_words contains sensitive_words_string (e.g. u'北京&达赖')
            # and sensitive_words_dict
            sen_value, sen_words = sensitive_check(content)
            if sen_value != 0:
                sen_flag = 1  # the message is sensitive
            else:
                sen_flag = 0
            qq_item = {
                'xnr_qq_number': bot.session.qq,
                'xnr_nickname': bot.session.nick,
                'timestamp': last_speak_time,
                'speaker_qq_number': '',
                'text': content,
                'sensitive_flag': sen_flag,
                'sensitive_value': sen_value,
                'sensitive_words_string': sen_words['sensitive_words_string'],
                'speaker_nickname': member.nick,
                'qq_group_number': '',
                'qq_group_nickname': contact.nick
            }
            qq_json = json.dumps(qq_item)
            print 'qq_json=====:', qq_json
            # Previously used to build a deterministic _id; ES now assigns the _id
            conMD5 = string_md5(content)
            nowDate = datetime.datetime.now().strftime('%Y-%m-%d')
            index_name = group_message_index_name_pre + str(nowDate)
            if not es.indices.exists(index=index_name):
                print 'get mapping'
                print group_message_mappings(bot.session.qq, nowDate)
            print 'qq_item.....', qq_item
            print es.index(index=index_name, doc_type=group_message_index_type,
                           body=qq_item)
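# string_md5 is called above but defined elsewhere in the project; a minimal
# sketch of what it presumably does, assuming a standard hashlib MD5 hex
# digest over the UTF-8 encoded content (Python 2, matching this module):
import hashlib

def string_md5(content):
    if isinstance(content, unicode):
        content = content.encode('utf-8')
    return hashlib.md5(content).hexdigest()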