def ajax_get_current_hot_weibo():
    """Return the current hot weibo list of a simulation task as JSON.

    Reads ``task_name`` and ``update_time`` from the request args, loads the
    stimulation result document from ES and projects each weibo down to a
    fixed set of display attributes.
    """
    task_name = request.args.get('task_name', '')
    update_time = request.args.get("update_time", "")
    # Task documents are indexed under the pinyin transliteration of the name.
    pinyin_task_name = pinyin.get(task_name.encode('utf-8'), format='strip', delimiter="_")
    index_name = "stimulation_" + pinyin_task_name
    index_type = "stimulation_results"
    source = es_prediction.get(index=index_name, doc_type=index_type,
                               id=update_time)["_source"]
    hot_weibo = json.loads(source["current_hot_weibo"])
    wanted_fields = ["comment", "uid", "text", "uname", "fansnum", "retweet",
                     "mid", "geo", "photo_url", "statusnum", "timestamp"]
    projected = [dict((field, weibo[field]) for field in wanted_fields)
                 for weibo in hot_weibo]
    return json.dumps(projected)
def ajax_get_future_user_info():
    """Return the raw ``future_user_info`` payload of a simulation result."""
    task_name = request.args.get('task_name', '')
    update_time = request.args.get("update_time", "")
    # The ES index name is derived from the pinyin form of the task name.
    pinyin_task_name = pinyin.get(task_name.encode('utf-8'), format='strip', delimiter="_")
    doc = es_prediction.get(index="stimulation_" + pinyin_task_name,
                            doc_type="stimulation_results",
                            id=update_time)["_source"]
    return doc["future_user_info"]
def get_task_detail():
    """Return the stored detail of an interference-management task as JSON."""
    task_name = request.args.get('task_name', '')
    pinyin_task_name = pinyin.get(task_name.encode('utf-8'), format='strip', delimiter="_")
    detail = es_prediction.get(index=index_manage_interfere_task,
                               doc_type=type_manage_interfere_task,
                               id=pinyin_task_name)["_source"]
    fields = ["task_name", "remark", "submit_user", "submit_time",
              "start_time", "stop_time", "update_time", "finish",
              "should_keywords", "must_keywords"]
    return json.dumps(dict((key, detail[key]) for key in fields))
def ajax_show_social_sensors():
    """Return profile info for every configured social-sensor user as JSON.

    Sensor uids are stored as a JSON list on the single sensing-task document.
    Profiles are bulk-fetched; users without a profile document are returned
    as empty stubs (only ``uid`` filled) so the frontend can still render them.
    """
    task_detail = es_prediction.get(index="manage_sensing_task", doc_type="task",
                                    id="social_sensing_task")["_source"]
    uid_list = json.loads(task_detail["social_sensors"])
    profile_results = es_user_profile.mget(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           body={"ids": uid_list})["docs"]
    results = []
    for item in profile_results:
        if item["found"]:
            results.append(item['_source'])
        else:
            # Placeholder record for uids with no profile document.
            results.append({
                'photo_url': '',
                'nick_name': '',
                'uid': item['_id'],
                'sex': '',
                'fansnum': '',
                'friendsnum': '',
                'user_location': '',
                'create_at': '',
            })
    return json.dumps(results)
def ajax_delete_social_sensors():
    """Remove the given uids from the social-sensor list; returns '["1"]' on success."""
    status = ["0"]
    # "&"-joined uid string, e.g. "uid1&uid2".
    delete_user = request.args.get("delete_users", '')
    task_detail = es_prediction.get(index="manage_sensing_task", doc_type="task",
                                    id="social_sensing_task")["_source"]
    current_sensors = json.loads(task_detail["social_sensors"])
    if delete_user:
        to_remove = delete_user.split("&")
        remaining = set(current_sensors) - set(to_remove)
        task_detail["social_sensors"] = json.dumps(list(remaining))
        es_prediction.index(index="manage_sensing_task", doc_type="task",
                            id="social_sensing_task", body=task_detail)
        status = ["1"]
    return json.dumps(status)
def ajax_get_hot_user():
    """Return predicted hot users and their retweeters, sorted by spread.

    For each predicted origin user the row is
    ``[uid, nick_name, fansnum, n_retweeters, total_retweets, retweeter_list]``
    where ``retweeter_list`` is sorted by per-user retweet count (desc);
    rows themselves are sorted by ``n_retweeters`` (desc).

    Bug fixed: a retweeter whose profile is missing used to get
    ``nick_name = start_uid`` (the origin user's uid); it now falls back to
    the missing user's own uid.
    """
    task_name = request.args.get('task_name', '')
    update_time = request.args.get("update_time", "")
    pinyin_task_name = pinyin.get(task_name.encode('utf-8'), format='strip', delimiter="_")
    index_name = "stimulation_" + pinyin_task_name
    index_type = "stimulation_results"
    es_results = es_prediction.get(index=index_name, doc_type=index_type,
                                   id=update_time)["_source"]
    # {origin_uid: {retweeter_uid: retweet_count, ...}, ...}
    future_results = json.loads(es_results["future_results"])
    results = []
    for start_uid, end_dict in future_results.iteritems():
        tmp_uid_list = end_dict.keys()
        tmp_uid_list.append(start_uid)
        tmp_profile = es_user_profile.mget(index=profile_index_name,
                                           doc_type=profile_index_type,
                                           body={"ids": tmp_uid_list})["docs"]
        tmp_profile_dict = dict()
        for item in tmp_profile:
            if item["found"]:
                nick_name = item["_source"]["nick_name"]
                fansnum = item["_source"]["fansnum"]
            else:
                # FIX: fall back to this user's own uid (was start_uid, which
                # mislabeled profile-less retweeters with the origin uid).
                nick_name = item["_id"]
                fansnum = ''
            tmp_iter = dict()
            tmp_iter["nick_name"] = nick_name
            tmp_iter["fansnum"] = fansnum
            tmp_iter["uid"] = item["_id"]
            if item["_id"] != start_uid:
                tmp_iter["retweeted"] = end_dict[item["_id"]]
            tmp_profile_dict[item["_id"]] = tmp_iter
        tmp = []
        tmp.extend([start_uid,
                    tmp_profile_dict[start_uid]["nick_name"],
                    tmp_profile_dict[start_uid]["fansnum"]])
        tmp.append(len(end_dict))                # number of distinct retweeters
        tmp.append(int(sum(end_dict.values())))  # total predicted retweets
        tmp_profile_dict.pop(start_uid)
        tmp_sorted_profile = sorted(tmp_profile_dict.values(),
                                    key=lambda x: x["retweeted"], reverse=True)
        tmp.append(tmp_sorted_profile)
        results.append(tmp)
    results = sorted(results, key=lambda x: x[3], reverse=True)
    return json.dumps(results)
def ajax_add_social_sensor():
    """Add comma-separated uids to the social-sensor list; '["1"]' if it grew."""
    status = ["0"]
    add_user = request.args.get("add_users", '')
    task_detail = es_prediction.get(index="manage_sensing_task", doc_type="task",
                                    id="social_sensing_task")["_source"]
    sensors = json.loads(task_detail["social_sensors"])
    if add_user:
        uid_list = add_user.split(",")  # comma-separated uid string
        if uid_list:
            already_present = set(uid_list) & set(sensors)
            newly_added = set(uid_list) - set(sensors)
            if newly_added:
                merged = list(set(uid_list) | set(sensors))
                task_detail["social_sensors"] = json.dumps(merged)
                es_prediction.index(index="manage_sensing_task", doc_type="task",
                                    id="social_sensing_task", body=task_detail)
                status = ["1"]
            # Kept for a (currently disabled) detailed response listing which
            # uids were duplicates vs. newly added.
            results = [list(already_present), list(newly_added)]
            #return json.dumps(results)
    return json.dumps(status)
def get_origin_weibo_detail(ts, size, order, message_type=1):
    """Collect, sort and de-duplicate weibo detail rows for one sensing snapshot.

    :param ts: snapshot timestamp, used as the ES document id
    :param size: page size -- NOTE(review): currently unused in the body; confirm
    :param order: sort key: "total" | "retweeted" | "comment" (else insertion order)
    :param message_type: 1 = origin, 2 = retweeted, otherwise sensitive weibos
    :return: list of groups ``[[row, ...], ...]``; rows of duplicate weibos are
             merged into the group of the best-ranked copy
    """
    # NOTE(review): loaded but never used below -- confirm before removing.
    topic_value_dict = json.loads(r.get("topic_value_dict"))
    task_detail = es_prediction.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    mid_value = json.loads(task_detail['mid_topic_value'])
    duplicate_dict = json.loads(task_detail['duplicate_dict'])
    # Invert duplicate_dict {dup_mid: canonical_mid} into
    # {canonical_mid: [dup_mid, ..., canonical_mid]}.
    tmp_duplicate_dict = dict()
    for k, v in duplicate_dict.iteritems():
        try:
            tmp_duplicate_dict[v].append(k)
        except:
            tmp_duplicate_dict[v] = [k, v]

    # Pick the weibo-detail payload matching the requested message type.
    if message_type == 1:
        weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    elif message_type == 2:
        weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    else:
        weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])

    # Flatten to rows: [mid, total_count, retweeted, comment].
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()

    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {
                        "mid": mid_list
                    }
                }
            }
        },
        "size": 1000,
        "sort": {
            "timestamp": {
                "order": "desc"
            }
        }
    }

    # Flow-text indices are per-day; today's and yesterday's may both exist.
    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)
    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []

    uid_list = []
    text_dict = dict()      # weibo text documents, keyed by mid
    portrait_dict = dict()  # user profile info, keyed by uid
    sort_results = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # _id is the mid
    if uid_list:
        portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": uid_list}, fields=['nick_name', 'photo_url'])["docs"]
        for item in portrait_result:
            if item['found']:
                portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
            else:
                # Fall back to the uid itself when no profile exists.
                portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url": ""}

    if order == "total":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[1], reverse=True)
    elif order == "retweeted":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[2], reverse=True)
    elif order == "comment":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[3], reverse=True)
    else:
        sorted_list = weibo_detail_list

    count_n = 0
    results_dict = dict()    # NOTE(review): never populated -- confirm before removing
    mid_index_dict = dict()  # mid -> rank position in the final sorted results
    for item in sorted_list:  # size
        mid = item[0]
        iter_text = text_dict.get(mid, {})
        temp = []
        # row: uid, nick_name, photo_url, text, sentiment, date, geo,
        #      message_type, retweeted, comment, sensitive, timestamp,
        #      topic value, mid
        if iter_text:
            uid = iter_text['uid']
            temp.append(uid)
            iter_portrait = portrait_dict.get(uid, {})
            if iter_portrait:
                temp.append(iter_portrait['nick_name'])
                temp.append(iter_portrait['photo_url'])
            else:
                temp.extend([uid, ''])
            temp.append(iter_text["text"])
            temp.append(iter_text["sentiment"])
            temp.append(ts2date(iter_text['timestamp']))
            temp.append(iter_text['geo'])
            if message_type == 1:
                temp.append(1)
            elif message_type == 2:
                temp.append(3)
            else:
                temp.append(iter_text['message_type'])
            temp.append(item[2])
            temp.append(item[3])
            temp.append(iter_text.get('sensitive', 0))
            temp.append(iter_text['timestamp'])
            temp.append(mid_value[mid])
            temp.append(mid)
            results.append(temp)
            count_n += 1

    # Rank by sensitive flag (-4), topic value (-2), comment count (-6).
    results = sorted(results, key=operator.itemgetter(-4, -2, -6), reverse=True)  # -4 -2 -3
    sort_results = []
    count = 0
    for item in results:
        sort_results.append([item])
        mid_index_dict[item[-1]] = count
        count += 1

    # Merge each duplicate weibo's row into the group of the best-ranked copy,
    # then drop the merged-away groups.
    if tmp_duplicate_dict:
        remove_list = []
        value_list = tmp_duplicate_dict.values()  # [[mid, mid], ]
        for item in value_list:
            tmp = []
            for mid in item:
                if mid_index_dict.get(mid, 0):
                    tmp.append(mid_index_dict[mid])
            if len(tmp) > 1:
                tmp_min = min(tmp)
            else:
                continue
            tmp.remove(tmp_min)
            for iter_count in tmp:
                sort_results[tmp_min].extend(sort_results[iter_count])
                remove_list.append(sort_results[iter_count])
        if remove_list:
            for item in remove_list:
                sort_results.remove(item)

    return sort_results
def get_retweet_weibo_detail(ts, size, text_type, type_value):
    """Fetch recent retweet/comment weibos under the snapshot's root mids.

    :param ts: snapshot timestamp (ES doc id); window is [ts - time_interval, ts)
    :param size: NOTE(review): unused -- the query is hard-capped at 100 hits
    :param text_type: optional extra filter field, "message_type" or "sentiment"
    :param type_value: value (or list of values, for sentiment) for that filter
    :return: rows [uid, nick_name, photo_url, text, sentiment, date, geo, message_type]
    """
    task_detail = es_prediction.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    origin_weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    retweeted_weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    # Root mids of both origin and retweeted weibos in this snapshot.
    mid_list = []
    mid_list.extend(origin_weibo_detail.keys())
    mid_list.extend(retweeted_weibo_detail.keys())
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [{
                            "range": {
                                "timestamp": {
                                    "gte": ts - time_interval,
                                    "lt": ts
                                }
                            }
                        }, {
                            "terms": {
                                "root_mid": mid_list
                            }
                        }]
                    }
                }
            }
        },
        "sort": {
            "timestamp": {
                "order": "desc"
            }
        },
        "size": 100
    }
    if text_type == "message_type":
        query_body['query']['filtered']['filter']['bool']['must'].append(
            {"term": {
                text_type: type_value
            }})
    if text_type == "sentiment":
        #if isinstance(type_value, str):
        # Single sentiment value -> term filter; multiple values -> terms filter.
        if len(type_value) == 1:
            query_body['query']['filtered']['filter']['bool']['must'].append(
                {"term": {
                    text_type: type_value
                }})
        else:
            query_body['query']['filtered']['filter']['bool']['must'].append(
                {"terms": {
                    text_type: type_value
                }})

    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    # 1. query the per-day flow-text index for matching weibos
    # NOTE(review): when the window crosses midnight only yesterday's index is
    # searched, so today's part of the window is dropped -- confirm intended.
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []
    #print search_results
    # 2. attach user profile info (nick name, avatar) to each hit
    results = []
    uid_list = []
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
        if uid_list:
            portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": uid_list}, fields=['nick_name', 'photo_url'])["docs"]
            for i in range(len(uid_list)):
                item = search_results[i]['_source']
                temp = []
                # row: uid, nick_name, photo_url, text, sentiment, timestamp, geo, message_type
                temp.append(item['uid'])
                if portrait_result[i]['found']:
                    temp.append(portrait_result[i]["fields"]["nick_name"][0])
                    temp.append(portrait_result[i]["fields"]["photo_url"][0])
                else:
                    # No profile document: repeat the uid as the display name.
                    temp.append(item['uid'])
                    temp.append("")
                temp.append(item["text"])
                #print item['text']
                temp.append(item["sentiment"])
                temp.append(ts2date(item['timestamp']))
                temp.append(item['geo'])
                temp.append(item["message_type"])
                results.append(temp)
    return results
def ajax_get_diffusion_path():
    """Return the stored diffusion path of a simulation task as JSON.

    The payload is the ``diffusion_path`` field of the stimulation result
    document, returned as-is after a decode/encode round-trip.  The large
    block of commented-out profile-enrichment code that used to live here
    has been removed; recover it from version control if it is ever needed.
    """
    task_name = request.args.get('task_name', '')
    update_time = request.args.get("update_time", "")
    # Task documents are indexed under the pinyin transliteration of the name.
    pinyin_task_name = pinyin.get(task_name.encode('utf-8'), format='strip', delimiter="_")
    index_name = "stimulation_" + pinyin_task_name
    index_type = "stimulation_results"
    es_results = es_prediction.get(index=index_name, doc_type=index_type,
                                   id=update_time)["_source"]
    results = json.loads(es_results["diffusion_path"])
    return json.dumps(results)
def get_sensitive_text_detail(task_name, ts, user, order):
    """Return sensitive weibo rows for one sensing snapshot of a user's task.

    :param task_name: task identifier (combined with user to form the doc type)
    :param ts: snapshot timestamp, used as the ES document id
    :param user: submitting user name
    :param order: "total" | "retweeted" | "comment" | "ts" | "sensitive"
    :return: rows [uid, nick_name, photo_url, text, sentiment, date, geo,
             message_type, retweeted, comment, sensitive]
    """
    _id = user + '-' + task_name
    # NOTE(review): this uses the `es` client while sibling functions use
    # `es_prediction` -- confirm both point at the intended cluster.
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    # Flatten to rows: [mid, total_count, retweeted, comment].
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.iteritems():
            tmp = []
            tmp.append(iter_mid)
            tmp.append(item[iter_mid])
            tmp.append(item['retweeted'])
            tmp.append(item['comment'])
            weibo_detail_list.append(tmp)
    mid_list = weibo_detail.keys()
    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {
                        "mid": mid_list
                    }
                }
            }
        }
    }
    # Flow-text indices are per-day; today's and yesterday's may both exist.
    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    if exist_es:
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    if exist_es_1:
        index_list.append(index_name_1)
    if index_list and mid_list:
        search_results = es_text.search(index=index_list, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]
    else:
        search_results = []
    uid_list = []
    text_dict = dict()      # weibo text documents, keyed by mid
    portrait_dict = dict()  # user profile info, keyed by uid
    if search_results:
        for item in search_results:
            uid_list.append(item["_source"]['uid'])
            text_dict[item['_id']] = item['_source']  # _id is the mid
    if uid_list:
        portrait_result = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids": uid_list}, fields=['nick_name', 'photo_url'])["docs"]
        for item in portrait_result:
            if item['found']:
                portrait_dict[item['_id']] = {"nick_name": item["fields"]["nick_name"][0], "photo_url": item["fields"]["photo_url"][0]}
            else:
                # Fall back to the uid itself when no profile exists.
                portrait_dict[item['_id']] = {"nick_name": item['_id'], "photo_url": ""}
    if order == "total":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[1], reverse=True)
    elif order == "retweeted":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[2], reverse=True)
    elif order == "comment":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[3], reverse=True)
    else:
        sorted_list = weibo_detail_list
    count_n = 0
    for item in sorted_list:
        mid = item[0]
        iter_text = text_dict.get(mid, {})
        temp = []
        # row: uid, nick_name, photo_url, text, sentiment, date, geo,
        #      message_type, retweeted, comment, sensitive
        if iter_text:
            uid = iter_text['uid']
            temp.append(uid)
            iter_portrait = portrait_dict.get(uid, {})
            if iter_portrait:
                temp.append(iter_portrait['nick_name'])
                temp.append(iter_portrait['photo_url'])
            else:
                temp.extend([uid, ''])
            temp.append(iter_text["text"])
            temp.append(iter_text["sentiment"])
            temp.append(ts2date(iter_text['timestamp']))
            temp.append(iter_text['geo'])
            temp.append(iter_text['message_type'])
            temp.append(item[2])
            temp.append(item[3])
            temp.append(iter_text.get('sensitive', 0))
            count_n += 1
            results.append(temp)
    # Optional re-sorts: by display date or by sensitive score.
    if results and order == "ts":
        results = sorted(results, key=lambda x: x[5], reverse=True)
    if results and order == "sensitive":
        results = sorted(results, key=lambda x: x[-1], reverse=True)
    return results
def get_weibo_content(topic, start_ts, end_ts, opinion, sort_item='timestamp'):
    """Return weibo contents of a sub-opinion cluster, sorted by ``sort_item``.

    :param topic: topic name, matched exactly against the ``name`` field
    :param start_ts: earliest cluster start timestamp (gte)
    :param end_ts: latest cluster end timestamp (lte)
    :param opinion: keyword phrase matched against the ``keys`` field
    :param sort_item: weibo field to sort by, e.g. 'timestamp'
    :return: list of (mid, weibo_content_dict) sorted desc, or the string
             'no results' when nothing matched
    NOTE(review): the function is littered with Python-2 debug prints and
    returns a string on the empty case instead of an empty list -- confirm
    callers rely on this before cleaning up.
    """
    weibo_dict = {}
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {"match_phrase": {"keys": opinion}},
                    {'term': {'name': topic}},
                    {'range': {'start_ts': {'gte': start_ts}}},
                    {'range': {'end_ts': {'lte': end_ts}}}
                ]
            }
        },
        'size': 1000000
    }
    # Debug output left in by the original author.
    print 'query_body:::::;', query_body
    weibos = es.search(index=subopinion_index_name, doc_type=subopinion_index_type, body=query_body)['hits']['hits']
    print len(weibos)
    for weibo in weibos:
        print weibo['_source'].keys()
        print 'start_ts:::::::::', weibo["_source"]["start_ts"]
        print 'end_ts:::::::::::', weibo["_source"]["end_ts"]
        print 'name:::::::::::::', weibo['_source']["name"]
        print 'keys:::::::::::::', weibo["_source"]["keys"]
    print 'opinion:::::', opinion
    print 'start_ts::::', start_ts
    print 'end_ts::::::', end_ts
    print 'topic:::::::', topic
    if weibos:
        # Only the first hit's cluster dump is used; each cluster value is a
        # list of weibos of which the first is taken as representative.
        weibos = json.loads(weibos[0]['_source']['cluster_dump_dict'])
        for weibo in weibos.values():  # jln0825
            weibo = weibo[0]
            weibo_content = {}
            weibo_content['text'] = weibo['text']
            weibo_content['uid'] = weibo['uid']
            weibo_content['timestamp'] = full_datetime2ts(weibo['datetime'])
            weibo_content['comment'] = weibo['comment']
            weibo_content['retweeted'] = weibo['retweeted']
            weibo_content['mid'] = weibo['id']
            try:
                # Best-effort profile enrichment; unknowns on any failure.
                user = es.get(index=profile_index_name, doc_type=profile_index_type, id=weibo_content['uid'])['_source']
                weibo_content['uname'] = user['nick_name']
                weibo_content['photo_url'] = user['photo_url']
            except:
                weibo_content['uname'] = 'unknown'
                weibo_content['photo_url'] = 'unknown'
            weibo_dict[weibo_content['mid']] = weibo_content
        results = sorted(weibo_dict.items(), key=lambda x: x[1][sort_item], reverse=True)
        #print results
        return results
    else:
        return 'no results'