def ajax_revise_task():
    """Revise a sensing task: update its stop time and/or restart it.

    Query args:
        task_name: task identifier (required).
        finish:    "0" restarts the task; any other value leaves it alone.
        stop_time: new stop timestamp as a string (optional).
        user:      task owner (required).

    Returns a JSON list: ['1'] on success, [] on bad input or a stale
    stop_time.
    """
    task_name = request.args.get('task_name', '')  # must
    finish = request.args.get("finish", "10")
    stop_time = request.args.get('stop_time', '')  # timestamp
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    now_ts = time.time()
    # BUG FIX: stop_time arrives as a string; compare numerically.  The
    # original `stop_time < now_ts` compared str to float, which in py2
    # never evaluated True, so stale stop times slipped through.
    if stop_time and float(stop_time) < now_ts:
        return json.dumps([])
    if task_name and user:
        task_detail = es.get(index=index_manage_sensing_task,
                             doc_type=task_doc_type, id=_id)['_source']
        if stop_time:
            task_detail['stop_time'] = stop_time
        if int(finish) == 0:
            task_detail['finish'] = finish
            task_detail['processing_status'] = "1"  # restarting: mark as processing
        if stop_time or int(finish) == 0:
            es.index(index=index_manage_sensing_task, doc_type=task_doc_type,
                     id=_id, body=task_detail)
        return json.dumps(['1'])
    return json.dumps([])
def get_text_detail(task_name, ts, text_type, user, order, size=100):
    """Dispatch to the right weibo-detail fetcher for one display tab.

    text_type: 0 hot-origin, 1 hot-retweeted, 7 sensitive -> origin detail;
               2 retweets, 3 comments -> retweet detail by message_type;
               4 positive, 5 neutral, 6 negative -> retweet detail by sentiment.
    Returns whatever the underlying fetcher returns ([] for unknown types).
    """
    results = []
    text_type = int(text_type)  # convert once instead of per-branch
    # NOTE: the original fetched the task document from ES here but never
    # used it; that dead round-trip has been removed.
    if text_type == 0:    # hot original weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 1)
    elif text_type == 1:  # hot retweeted weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 2)
    elif text_type == 2:  # ordinary retweets
        results = get_retweet_weibo_detail(ts, user, task_name, size, "message_type", 3)
    elif text_type == 3:  # ordinary comments
        results = get_retweet_weibo_detail(ts, user, task_name, size, "message_type", 2)
    elif text_type == 4:  # positive weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", "1")
    elif text_type == 5:  # neutral weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", "0")
    elif text_type == 6:  # negative weibo
        results = get_retweet_weibo_detail(ts, user, task_name, size, "sentiment", ["2", "3", "4", "5", "6"])
    elif text_type == 7:  # sensitive weibo
        results = get_origin_weibo_detail(ts, user, task_name, size, order, 3)
    else:
        print("error")
    return results
def get_group_user_track(uid):
    """Build a day-by-day geo track for one user from the portrait index.

    Returns a list of [date_string, top_geo] pairs (top_geo is '' for days
    with no activity), or an explanatory string when the uid has no
    portrait document.
    """
    # step 1: fetch the per-day activity_geo_dict from the user portrait
    try:
        hit = es_user_portrait.get(index=portrait_index_name,
                                   doc_type=portrait_index_type,
                                   id=uid, _source=False,
                                   fields=['activity_geo_dict'])
    except:
        hit = {}
    if hit == {}:
        return 'uid is not in user_portrait'
    geo_by_day = json.loads(hit['fields']['activity_geo_dict'][0])
    # step 2: walk forward one day at a time, picking each day's top geo
    day_ts = datetime2ts(ts2datetime(int(time.time()))) - DAY * len(geo_by_day)
    track = []
    for day_geo in geo_by_day:
        ranked = sorted(day_geo.items(), key=lambda kv: kv[1], reverse=True)
        track.append([ts2datetime(day_ts), ranked[0][0] if ranked else ''])
        day_ts += DAY
    return track
def ajax_get_group_detail():
    """Return portrait rows (JSON) for every uid stored in a group.

    Rows follow SOCIAL_SENSOR_INFO ordering; activeness/importance/
    influence are log-normalised to 0-100 against the site-wide maxima.
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', '')
    doc_id = user + '-' + task_name
    rows = []
    max_activeness = get_top_influence("activeness")
    max_influence = get_top_influence("influence")
    max_importance = get_top_influence("importance")
    group = es.get(index=index_group_manage, doc_type=doc_type_group,
                   id=doc_id).get('_source', {})
    if group:
        # uid_list may be stored either JSON-encoded or as a raw list
        try:
            uid_list = json.loads(group['uid_list'])
        except:
            uid_list = group['uid_list']
        if uid_list:
            docs = es.mget(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           body={"ids": uid_list},
                           fields=SOCIAL_SENSOR_INFO)['docs']
            for doc in docs:
                if not doc['found']:
                    continue
                row = []
                for field in SOCIAL_SENSOR_INFO:
                    if field == "topic_string":
                        # NOTE(review): appended twice in the original (both
                        # copies of this view do it) — preserved; confirm the
                        # front end really expects two topic columns.
                        row.append(doc["fields"][field][0].split('&'))
                        row.append(doc["fields"][field][0].split('&'))
                    elif field == "activeness":
                        row.append(math.log(doc['fields']['activeness'][0] / float(max_activeness) * 9 + 1, 10) * 100)
                    elif field == "importance":
                        row.append(math.log(doc['fields']['importance'][0] / float(max_importance) * 9 + 1, 10) * 100)
                    elif field == "influence":
                        row.append(math.log(doc['fields']['influence'][0] / float(max_influence) * 9 + 1, 10) * 100)
                    else:
                        row.append(doc["fields"][field][0])
                rows.append(row)
    return json.dumps(rows)
def get_group_user_track(uid):
    """Return a daily geo track for *uid* from the user-portrait index.

    Each element is [date, most_frequent_geo]; the geo is '' when the day
    has no recorded activity.  A plain string is returned when the uid is
    not in the portrait index.
    """
    try:
        doc = es_user_portrait.get(
            index=portrait_index_name, doc_type=portrait_index_type,
            id=uid, _source=False, fields=["activity_geo_dict"])
    except:
        doc = {}
    if doc == {}:
        return "uid is not in user_portrait"
    daily_geo = json.loads(doc["fields"]["activity_geo_dict"][0])
    today_ts = datetime2ts(ts2datetime(int(time.time())))
    # the list covers the last len(daily_geo) days, oldest first
    cursor = today_ts - DAY * len(daily_geo)
    results = []
    for entry in daily_geo:
        best = sorted(entry.items(), key=lambda pair: pair[1], reverse=True)
        if best:
            results.append([ts2datetime(cursor), best[0][0]])
        else:
            results.append([ts2datetime(cursor), ""])
        cursor = cursor + DAY
    return results
def ajax_stop_task():
    """Stop a sensing task by flipping processing_status to '0'.

    Returns JSON ['1'] on success, [] when task_name/user are missing.
    """
    task_name = request.args.get('task_name', '')  # must
    user = request.args.get('user', '')
    if not (task_name and user):
        return json.dumps([])
    doc_id = user + "-" + task_name
    detail = es.get(index=index_manage_sensing_task,
                    doc_type=task_doc_type, id=doc_id)['_source']
    detail['processing_status'] = '0'  # mark stopped
    es.index(index=index_manage_sensing_task, doc_type=task_doc_type,
             id=doc_id, body=detail)
    return json.dumps(['1'])
def ajax_get_group_detail():
    """JSON view: portrait details for each member of a saved group.

    Evaluation indexes (activeness/importance/influence) are rescaled to
    0-100 with a log curve against the current global maxima.
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    top = {
        "activeness": get_top_influence("activeness"),
        "influence": get_top_influence("influence"),
        "importance": get_top_influence("importance"),
    }

    def _scaled(fields, key):
        # log-normalise one evaluation index to 0-100
        return math.log(fields[key][0] / float(top[key]) * 9 + 1, 10) * 100

    portrait_detail = []
    search_result = es.get(index=index_group_manage, doc_type=doc_type_group,
                           id=_id).get('_source', {})
    if search_result:
        try:
            uid_list = json.loads(search_result['uid_list'])
        except:
            uid_list = search_result['uid_list']
        if uid_list:
            hits = es.mget(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           body={"ids": uid_list},
                           fields=SOCIAL_SENSOR_INFO)['docs']
            for hit in hits:
                if not hit['found']:
                    continue
                row = []
                for col in SOCIAL_SENSOR_INFO:
                    if col == "topic_string":
                        # NOTE(review): intentionally appended twice, as in
                        # the original — verify against the template.
                        row.append(hit["fields"][col][0].split('&'))
                        row.append(hit["fields"][col][0].split('&'))
                    elif col in ("activeness", "importance", "influence"):
                        row.append(_scaled(hit['fields'], col))
                    else:
                        row.append(hit["fields"][col][0])
                portrait_detail.append(row)
    return json.dumps(portrait_detail)
def ajax_stop_task():
    """Set a sensing task's processing_status to '0' (stopped).

    Responds with JSON ['1'] when the update happened, [] otherwise.
    """
    name = request.args.get('task_name', '')  # must
    owner = request.args.get('user', '')
    if name and owner:
        key = owner + "-" + name
        record = es.get(index=index_manage_sensing_task,
                        doc_type=task_doc_type, id=key)['_source']
        record['processing_status'] = '0'
        es.index(index=index_manage_sensing_task, doc_type=task_doc_type,
                 id=key, body=record)
        return json.dumps(['1'])
    return json.dumps([])
def ajax_get_task_detail_info():
    """Return full JSON detail for one sensing task.

    history_status is sorted newest-first; social sensors (when present)
    are expanded into portrait rows sorted by their 6th column descending.
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', 'admin')
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type, id=_id)['_source']
    task_detail["social_sensors"] = json.loads(task_detail["social_sensors"])
    history_status = json.loads(task_detail['history_status'])
    # CLEANUP: dead temp_list and commented-out reordering code removed;
    # identity key dropped — a plain descending sort is equivalent.
    if history_status:
        task_detail['history_status'] = sorted(history_status, reverse=True)
    else:
        task_detail['history_status'] = []
    task_detail['social_sensors_portrait'] = []
    portrait_detail = []
    if task_detail["social_sensors"]:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids": task_detail["social_sensors"]})['docs']
        for item in search_results:
            if not item['found']:
                continue
            temp = []
            for iter_item in SOCIAL_SENSOR_INFO:
                if iter_item == "topic_string":
                    temp.append(item["_source"][iter_item].split('&'))
                else:
                    temp.append(item["_source"][iter_item])
            portrait_detail.append(temp)
        if portrait_detail:
            # sort sensors by column 5 descending (presumably influence —
            # depends on SOCIAL_SENSOR_INFO ordering; confirm)
            portrait_detail = sorted(portrait_detail, key=lambda x: x[5], reverse=True)
        task_detail['social_sensors_portrait'] = portrait_detail
    return json.dumps(task_detail)
def ajax_get_task_detail_info():
    """Return a sensing task's stored detail with history newest-first.

    In this variant the social-sensor portrait lookup is disabled (it was
    commented out in the original); social_sensors_portrait is always [].
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', 'admin')
    doc_id = user + "-" + task_name
    detail = es.get(index=index_manage_sensing_task,
                    doc_type=task_doc_type, id=doc_id)['_source']
    history = json.loads(detail['history_status'])
    if history:
        detail['history_status'] = sorted(history, key=lambda x: x, reverse=True)
    else:
        detail['history_status'] = []
    detail['social_sensors_portrait'] = []
    return json.dumps(detail)
def ajax_get_clustering_topic():
    """Return up to five clustering topics for one task snapshot.

    Single-character words are filtered out of every topic before the
    JSON response is built.
    """
    task_name = request.args.get('task_name', '')  # task_name
    user = request.args.get('user', '')
    ts = int(request.args.get('ts', ''))  # timestamp: 123456789
    doc_id = user + '-' + task_name
    detail = es.get(index=index_sensing_task, doc_type=doc_id, id=ts)['_source']
    # burst_reason was hard-coded to 1 in the original, so the branch is
    # unconditional; kept that way.
    filtered = []
    raw_topics = detail.get("clustering_topic", [])
    if raw_topics:
        for topic in json.loads(raw_topics):
            filtered.append([word for word in topic if len(word) > 1])
    return json.dumps(filtered[:5])
def ajax_get_clustering_topic():
    """Fetch the clustering topics recorded for a task at one timestamp,
    drop one-character words, and return at most five topics as JSON."""
    name = request.args.get('task_name', '')  # task_name
    owner = request.args.get('user', '')
    snapshot_ts = int(request.args.get('ts', ''))  # timestamp: 123456789
    record = es.get(index=index_sensing_task, doc_type=owner + '-' + name,
                    id=snapshot_ts)['_source']
    topics_json = record.get("clustering_topic", [])
    if not topics_json:
        return json.dumps([])
    cleaned = [[word for word in topic if len(word) > 1]
               for topic in json.loads(topics_json)]
    return json.dumps(cleaned[:5])
def ajax_revise_task():
    """Update a sensing task's stop time and/or restart a finished task.

    finish == "0" restarts the task (processing_status -> "1"); a non-empty
    stop_time replaces the stored one.  Responds ['1'] on success, [] when
    input is invalid or the requested stop_time is already in the past.
    """
    task_name = request.args.get('task_name', '')  # must
    finish = request.args.get("finish", "10")
    stop_time = request.args.get('stop_time', '')  # timestamp
    user = request.args.get('user', '')
    _id = user + '-' + task_name
    now_ts = time.time()
    # BUG FIX: numeric comparison — the original compared the stop_time
    # string against a float, so the staleness check never triggered in py2.
    if stop_time and float(stop_time) < now_ts:
        return json.dumps([])
    if not (task_name and user):
        return json.dumps([])
    task_detail = es.get(index=index_manage_sensing_task,
                         doc_type=task_doc_type, id=_id)['_source']
    if stop_time:
        task_detail['stop_time'] = stop_time
    if int(finish) == 0:
        task_detail['finish'] = finish
        task_detail['processing_status'] = "1"  # restart: back to processing
    if stop_time or int(finish) == 0:
        es.index(index=index_manage_sensing_task, doc_type=task_doc_type,
                 id=_id, body=task_detail)
    return json.dumps(['1'])
def get_sensitive_text_detail(task_name, ts, user, order):
    """Build display rows for the sensitive weibo of one task snapshot.

    order: "total" | "retweeted" | "comment" sort the stored per-mid
    counters before row construction; "ts" / "sensitive" re-sort the final
    rows; anything else keeps storage order.
    Row layout: [uid, nick_name, photo_url, text, sentiment, date, geo,
                 message_type, retweeted, comment, sensitive].
    """
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.items():
            # [mid, hot value, retweet count, comment count]
            weibo_detail_list.append(
                [iter_mid, item[iter_mid], item['retweeted'], item['comment']])
    mid_list = weibo_detail.keys()
    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {"mid": mid_list}
                }
            }
        }
    }
    # search today's and yesterday's flow-text indices, when they exist
    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    if es_text.indices.exists(index_name):
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    if es_text.indices.exists(index_name_1):
        index_list.append(index_name_1)
    if index_list and mid_list:
        search_results = es_text.search(index=index_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []
    uid_list = []
    text_dict = dict()      # mid -> weibo text document
    portrait_dict = dict()  # uid -> {nick_name, photo_url}
    for item in search_results:
        uid_list.append(item["_source"]['uid'])
        text_dict[item['_id']] = item['_source']  # _id is the mid
    if uid_list:
        portrait_result = es_profile.mget(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          body={"ids": uid_list},
                                          fields=['nick_name', 'photo_url'])["docs"]
        for item in portrait_result:
            if item['found']:
                portrait_dict[item['_id']] = {
                    "nick_name": item["fields"]["nick_name"][0],
                    "photo_url": item["fields"]["photo_url"][0]}
            else:
                # no profile: fall back to the uid as the display name
                portrait_dict[item['_id']] = {"nick_name": item['_id'],
                                              "photo_url": ""}
    if order == "total":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[1], reverse=True)
    elif order == "retweeted":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[2], reverse=True)
    elif order == "comment":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[3], reverse=True)
    else:
        sorted_list = weibo_detail_list
    # CLEANUP: dead count_n counter removed — it was incremented but never read.
    for item in sorted_list:
        mid = item[0]
        iter_text = text_dict.get(mid, {})
        if not iter_text:
            continue  # weibo text not found in the flow indices
        temp = []
        uid = iter_text['uid']
        temp.append(uid)
        iter_portrait = portrait_dict.get(uid, {})
        if iter_portrait:
            temp.append(iter_portrait['nick_name'])
            temp.append(iter_portrait['photo_url'])
        else:
            temp.extend([uid, ''])
        temp.append(iter_text["text"])
        temp.append(iter_text["sentiment"])
        temp.append(ts2date(iter_text['timestamp']))
        temp.append(iter_text['geo'])
        temp.append(iter_text['message_type'])
        temp.append(item[2])
        temp.append(item[3])
        temp.append(iter_text.get('sensitive', 0))
        results.append(temp)
    if results and order == "ts":
        results = sorted(results, key=lambda x: x[5], reverse=True)
    if results and order == "sensitive":
        results = sorted(results, key=lambda x: x[-1], reverse=True)
    return results
def get_sensitive_text_detail(task_name, ts, user, order):
    """Return sorted display rows for a snapshot's sensitive weibo.

    The per-mid counters stored on the task document are optionally sorted
    by order ("total"/"retweeted"/"comment"); rows are then built from the
    matching flow-text documents and may be re-sorted by "ts" or
    "sensitive".  Row layout: [uid, nick_name, photo_url, text, sentiment,
    date, geo, message_type, retweeted, comment, sensitive].
    """
    _id = user + '-' + task_name
    task_detail = es.get(index=index_sensing_task, doc_type=_id, id=ts)['_source']
    weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    counter_rows = []
    if weibo_detail:
        for mid, info in weibo_detail.items():
            # [mid, hot value, retweet count, comment count]
            counter_rows.append([mid, info[mid], info['retweeted'], info['comment']])
    mid_list = weibo_detail.keys()
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {"mid": mid_list}
                }
            }
        }
    }
    # collect the (up to two) daily flow-text indices covering ts
    index_list = []
    day = ts2datetime(ts)
    prev_day = ts2datetime(ts - DAY)
    for idx in (flow_text_index_name_pre + day, flow_text_index_name_pre + prev_day):
        if es_text.indices.exists(idx):
            index_list.append(idx)
    if index_list and mid_list:
        hits = es_text.search(index=index_list, doc_type=flow_text_index_type,
                              body=query_body)["hits"]["hits"]
    else:
        hits = []
    uid_list = []
    text_dict = dict()      # mid -> weibo document
    portrait_dict = dict()  # uid -> {nick_name, photo_url}
    for hit in hits:
        uid_list.append(hit["_source"]['uid'])
        text_dict[hit['_id']] = hit['_source']  # _id is the mid
    if uid_list:
        profiles = es_profile.mget(index=profile_index_name,
                                   doc_type=profile_index_type,
                                   body={"ids": uid_list},
                                   fields=['nick_name', 'photo_url'])["docs"]
        for doc in profiles:
            if doc['found']:
                portrait_dict[doc['_id']] = {
                    "nick_name": doc["fields"]["nick_name"][0],
                    "photo_url": doc["fields"]["photo_url"][0]}
            else:
                portrait_dict[doc['_id']] = {"nick_name": doc['_id'],
                                             "photo_url": ""}
    sort_column = {"total": 1, "retweeted": 2, "comment": 3}.get(order)
    if sort_column is not None:
        counter_rows = sorted(counter_rows, key=lambda r: r[sort_column],
                              reverse=True)
    # CLEANUP: unused count_n counter removed.
    results = []
    for row in counter_rows:
        doc = text_dict.get(row[0], {})
        if not doc:
            continue
        uid = doc['uid']
        portrait = portrait_dict.get(uid, {})
        out = [uid]
        if portrait:
            out.append(portrait['nick_name'])
            out.append(portrait['photo_url'])
        else:
            out.extend([uid, ''])
        out.append(doc["text"])
        out.append(doc["sentiment"])
        out.append(ts2date(doc['timestamp']))
        out.append(doc['geo'])
        out.append(doc['message_type'])
        out.append(row[2])
        out.append(row[3])
        out.append(doc.get('sensitive', 0))
        results.append(out)
    if results and order == "ts":
        results = sorted(results, key=lambda r: r[5], reverse=True)
    if results and order == "sensitive":
        results = sorted(results, key=lambda r: r[-1], reverse=True)
    return results
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Find portrait users similar to *uid*, weighting fields per query.

    query_fields_dict maps field name -> boost weight, plus two control
    entries: "size" (rows wanted) and optional "tag" (weight applied to
    the user's tag-* attributes).  Returns [] when there is nothing to
    match on; otherwise [self_row, similar_row..., total_count], each row
    [uid, uname, activeness, importance, influence, sensitive,
    similarity 0-100] with the evaluate indexes log-normalised.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)['_source']
    # collect the user's tag-* attributes; "tag" in the query is their weight
    tag_dict = dict()
    tag_dict_value = 0
    if "tag" in query_fields_dict:
        tag_dict_value = query_fields_dict.pop("tag")
        for key, value in personal_info.items():
            if "tag-" in key:
                tag_dict[key] = value
    sort_size = query_fields_dict.pop("size")
    # fields with a non-zero weight that the user actually has values for
    keys_list = [k for k, v in query_fields_dict.items() if v]
    search_dict = {}  # field -> '&'-split values to wildcard-match
    iter_list = []
    for iter_key in keys_list:
        if iter_key in personal_info:
            if not personal_info[iter_key] or not query_fields_dict[iter_key]:
                query_fields_dict.pop(iter_key)
            else:
                iter_list.append(iter_key)
                search_dict[iter_key] = personal_info[iter_key].split('&')
    if len(iter_list) == 0 and len(tag_dict) == 0:
        return []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': []
                    }
                }
            }
        }
    }
    # total count is taken before any filter is added (match-all), which is
    # what the original did — it reports the whole index size
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body["size"] = sort_size + 100  # over-fetch so self/short hits can be skipped
    must_list = query_body['query']['function_score']['query']['bool']['must']
    for k, v in query_fields_dict.items():
        if k in personal_info and v != 0:
            should = [{'wildcard': {k: {'wildcard': '*' + token + '*', 'boost': v}}}
                      for token in search_dict[k]]
            must_list.append({'bool': {'should': should}})
    if tag_dict and tag_dict_value:
        should = [{"term": {k: v}} for k, v in tag_dict.items()]
        must_list.append({'bool': {'should': should}})
    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence', 'sensitive']
    evaluate_index_list = ['activeness', 'importance', 'influence', 'sensitive']
    # BUG FIX: evaluate_max_dict is needed for the self row below even when
    # fewer than two hits come back; the original only fetched it inside the
    # len(result) > 1 branch and raised NameError otherwise.
    evaluate_max_dict = get_evaluate_max()
    return_list = []
    count = 0
    if len(result) > 1:
        # best score among hits other than the queried user scales every
        # similarity to 0-100
        if result[0]['_id'] != uid:
            top_score = result[0]['_score']
        else:
            top_score = result[1]['_score']
        for item in result:
            if uid == item['_id']:
                continue  # skip the queried user himself
            info = []
            for field in field_list:
                if field in evaluate_index_list:
                    value = item['_source'][field]
                    normal_value = math.log(
                        value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
                else:
                    normal_value = item['_source'][field]
                    if not normal_value:
                        normal_value = item['_id']  # fall back to uid for empty names
                info.append(normal_value)
            info.append(item['_score'] / float(top_score) * 100)
            return_list.append(info)
            count += 1
            if count == sort_size:
                break
    return_list.append(number)
    # self row: same normalisation applied to the queried user
    temp_list = []
    for field in field_list:
        if field in evaluate_index_list:
            value = personal_info[field]
            temp_list.append(
                math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100)
        else:
            temp_list.append(personal_info[field])
    results = [temp_list]
    results.extend(return_list)
    return results
def show_important_users(task_name):
    """Assemble the 'important users' panel for a social-sensing task.

    Returns a dict with keywords, remark, social-sensor portrait rows and
    group_list rows for every important user collected across the task's
    history snapshots (those below IMPORTANT_USER_THRESHOULD are dropped).
    """
    return_results = dict()
    # CLEANUP: a dead no-argument get_top_influence() call was removed — its
    # result was overwritten below before any use.
    task_detail = es.get(index=index_manage_social_task, doc_type=task_doc_type,
                         id=task_name)["_source"]
    portrait_detail = []
    important_user_set = set()  # union of important users over all snapshots
    history_status = json.loads(task_detail['history_status'])
    start_time = int(task_detail['create_at'])
    stop_time = int(task_detail['stop_time'])
    keywords_list = json.loads(task_detail['keywords'])
    return_results['keywords'] = keywords_list
    return_results['remark'] = task_detail['remark']
    social_sensors = json.loads(task_detail['social_sensors'])
    time_series = [item[0] for item in history_status]  # snapshot timestamps
    # social sensor portrait rows
    if social_sensors:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids": social_sensors},
                                 fields=['uid', 'uname', 'domain', 'topic_string',
                                         "photo_url", 'importance', 'influence',
                                         'activeness'])["docs"]
        for item in search_results:
            if item['found']:
                temp = []
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        temp.append(item["fields"][iter_item][0].split('&'))
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
    return_results['social_sensors_detail'] = portrait_detail
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=task_name,
                              body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            important_user_set |= set(json.loads(item['important_users']))
    top_importance = get_top_influence('importance')
    top_activeness = get_top_influence('activeness')
    top_influence = get_top_influence('influence')
    important_uid_list = list(important_user_set)
    user_detail_info = []
    # ROBUSTNESS: guard restored (it was commented out) so mget is not
    # issued with an empty id list.
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name,
                               doc_type=portrait_index_type,
                               body={"ids": important_uid_list},
                               fields=['uid', 'uname', 'domain', 'topic_string',
                                       "photo_url", 'importance', 'influence',
                                       'activeness'])['docs']
        for item in user_results:
            if not item['found']:
                continue
            if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                continue
            temp = []
            temp.append(item['fields']['uid'][0])
            temp.append(item['fields']['uname'][0])
            temp.append(item['fields']['photo_url'][0])
            temp.append(item['fields']['domain'][0])
            temp.append(item['fields']['topic_string'][0].split('&'))
            hot_count = count_hot_uid(item['fields']['uid'][0], start_time,
                                      stop_time, keywords_list)
            temp.append(hot_count)
            # BUG FIX: cast maxima to float — under py2 the original integer
            # division truncated the normalised scores to 0.
            importance = math.log(
                item['fields']['importance'][0] / float(top_importance) * 9 + 1, 10) * 100
            temp.append("%.2f" % importance)
            temp.append(math.log(
                item['fields']['influence'][0] / float(top_influence) * 9 + 1, 10) * 100)
            temp.append(math.log(
                item['fields']['activeness'][0] / float(top_activeness) * 9 + 1, 10) * 100)
            user_detail_info.append(temp)
    return_results['group_list'] = user_detail_info
    return return_results
def get_retweet_weibo_detail(ts, user, task_name, size, text_type, type_value):
    """Fetch recent retweets/comments pointing at the task's tracked weibo.

    text_type: "message_type" with an int value, or "sentiment" with either
    a single sentiment string or a list of them.
    Returns rows [uid, nick_name, photo_url, text, sentiment, date, geo,
    message_type], newest first.
    """
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id,
                                       id=ts)['_source']
    origin_weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    retweeted_weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    mid_list = []
    mid_list.extend(origin_weibo_detail.keys())
    mid_list.extend(retweeted_weibo_detail.keys())
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"range": {
                                "timestamp": {"gte": ts - time_interval, "lt": ts}
                            }},
                            {"terms": {"root_mid": mid_list}}
                        ]
                    }
                }
            }
        },
        "sort": {"timestamp": {"order": "desc"}},
        # FIX: honour the caller's size instead of the hard-coded 100
        # (callers pass 100 by default, so behaviour is unchanged for them)
        "size": size
    }
    must_list = query_body['query']['filtered']['filter']['bool']['must']
    if text_type == "message_type":
        must_list.append({"term": {text_type: type_value}})
    if text_type == "sentiment":
        # a single sentiment value uses term; a list of values uses terms
        if len(type_value) == 1:
            must_list.append({"term": {text_type: type_value}})
        else:
            must_list.append({"terms": {text_type: type_value}})
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - time_interval)
    index_name = flow_text_index_name_pre + datetime
    exist_es = es_text.indices.exists(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    exist_es_1 = es_text.indices.exists(index_name_1)
    # 1. query the weibo: the window lives in one daily index, or starts in
    # the previous day's index (original behaviour queries a single index)
    if datetime == datetime_1 and exist_es:
        search_results = es_text.search(index=index_name,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    elif datetime != datetime_1 and exist_es_1:
        search_results = es_text.search(index=index_name_1,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []
    # 2. enrich every hit with profile information
    results = []
    uid_list = [item["_source"]['uid'] for item in search_results]
    if uid_list:
        portrait_result = es_profile.mget(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          body={"ids": uid_list},
                                          fields=['nick_name', 'photo_url'])["docs"]
        for i in range(len(uid_list)):
            item = search_results[i]['_source']
            temp = []
            temp.append(item['uid'])
            if portrait_result[i]['found']:
                temp.append(portrait_result[i]["fields"]["nick_name"][0])
                temp.append(portrait_result[i]["fields"]["photo_url"][0])
            else:
                # no profile doc: show the uid and an empty avatar
                temp.append(item['uid'])
                temp.append("")
            temp.append(item["text"])
            temp.append(item["sentiment"])
            temp.append(ts2date(item['timestamp']))
            temp.append(item['geo'])
            temp.append(item["message_type"])
            results.append(temp)
    return results
def get_origin_weibo_detail(ts, user, task_name, size, order, message_type=1):
    """Return grouped display rows for a task snapshot's tracked weibo.

    message_type selects the stored counter dict: 1 origin, 2 retweeted,
    anything else sensitive.  Rows are sorted by (timestamp, topic value,
    hot value) descending, then near-duplicates (per duplicate_dict) are
    folded into the group of the best-ranked copy.
    Returns a list of groups; each group is a list of rows.
    """
    _id = user + '-' + task_name
    task_detail = es_user_portrait.get(index=index_sensing_task, doc_type=_id,
                                       id=ts)['_source']
    mid_value = json.loads(task_detail['mid_topic_value'])
    duplicate_dict = json.loads(task_detail['duplicate_dict'])
    # invert duplicate_dict: representative mid -> [dup mids..., rep]
    tmp_duplicate_dict = dict()
    for k, v in duplicate_dict.items():
        try:
            tmp_duplicate_dict[v].append(k)
        except KeyError:
            tmp_duplicate_dict[v] = [k, v]
    if message_type == 1:
        weibo_detail = json.loads(task_detail['origin_weibo_detail'])
    elif message_type == 2:
        weibo_detail = json.loads(task_detail['retweeted_weibo_detail'])
    else:
        weibo_detail = json.loads(task_detail['sensitive_weibo_detail'])
    weibo_detail_list = []
    if weibo_detail:
        for iter_mid, item in weibo_detail.items():
            # [mid, hot value, retweet count, comment count]
            weibo_detail_list.append(
                [iter_mid, item[iter_mid], item['retweeted'], item['comment']])
    mid_list = weibo_detail.keys()
    results = []
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "terms": {"mid": mid_list}
                }
            }
        },
        "size": 1000,
        "sort": {"timestamp": {"order": "desc"}}
    }
    # search today's and yesterday's flow-text indices, when they exist
    index_list = []
    datetime = ts2datetime(ts)
    datetime_1 = ts2datetime(ts - DAY)
    index_name = flow_text_index_name_pre + datetime
    if es_text.indices.exists(index_name):
        index_list.append(index_name)
    index_name_1 = flow_text_index_name_pre + datetime_1
    if es_text.indices.exists(index_name_1):
        index_list.append(index_name_1)
    if index_list and mid_list:
        search_results = es_text.search(index=index_list,
                                        doc_type=flow_text_index_type,
                                        body=query_body)["hits"]["hits"]
    else:
        search_results = []
    uid_list = []
    text_dict = dict()      # mid -> weibo document
    portrait_dict = dict()  # uid -> {nick_name, photo_url}
    for item in search_results:
        uid_list.append(item["_source"]['uid'])
        text_dict[item['_id']] = item['_source']  # _id is the mid
    if uid_list:
        portrait_result = es_profile.mget(index=profile_index_name,
                                          doc_type=profile_index_type,
                                          body={"ids": uid_list},
                                          fields=['nick_name', 'photo_url'])["docs"]
        for item in portrait_result:
            if item['found']:
                portrait_dict[item['_id']] = {
                    "nick_name": item["fields"]["nick_name"][0],
                    "photo_url": item["fields"]["photo_url"][0]}
            else:
                portrait_dict[item['_id']] = {"nick_name": item['_id'],
                                              "photo_url": ""}
    if order == "total":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[1], reverse=True)
    elif order == "retweeted":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[2], reverse=True)
    elif order == "comment":
        sorted_list = sorted(weibo_detail_list, key=lambda x: x[3], reverse=True)
    else:
        sorted_list = weibo_detail_list
    # CLEANUP: dead count_n / results_dict locals removed.
    for item in sorted_list:
        mid = item[0]
        iter_text = text_dict.get(mid, {})
        if not iter_text:
            continue
        temp = []
        uid = iter_text['uid']
        temp.append(uid)
        iter_portrait = portrait_dict.get(uid, {})
        if iter_portrait:
            temp.append(iter_portrait['nick_name'])
            temp.append(iter_portrait['photo_url'])
        else:
            temp.extend([uid, ''])
        temp.append(iter_text["text"])
        temp.append(iter_text["sentiment"])
        temp.append(ts2date(iter_text['timestamp']))
        temp.append(iter_text['geo'])
        # normalise the displayed message type per view
        if message_type == 1:
            temp.append(1)
        elif message_type == 2:
            temp.append(3)
        else:
            temp.append(iter_text['message_type'])
        temp.append(item[2])
        temp.append(item[3])
        temp.append(iter_text.get('sensitive', 0))
        temp.append(iter_text['timestamp'])
        temp.append(mid_value[mid])
        temp.append(mid)
        results.append(temp)
    # sort by timestamp, topic value, hot value (all descending)
    results = sorted(results, key=operator.itemgetter(-4, -2, -6), reverse=True)
    sort_results = []
    mid_index_dict = dict()  # mid -> position in sort_results
    for count, item in enumerate(results):
        sort_results.append([item])
        mid_index_dict[item[-1]] = count
    if tmp_duplicate_dict:
        remove_list = []
        for dup_mids in tmp_duplicate_dict.values():
            positions = []
            for mid in dup_mids:
                # BUG FIX: membership test instead of truthiness — with
                # .get(mid, 0) the top-ranked row (index 0) was always
                # treated as missing and never grouped.
                if mid in mid_index_dict:
                    positions.append(mid_index_dict[mid])
            if len(positions) > 1:
                best = min(positions)
            else:
                continue
            positions.remove(best)
            # fold the lower-ranked duplicates into the best-ranked group
            for pos in positions:
                sort_results[best].extend(sort_results[pos])
                remove_list.append(sort_results[pos])
        for item in remove_list:
            sort_results.remove(item)
    return sort_results
def show_important_users(task_name):
    """Return keywords, sensor details and important users for a social sensing task.

    :param task_name: es document id of the task in ``index_manage_social_task``
    :return: dict with keys 'keywords', 'remark', 'social_sensors_detail',
             'group_list' (per-user rows: uid, uname, photo_url, domain,
             topics, hot count, normalized importance/influence/activeness)

    Fixes vs. original:
    - removed the dead zero-argument ``get_top_influence()`` call (its result
      was overwritten before any use, and every other call site passes a metric name);
    - wrapped the top_* denominators in ``float()`` so the normalization does not
      truncate to 0 under Python 2 integer division (consistent with get_task_detail_2).
    """
    return_results = dict()
    task_detail = es.get(index=index_manage_social_task, doc_type=task_doc_type,
                         id=task_name)["_source"]
    portrait_detail = []
    important_user_set = set()  # uids of important users over all timestamps
    history_status = json.loads(task_detail['history_status'])
    start_time = int(task_detail['create_at'])
    stop_time = int(task_detail['stop_time'])
    time_series = []
    keywords_list = json.loads(task_detail['keywords'])
    return_results['keywords'] = keywords_list
    return_results['remark'] = task_detail['remark']
    social_sensors = json.loads(task_detail['social_sensors'])
    for item in history_status:
        time_series.append(item[0])

    # return social sensors details
    if social_sensors:
        search_results = es.mget(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body={"ids": social_sensors},
                                 fields=['uid', 'uname', 'domain', 'topic_string',
                                         "photo_url", 'importance', 'influence',
                                         'activeness'])["docs"]
        for item in search_results:
            temp = []
            if item['found']:
                for iter_item in SOCIAL_SENSOR_INFO:
                    if iter_item == "topic_string":
                        # topic_string is '&'-joined; expose it as a list
                        temp.append(item["fields"][iter_item][0].split('&'))
                    else:
                        temp.append(item["fields"][iter_item][0])
                portrait_detail.append(temp)
    return_results['social_sensors_detail'] = portrait_detail

    # per-timestamp task snapshots live in index_sensing_task under the task's doc_type
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=task_name,
                              body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        for item in flow_detail:
            item = item['_source']
            temp_user_list = json.loads(item['important_users'])
            important_user_set = important_user_set | set(temp_user_list)

    # maxima used to normalize each evaluation metric onto a 0-100 log scale
    top_importance = get_top_influence('importance')
    top_activeness = get_top_influence('activeness')
    top_influence = get_top_influence('influence')
    important_uid_list = list(important_user_set)
    user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name,
                               doc_type=portrait_index_type,
                               body={"ids": important_uid_list},
                               fields=['uid', 'uname', 'domain', 'topic_string',
                                       "photo_url", 'importance', 'influence',
                                       'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                # drop users below the importance threshold
                if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                    continue
                temp.append(item['fields']['uid'][0])
                temp.append(item['fields']['uname'][0])
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                hot_count = count_hot_uid(item['fields']['uid'][0], start_time,
                                          stop_time, keywords_list)
                temp.append(hot_count)
                # float() avoids Python 2 integer-division truncation to 0
                importance = math.log(
                    item['fields']['importance'][0] / float(top_importance) * 9 + 1,
                    10) * 100
                temp.append("%.2f" % importance)
                temp.append(math.log(
                    item['fields']['influence'][0] / float(top_influence) * 9 + 1,
                    10) * 100)
                temp.append(math.log(
                    item['fields']['activeness'][0] / float(top_activeness) * 9 + 1,
                    10) * 100)
                user_detail_info.append(temp)
    return_results['group_list'] = user_detail_info
    return return_results
def get_task_detail_2(task_name, ts, user):
    """Assemble the detail view of a sensing task for one user.

    Reads the task document (id = "<user>-<task_name>") from
    index_manage_sensing_task, then mgets the per-timestamp snapshot docs and
    aggregates weibo counts plus the important / out-of-portrait user sets.

    :param task_name: task name part of the es document id
    :param ts: timestamp (converted with int(); currently only used for bci_index)
    :param user: creator, id prefix of the task document
    :return: dict with keys 'important_user_detail', 'out_portrait_user_detail',
             'time_series', 'all_weibo_list', 'origin_weibo_list',
             'retweeted_weibo_list'
    """
    results = dict()
    index_name = task_name
    _id = user + "-" + task_name
    task_detail = es.get(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id)["_source"]
    task_name = task_detail['task_name']
    history_status = json.loads(task_detail['history_status'])
    start_time = task_detail['create_at']
    create_by = task_detail['create_by']
    stop_time = task_detail['stop_time']
    remark = task_detail.get('remark', '')
    portrait_detail = []
    count = 0  # counter
    # maxima used to normalize evaluation metrics to a 0-100 log scale
    top_influence = get_top_influence("influence")
    top_activeness = get_top_influence("activeness")
    top_importance = get_top_influence("importance")
    time_series = []  # timestamps
    #positive_sentiment_list = []  # sentiment lists
    #neutral_sentiment_list = []
    #negetive_sentiment_list = []
    all_weibo_list = []
    origin_weibo_list = []  # weibo count lists
    retweeted_weibo_list = []
    #retweeted_weibo_count = []  # times retweeted by others
    #comment_weibo_count = []
    #total_number_count = []
    #burst_time_list = []  # burst timestamps
    important_user_set = set()  # important users
    out_portrait_users = set()  # users not in the portrait index
    ts = int(ts)
    # NOTE(review): the ts-based filtering below is disabled — every history
    # timestamp is used, regardless of ts.
    time_series = history_status
    #for item in history_status:
    #    if int(item[0]) <= ts:
    #        time_series.append(item[0])  # all timestamps up to now

    # get detail task information from es
    if time_series:
        flow_detail = es.mget(index=index_sensing_task, doc_type=_id, body={"ids": time_series})['docs']
    else:
        flow_detail = {}
    if flow_detail:
        # aggregate per-timestamp counts and user sets
        # NOTE(review): assumes every mget doc was found ('_source' present) — confirm
        for item in flow_detail:
            item = item['_source']
            timestamp = item['timestamp']
            #sentiment_distribution = json.loads(item["sentiment_distribution"])
            #positive_sentiment_list.append(int(sentiment_distribution['1']))
            #negetive_sentiment_list.append(int(sentiment_distribution['2'])+int(sentiment_distribution['3']) \
            #        +int(sentiment_distribution['4'])+int(sentiment_distribution['5'])+int(sentiment_distribution['6']))
            #neutral_sentiment_list.append(int(sentiment_distribution['0']))
            origin_weibo_list.append(item["origin_weibo_number"])  # real
            retweeted_weibo_list.append(item['retweeted_weibo_number'])  # real
            all_weibo_list.append(item["origin_weibo_number"]+item['retweeted_weibo_number'])
            #retweeted_weibo_count.append(item['retweeted_weibo_count'])
            #comment_weibo_count.append(item['comment_weibo_count'])
            #total_number_count.append(item['weibo_total_number'])
            temp_important_user_list = json.loads(item['important_users'])
            unfiltered_users = json.loads(item['unfilter_users'])
            temp_out_portrait_users = set(unfiltered_users) - set(temp_important_user_list)  # not in portrait index
            important_user_set = important_user_set | set(temp_important_user_list)
            out_portrait_users = out_portrait_users | set(temp_out_portrait_users)
            #burst_reason = item.get("burst_reason", "")
            #if burst_reason:
            #    burst_time_list.append([timestamp, count, burst_reason])
            count += 1

    ####################################################################################
    # burst-reason statistics and conclusions (disabled; kept as a no-op string literal)
    """
    weibo_variation_count = 0
    weibo_variation_time = []
    sentiment_variation_count = 0
    sentiment_variation_time = []
    sensitive_variation_count = 0 # sensitive
    sensitive_variation_time = [] # sensitive
    common_variation_count = 0
    common_variation_time = []
    if burst_time_list:
        for item in burst_time_list:
            tmp_common = 0
            x1 = 0
            x2 = 0
            x3 = 0
            if signal_count_varition in item[2]:
                weibo_variation_count += 1
                weibo_variation_time.append([ts2date_min(item[0]), total_number_count[item[1]]])
                x1 = total_number_count[item[1]]
                tmp_common += 1
            if signal_sentiment_varition in item[2]:
                tmp_common += 1
                sentiment_variation_count += 1
                x2 = negetive_sentiment_list[item[1]]
                sentiment_variation_time.append([ts2date_min(item[0]), negetive_sentiment_list[item[1]]])
            if signal_sensitive_variation in item[2]:
                tmp_common += 1
                sensitive_variation_count += 1
                x3 = sensitive_total_number_list[item[1]]
                sensitive_variation_time.append([ts2date_min(item[0]), all_weibo_list[item[1]]])
            if tmp_common >= 2:
                common_variation_count += 1
                common_variation_time.append([ts2date_min(item[0]), x1, x2, x3])

    warning_conclusion = remark
    variation_distribution = []
    if weibo_variation_count:
        variation_distribution.append(weibo_variation_time)
    else:
        variation_distribution.append([])
    if sentiment_variation_count:
        variation_distribution.append(sentiment_variation_time)
    else:
        variation_distribution.append([])
    if sensitive_variation_count:
        variation_distribution.append(sensitive_variation_time)
    else:
        variation_distribution.append([])
    if common_variation_count:
        variation_distribution.append(common_variation_time)
    else:
        variation_distribution.append([])

    results['warning_conclusion'] = warning_conclusion
    results['variation_distribution'] = variation_distribution

    # 每个用户的热度
    """

    # fetch personal info of the important users
    important_uid_list = list(important_user_set)
    out_portrait_users_list = list(out_portrait_users)
    user_detail_info = []
    out_user_detail_info = []
    if important_uid_list:
        user_results = es.mget(index=portrait_index_name, doc_type=portrait_index_type, body={"ids":important_uid_list}, fields=['uid', 'uname', 'domain', 'topic_string', "photo_url", 'importance', 'influence', 'activeness'])['docs']
        for item in user_results:
            if item['found']:
                temp = []
                #if int(item['fields']['importance'][0]) < IMPORTANT_USER_THRESHOULD:
                #    continue
                temp.append(item['fields']['uid'][0])
                uname = item['fields']['uname'][0]
                # fall back to the uid when the name is empty or the "unknown" placeholder
                if not uname or uname == "未知":
                    uname = item['fields']['uid'][0]
                temp.append(uname)
                temp.append(item['fields']['photo_url'][0])
                temp.append(item['fields']['domain'][0])
                temp.append(item['fields']['topic_string'][0].split('&'))
                #hot_count = count_hot_uid(item['fields']['uid'][0], start_time, stop_time)
                #temp.append(hot_count)
                # float() guards against Python 2 integer division
                temp.append(math.log(item['fields']['importance'][0]/float(top_importance)*9+1, 10)*100)
                temp.append(math.log(item['fields']['influence'][0]/float(top_influence)*9+1, 10)*100)
                temp.append(math.log(item['fields']['activeness'][0]/float(top_activeness)*9+1, 10)*100)
                user_detail_info.append(temp)
    # sort by normalized activeness (index 6), descending
    if user_detail_info:
        user_detail_info = sorted(user_detail_info, key=lambda x:x[6], reverse=True)
    else:
        user_detail_info = []

    if out_portrait_users_list:
        profile_results = es_profile.mget(index=profile_index_name, doc_type=profile_index_type, body={"ids":out_portrait_users_list})["docs"]
        bci_index = "bci_" + ts2datetime(ts-DAY).replace('-','')
        # NOTE(review): influence_results / bci_results / top_influence are only
        # consumed by the disabled block below — currently unused.
        influence_results = es.mget(index=bci_index, doc_type="bci", body={"ids":out_portrait_users_list}, fields=["user_index"])['docs']
        bci_results = es_profile.mget(index="bci_history", doc_type="bci", body={"ids":out_portrait_users_list}, fields=['user_fansnum'])['docs']
        top_influence = get_top_all_influence("user_index", ts)
        count = 0
        if profile_results:
            for item in profile_results:
                temp = []
                if item['found']:
                    uid = item['_source']['uid']
                    temp = get_user_profile([uid], ['nick_name', 'user_location', 'statusnum', 'fansnum'])[0]
                else:
                    # placeholder row for users without a profile document
                    temp = [item['_id'], item['_id'], '', '', '']
                """
                if item['found']:
                    temp.append(item['_source']['uid'])
                    if item['_source']['nick_name']:
                        temp.append(item['_source']['nick_name'])
                    else:
                        temp.append(item['_source']['uid'])
                    temp.append(item['_source']['user_location'])
                    temp.append(item['_source']['statusnum'])
                    temp.append(item['_source']['friendsnum'])
                else:
                    temp.append(item['_id'])
                    temp.append(item['_id'])
                    temp.extend([''])
                    temp.append('--')
                    temp.append('--')
                try:
                    user_fansnum = bci_results[count]["fields"]["user_fansnum"][0]
                except:
                    user_fansnum = 0
                temp.append(user_fansnum)
                temp_influ = influence_results[count]
                if temp_influ.get('found', 0):
                    user_index = temp_influ['fields']['user_index'][0]
                    temp.append(math.log(user_index/float(top_influence)*9+1, 10)*100)
                else:
                    temp.append(0)
                """
                count += 1
                out_user_detail_info.append(temp)

    # human-readable timestamps for the chart x-axis
    revise_time_series = []
    for item in time_series:
        revise_time_series.append(ts2date_min(item))

    results['important_user_detail'] = user_detail_info
    results['out_portrait_user_detail'] = out_user_detail_info
    #results['burst_time'] = burst_time_list  # burst timestamps and reasons
    results['time_series'] = revise_time_series
    #results['positive_sentiment_list'] = positive_sentiment_list
    #esults['negetive_sentiment_list'] = negetive_sentiment_list
    #results['neutral_sentiment_list'] = neutral_sentiment_list
    results['all_weibo_list'] = all_weibo_list
    results['origin_weibo_list'] = origin_weibo_list
    results['retweeted_weibo_list'] = retweeted_weibo_list
    #results['comment_weibo_count'] = comment_weibo_count
    #results['retweeted_weibo_count'] = retweeted_weibo_count
    #results['total_number_list'] = total_number_count
    return results
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Find users similar to ``uid`` by a weighted wildcard/term es query.

    :param uid: id of the reference user in the portrait index
    :param query_fields_dict: field -> weight mapping; special keys "tag"
        (weight for tag-* term matches) and "size" (number of rows wanted)
    :param index_name: es index to search (defaults to the portrait index)
    :param doctype: es doc_type to search
    :return: [] when nothing to match on; otherwise a list whose first element
        is the reference user's own normalized metrics, followed by per-match
        rows (uid, uname, normalized metrics, relative score) and finally the
        total candidate count.

    Fixes vs. original:
    - ``get_evaluate_max()`` is now fetched before the result branch: it was
      only assigned when the search returned more than one hit, yet used
      unconditionally afterwards (NameError on <=1 hit);
    - the caller's ``query_fields_dict`` is no longer mutated (pops happen on
      a copy);
    - leftover debug ``print`` statements and dead locals removed.
    """
    # work on a copy so the caller's dict is not destroyed by the pops below
    query_fields_dict = dict(query_fields_dict)
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type,
                           id=uid, _source=True)['_source']

    # tag: collect the reference user's tag-* attributes for term matching
    tag_dict = dict()
    tag_dict_value = 0
    if "tag" in query_fields_dict:
        tag_dict_value = query_fields_dict["tag"]
        query_fields_dict.pop("tag")
        for key, value in personal_info.iteritems():
            if "tag-" in key:
                tag_dict[key] = value

    # size: number of similar users requested
    sort_size = query_fields_dict["size"]
    query_fields_dict.pop("size")

    # keys with a non-zero weight are candidates for matching
    keys_list = []
    for k, v in query_fields_dict.iteritems():
        if v:
            keys_list.append(k)

    # drop keys that are empty either in the query or in the user's own portrait
    search_dict = {}  # field -> list of '&'-separated values to match
    iter_list = []
    for iter_key in keys_list:
        if iter_key in personal_info:
            if not personal_info[iter_key] or not query_fields_dict[iter_key]:
                query_fields_dict.pop(iter_key)
                continue
            else:
                iter_list.append(iter_key)
                search_dict[iter_key] = personal_info[iter_key].split('&')
    if len(iter_list) == 0 and len(tag_dict) == 0:
        return []

    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': []
                    }
                }
            }
        }
    }
    # total number of candidate documents (query is still unrestricted here)
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    # over-fetch so the reference user and low-score rows can be skipped
    query_body["size"] = sort_size + 100
    for (k, v) in query_fields_dict.items():
        temp_list = []
        if k in personal_info and v != 0:
            for iter_key in search_dict[k]:
                temp_list.append({
                    'wildcard': {
                        k: {
                            'wildcard': '*' + iter_key + '*',
                            'boost': v
                        }
                    }
                })
            query_body['query']['function_score']['query']['bool'][
                'must'].append({'bool': {
                    'should': temp_list
                }})
    if tag_dict and tag_dict_value:
        temp_list = []
        for k, v in tag_dict.iteritems():
            temp_list.append({"term": {k: v}})
        query_body['query']['function_score']['query']['bool']['must'].append(
            {'bool': {
                'should': temp_list
            }})

    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = [
        'uid', 'uname', 'activeness', 'importance', 'influence', 'sensitive'
    ]
    evaluate_index_list = [
        'activeness', 'importance', 'influence', 'sensitive'
    ]
    # maxima to normalize evaluation metrics; fetched unconditionally because
    # it is also needed for the reference user's own row below
    evaluate_max_dict = get_evaluate_max()
    return_list = []
    count = 0
    if len(result) > 1:
        # top score excluding the reference user himself
        if result[0]['_id'] != uid:
            top_score = result[0]['_score']
        else:
            top_score = result[1]['_score']
        for item in result:
            if uid == item['_id']:
                continue
            info = []
            for field in field_list:
                if field in evaluate_index_list:
                    value = item['_source'][field]
                    normal_value = math.log(
                        value / float(evaluate_max_dict[field]) * 9 + 1,
                        10) * 100
                else:
                    normal_value = item['_source'][field]
                    # fall back to the doc id when uid/uname is empty
                    if not normal_value:
                        normal_value = item['_id']
                info.append(normal_value)
            info.append(item['_score'] / float(top_score) * 100)
            return_list.append(info)
            count += 1
            if count == sort_size:
                break
    return_list.append(number)

    # first row: the reference user's own normalized metrics
    temp_list = []
    for field in field_list:
        if field in evaluate_index_list:
            value = personal_info[field]
            normal_value = math.log(
                value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        else:
            normal_value = personal_info[field]
        temp_list.append(normal_value)
    results = []
    results.append(temp_list)
    results.extend(return_list)
    return results