def get_user_sensitive_words(uid):
    """Sum a user's sensitive-word counts over the seven preceding days.

    The reference day is the current date when ``RUN_TYPE`` is truthy and the
    fixed date ``"2013-09-08"`` otherwise.  Starting from
    ``datetime2ts(reference day)`` the timestamp is moved back one day at a
    time, seven times; for each step the field ``uid`` of the hash
    ``'sensitive_' + str(ts)`` is read from ``r_cluster``.  Every non-empty
    value is decoded as a JSON object (word -> count) and the counts are
    added up per word.

    :param uid: hash field to read (the user id).
    :return: list of ``(word, total)`` pairs, largest total first.
    """
    if RUN_TYPE:
        now_date = ts2datetime(time.time())
    else:
        now_date = "2013-09-08"
    ts = datetime2ts(now_date)

    totals = {}
    for _ in range(7):
        ts -= 3600 * 24
        # NOTE(review): this key is built from the raw timestamp, while other
        # readers of r_cluster build theirs from a dash-stripped date string.
        # Confirm against whatever writes these hashes.
        raw = r_cluster.hget('sensitive_' + str(ts), uid)
        if not raw:
            continue
        for word, count in json.loads(raw).items():
            if word in totals:
                totals[word] += count
            else:
                totals[word] = count
    return sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
def get_group_user_track(uid):
    """Return, per day, the top-valued location from a user's portrait.

    The ``activity_geo_dict`` field of document ``uid`` is fetched from the
    user-portrait index and JSON-decoded into a sequence of mappings
    (location -> value), one per day.  The first entry is dated
    ``today - DAY * len(sequence)`` and each following entry ``DAY`` later.

    :param uid: id of the user-portrait document.
    :return: list of ``[date, location]`` pairs (``location`` is ``""`` for a
        day with an empty mapping), or the string
        ``"uid is not in user_portrait"`` when the document cannot be read.
    """
    # step1: get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            id=uid,
            _source=False,
            fields=["activity_geo_dict"],
        )
    except Exception:
        # Best effort: any lookup failure is reported as an unknown uid.
        portrait_result = {}
    if portrait_result == {}:
        return "uid is not in user_portrait"

    daily_geo = json.loads(portrait_result["fields"]["activity_geo_dict"][0])
    today_ts = datetime2ts(ts2datetime(int(time.time())))
    day_ts = today_ts - DAY * len(daily_geo)

    # step2: walk the days in order and keep the best location of each
    track = []
    for geo_counts in daily_geo:
        if geo_counts:
            # max() returns the first maximal item, which is the same element
            # a stable descending sort would put first.
            top_geo = max(geo_counts.items(), key=lambda pair: pair[1])[0]
        else:
            top_geo = ""
        track.append([ts2datetime(day_ts), top_geo])
        day_ts += DAY
    return track
def search_mention(uid, sensitive):
    """Aggregate who a user mentioned over a seven-day window.

    For each of the seven days ending at the fixed date ``'2013-09-07'`` the
    field ``str(uid)`` is read from the ``r_cluster`` hash
    ``'at_<date>'`` (or ``'sensitive_at_<date>'`` when ``sensitive`` is
    truthy).  Each value is a JSON object (mentioned uid -> count); counts are
    summed per mentioned uid across all days.

    :param uid: user whose mentions are looked up.
    :param sensitive: selects the ``sensitive_at_`` hashes when truthy.
    :return: ``[top, total]`` where ``top`` is at most 20
        ``(at_uid, [count, flag])`` pairs sorted by count descending, ``flag``
        being ``'1'`` when ``at_uid`` is in the result of
        ``identify_uid_list_in`` and ``'0'`` otherwise, and ``total`` is the
        number of distinct mentioned uids.  ``[None, 0]`` when nothing was
        found.
    """
    # NOTE: the window is anchored to a fixed test date, not the current time.
    anchor_ts = datetime2ts('2013-09-07')
    key_prefix = 'sensitive_at_' if sensitive else 'at_'

    mention_counts = {}
    for offset in range(7):
        date = ts2datetime(anchor_ts - offset * 24 * 3600).replace('-', '')
        raw = r_cluster.hget(key_prefix + str(date), str(uid))
        if not raw:
            continue
        for at_uid, count in json.loads(raw).items():
            # Accumulate under the *mentioned* uid so each one keeps its own
            # running total.
            if at_uid in mention_counts:
                mention_counts[at_uid] += count
            else:
                mention_counts[at_uid] = count

    if not mention_counts:
        return [None, 0]

    # Report the whole window, not just the last day that had data.
    in_status = identify_uid_list_in(list(mention_counts))
    results = {}
    for at_uid, count in mention_counts.items():
        results[at_uid] = [count, '1' if at_uid in in_status else '0']
    sorted_results = sorted(results.items(), key=lambda x: x[1][0], reverse=True)
    return [sorted_results[0:20], len(results)]
def get_user_sensitive_words(uid):
    """Sum a user's sensitive-word counts over the seven preceding days.

    The reference day is the current date when ``RUN_TYPE`` is truthy and the
    fixed date ``"2013-09-08"`` otherwise.  Starting from
    ``datetime2ts(reference day)`` the timestamp is moved back one day at a
    time, seven times; for each step the field ``uid`` of the hash
    ``'sensitive_' + str(ts)`` is read from ``r_cluster``.  Every non-empty
    value is decoded as a JSON object (word -> count) and the counts are
    added up per word.

    :param uid: hash field to read (the user id).
    :return: list of ``(word, total)`` pairs, largest total first.
    """
    if RUN_TYPE:
        now_date = ts2datetime(time.time())
    else:
        now_date = "2013-09-08"
    ts = datetime2ts(now_date)

    word_totals = {}
    for _ in range(7):
        ts -= 3600 * 24
        # NOTE(review): this key is built from the raw timestamp, while other
        # readers of r_cluster build theirs from a dash-stripped date string.
        # Confirm against whatever writes these hashes.
        raw = r_cluster.hget('sensitive_' + str(ts), uid)
        if not raw:
            continue
        for word, count in json.loads(raw).items():
            if word in word_totals:
                word_totals[word] += count
            else:
                word_totals[word] = count
    return sorted(word_totals.items(), key=lambda pair: pair[1], reverse=True)
def get_group_user_track(uid):
    """Return, per day, the top-valued location from a user's portrait.

    The ``activity_geo_dict`` field of document ``uid`` is fetched from the
    user-portrait index and JSON-decoded into a sequence of mappings
    (location -> value), one per day.  The first entry is dated
    ``today - DAY * len(sequence)`` and each following entry ``DAY`` later.

    :param uid: id of the user-portrait document.
    :return: list of ``[date, location]`` pairs (``location`` is ``''`` for a
        day with an empty mapping), or the string
        ``'uid is not in user_portrait'`` when the document cannot be read.
    """
    # step1: get user_portrait activity_geo_dict
    try:
        portrait_result = es_user_portrait.get(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            id=uid,
            _source=False,
            fields=['activity_geo_dict'],
        )
    except Exception:
        # Best effort: any lookup failure is reported as an unknown uid.
        portrait_result = {}
    if portrait_result == {}:
        return 'uid is not in user_portrait'

    daily_geo = json.loads(portrait_result['fields']['activity_geo_dict'][0])
    today_ts = datetime2ts(ts2datetime(int(time.time())))
    day_ts = today_ts - DAY * len(daily_geo)

    # step2: walk the days in order and keep the best location of each
    track = []
    for geo_counts in daily_geo:
        if geo_counts:
            # max() returns the first maximal item, which is the same element
            # a stable descending sort would put first.
            top_geo = max(geo_counts.items(), key=lambda pair: pair[1])[0]
        else:
            top_geo = ''
        track.append([ts2datetime(day_ts), top_geo])
        day_ts += DAY
    return track
def count_hot_uid(uid, start_time, stop_time):
    """Count texts with ``root_uid == uid`` and timestamp in the given window.

    One index per day is consulted, named ``flow_text_index_name_pre`` plus
    the day string returned by ``ts2datetime``.  The walk starts at the day of
    ``stop_time`` and steps back by ``day_time`` until the day of
    ``start_time`` has been processed.  Days whose index does not exist
    contribute nothing.  The same range filter
    (``start_time <= timestamp < stop_time``) is sent to every index.

    :param uid: value matched against the ``root_uid`` field.
    :param start_time: inclusive lower bound (anything ``float()`` accepts).
    :param stop_time: exclusive upper bound (anything ``float()`` accepts).
    :return: total number of matching documents across all visited days.
    """
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"range": {"timestamp": {"gte": start_time, "lt": stop_time}}},
                            {"term": {"root_uid": uid}},
                        ]
                    }
                }
            }
        }
    }

    start_date = ts2datetime(float(start_time))
    # Assumes day_time is one day in seconds -- TODO confirm.  Once the cursor
    # is more than one step before start_time the start day can no longer be
    # reached (e.g. start_time > stop_time), so stop instead of looping
    # forever.
    lower_guard = float(start_time) - day_time

    count = 0
    ts = float(stop_time)
    while True:
        current_date = ts2datetime(ts)
        index_name = flow_text_index_name_pre + current_date
        if es_text.indices.exists(index_name):
            # Accumulate: every day in the window adds to the total.
            count += es_text.count(index=index_name, doc_type=flow_text_index_type,
                                   body=query_body)["count"]
        if current_date == start_date:
            break
        ts -= day_time
        if ts < lower_guard:
            break
    return count
def get_user_geo(uid):
    """Aggregate a user's IP counts over seven days and map them to locations.

    For each of the seven days before the fixed date ``'2013-09-08'`` the
    field ``uid`` is read from the ``r_cluster`` hashes ``'ip_<date>'`` and
    ``'sensitive_ip<date>'``.  Values are JSON objects (ip -> count) and are
    summed per ip.  The sensitive totals are then also added into the
    ordinary totals, so the first result covers both.  Each total mapping is
    passed through ``ip2geo`` and sorted by value, largest first.

    :param uid: hash field to read (the user id).
    :return: ``[all_geo, sensitive_geo]``, two lists of ``(key, value)``
        pairs taken from the ``ip2geo`` results.
    """
    def _add_counts(total, part):
        # Add every count in `part` onto `total`, in place.
        for ip, count in part.items():
            if ip in total:
                total[ip] += count
            else:
                total[ip] = count

    user_ip_result = {}            # ordinary ip
    user_sensitive_ip_result = {}  # sensitive ip

    # NOTE: the window is anchored to a fixed test date, not the current time.
    ts = datetime2ts('2013-09-08')
    for _ in range(7):
        ts -= 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        ordinary_raw = r_cluster.hget('ip_' + str(date), uid)
        # NOTE(review): unlike the other key prefixes there is no underscore
        # between 'sensitive_ip' and the date -- confirm against the writer.
        sensitive_raw = r_cluster.hget('sensitive_ip' + str(date), uid)
        if ordinary_raw:
            _add_counts(user_ip_result, json.loads(ordinary_raw))
        if sensitive_raw:
            _add_counts(user_sensitive_ip_result, json.loads(sensitive_raw))

    # The "total" view includes the sensitive traffic as well.
    _add_counts(user_ip_result, user_sensitive_ip_result)

    user_geo_dict = ip2geo(user_ip_result)
    sorted_user_geo_dict = sorted(user_geo_dict.items(), key=lambda x: x[1], reverse=True)
    sensitive_user_geo_dict = ip2geo(user_sensitive_ip_result)
    sorted_sensitive_user_geo_dict = sorted(sensitive_user_geo_dict.items(),
                                            key=lambda x: x[1], reverse=True)
    return [sorted_user_geo_dict, sorted_sensitive_user_geo_dict]  # total and sensitive
def query_hot_mid(ts, keywords_list, text_type,size=100):
    """Return the most frequent ``root_mid`` values for a keyword query.

    The query keeps documents with ``ts - time_interval <= timestamp < ts``,
    whose ``keywords_string`` matches any of ``keywords_list`` and whose
    ``message_type`` is ``"0"``, and asks for a terms aggregation on
    ``root_mid`` limited to ``size`` buckets.  Exactly one daily index is
    searched: the index of ``ts``'s day when the window starts on that same
    day, otherwise the index of the day the window starts on.  If the chosen
    index does not exist the result is empty.

    :param ts: upper bound of the window (exclusive).
    :param keywords_list: terms matched against ``keywords_string``.
    :param text_type: not used by this function.
    :param size: maximum number of aggregation buckets.
    :return: list of ``[root_mid, doc_count]`` pairs in bucket order.
    """
    window_start = ts - time_interval
    must_filters = [
        {"range": {"timestamp": {"gte": window_start, "lt": ts}}},
        {"terms": {"keywords_string": keywords_list}},
        {"term": {"message_type": "0"}},
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_filters}}}},
        "aggs": {"all_interests": {"terms": {"field": "root_mid", "size": size}}},
    }

    end_date = ts2datetime(ts)
    start_date = ts2datetime(window_start)
    end_index = flow_text_index_name_pre + end_date
    end_index_exists = es_text.indices.exists(end_index)
    start_index = flow_text_index_name_pre + start_date
    start_index_exists = es_text.indices.exists(start_index)
    print("%s %s" % (end_date, start_date))

    same_day = end_date == start_date
    if same_day and end_index_exists:
        target_index = end_index
    elif (not same_day) and start_index_exists:
        target_index = start_index
    else:
        target_index = None

    hot_mid_list = []
    if target_index is None:
        return hot_mid_list

    response = es_text.search(index=target_index, doc_type=flow_text_index_type,
                              body=query_body)
    for bucket in response["aggregations"]["all_interests"]["buckets"]:
        print(bucket)
        hot_mid_list.append([bucket['key'], bucket['doc_count']])
    return hot_mid_list
def get_user_geo(uid):
    """Aggregate a user's IP counts over seven days and map them to locations.

    For each of the seven days before the fixed date ``'2013-09-08'`` the
    field ``uid`` is read from the ``r_cluster`` hashes ``'ip_<date>'`` and
    ``'sensitive_ip<date>'``.  Values are JSON objects (ip -> count) and are
    summed per ip.  The sensitive totals are then also added into the
    ordinary totals, so the first result covers both.  Each total mapping is
    passed through ``ip2geo`` and sorted by value, largest first.

    :param uid: hash field to read (the user id).
    :return: ``[all_geo, sensitive_geo]``, two lists of ``(key, value)``
        pairs taken from the ``ip2geo`` results.
    """
    def _merge_into(total, part):
        # Add every count in `part` onto `total`, in place.
        for ip, count in part.items():
            if ip in total:
                total[ip] += count
            else:
                total[ip] = count

    ordinary_ip_counts = {}
    sensitive_ip_counts = {}

    # NOTE: the window is anchored to a fixed test date, not the current time.
    ts = datetime2ts('2013-09-08')
    for _ in range(7):
        ts -= 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        ordinary_raw = r_cluster.hget('ip_' + str(date), uid)
        # NOTE(review): unlike the other key prefixes there is no underscore
        # between 'sensitive_ip' and the date -- confirm against the writer.
        sensitive_raw = r_cluster.hget('sensitive_ip' + str(date), uid)
        if ordinary_raw:
            _merge_into(ordinary_ip_counts, json.loads(ordinary_raw))
        if sensitive_raw:
            _merge_into(sensitive_ip_counts, json.loads(sensitive_raw))

    # The "total" view includes the sensitive traffic as well.
    _merge_into(ordinary_ip_counts, sensitive_ip_counts)

    user_geo_dict = ip2geo(ordinary_ip_counts)
    sorted_user_geo_dict = sorted(user_geo_dict.items(), key=lambda x: x[1], reverse=True)
    sensitive_user_geo_dict = ip2geo(sensitive_ip_counts)
    sorted_sensitive_user_geo_dict = sorted(sensitive_user_geo_dict.items(),
                                            key=lambda x: x[1], reverse=True)
    return [sorted_user_geo_dict, sorted_sensitive_user_geo_dict]  # total and sensitive
def get_influence_content(uid, timestamp_from, timestamp_to):
    """Collect a user's texts with ``timestamp_from <= timestamp < timestamp_to``.

    The window is cut at day boundaries (steps of ``DAY`` starting from the
    midnight of ``timestamp_from`` as given by ``datetime2ts(ts2datetime())``)
    and each piece is searched in its own daily index, named
    ``flow_text_index_name_pre`` plus the piece's start date.  Hits from all
    pieces are combined in chronological piece order.  A piece whose search
    fails (for instance because the index is missing) contributes nothing.

    :param uid: value matched against the ``uid`` field.
    :param timestamp_from: inclusive lower bound (numeric timestamp).
    :param timestamp_to: exclusive upper bound (numeric timestamp).
    :return: list of dicts with keys ``timestamp`` (formatted by ``ts2date``),
        ``ip``, ``text`` and ``geo`` (``'&'`` separators replaced by tabs,
        ``""`` when the source value is empty).  Empty when the window is
        empty or inverted.
    """
    # Split the window into one range per day; the first and last piece are
    # clipped to the requested bounds so partial days are still covered.
    day_ranges = []
    day_start = datetime2ts(ts2datetime(timestamp_from))
    while day_start < timestamp_to:
        day_ranges.append({"range": {"timestamp": {
            "gte": max(day_start, timestamp_from),
            "lt": min(day_start + DAY, timestamp_to),
        }}})
        day_start += DAY

    # Search each day's index and gather every hit.
    hits = []
    for range_item in day_ranges:
        range_from_date = ts2datetime(range_item["range"]["timestamp"]["gte"])
        flow_text_index_name = flow_text_index_name_pre + range_from_date
        query = [{"term": {"uid": uid}}, range_item]
        try:
            day_hits = es_flow_text.search(
                index=flow_text_index_name,
                doc_type=flow_text_index_type,
                body={"query": {"bool": {"must": query}}, "sort": [{"timestamp": "asc"}]},
            )["hits"]["hits"]
        except Exception:
            # Best effort: a failing day is skipped rather than aborting.
            day_hits = []
        hits.extend(day_hits)

    # Build the output from ALL collected hits, not only the last day's.
    weibo_list = []
    for item in hits:
        source = item["_source"]
        weibo = {
            "timestamp": ts2date(source["timestamp"]),
            "ip": source["ip"],
            "text": source["text"],
        }
        if source["geo"]:
            weibo["geo"] = "\t".join(source["geo"].split("&"))
        else:
            weibo["geo"] = ""
        weibo_list.append(weibo)
    return weibo_list
def get_network(task_exist):
    """Collect the daily top-5 users of a monitoring task for up to 7 days.

    Days are visited backwards from the fixed date ``'2013-09-07'``, stopping
    after seven days or as soon as a day is earlier than the task's submit
    date.  For each day the document ``'inner_<date>'`` is read from
    ``monitor_index_name`` (doc type = task name); its fields ``top1`` ..
    ``top5`` are JSON arrays whose first two items are used as uid and count.
    Days whose document is missing or lacks any of those fields are skipped
    entirely, which keeps ``date_list`` aligned with the per-field lists.

    :param task_exist: mapping with at least ``task_name`` and
        ``submit_date``.
    :return: ``[date_list, top_list_dict, abnormal_index]`` where
        ``top_list_dict[field]`` is a list of ``[uid, uname, count]`` (one per
        kept day) and ``abnormal_index`` is the result of
        ``compute_inner_polarization(top_list_dict)``.
    """
    task_name = task_exist['task_name']
    submit_ts = date2ts(task_exist['submit_date'])
    time_segment = 24 * 3600
    top_fields = ['top1', 'top2', 'top3', 'top4', 'top5']

    # NOTE: anchored to a fixed test date, not the current time.
    now_date_ts = datetime2ts('2013-09-07')

    date_list = []
    top_list_dict = {}
    for day_offset in range(7):  # hard cap of seven days
        iter_date_ts = now_date_ts - day_offset * time_segment
        if iter_date_ts < submit_ts:
            break
        iter_date = ts2datetime(iter_date_ts)
        key = 'inner_' + str(iter_date)
        try:
            task_date_result = es.get(index=monitor_index_name, doc_type=task_name,
                                      id=key)['_source']
        except Exception:
            task_date_result = {}
        if not all(field in task_date_result for field in top_fields):
            # No (complete) data for this day: skip it rather than crash.
            continue

        date_list.append(iter_date)
        for field in top_fields:
            user_count_item = json.loads(task_date_result[field])
            uid = user_count_item[0]
            count = user_count_item[1]
            top_list_dict.setdefault(field, []).append([uid, uid2uname(uid), count])

    abnormal_index = compute_inner_polarization(top_list_dict)
    return [date_list, top_list_dict, abnormal_index]
def ajax_upload_track_file():
    """Create a tracking task from an uploaded list of uids.

    Reads ``upload_data`` and ``task_name`` from the posted form.  Every line
    of ``upload_data`` contributes its first ten characters as a uid when the
    line is at least ten characters long.  The task's ``submit_date`` is the
    current time rounded up to the next multiple of 15 minutes counted from
    today's midnight.  The assembled task is handed to ``submit_track_task``.

    The form's ``state`` value is not read: it was never used, and the way it
    was accessed (``request.args.form``) raised before anything else ran.

    :return: JSON encoding of whatever ``submit_track_task`` returns.
    """
    upload_data = request.form['upload_data']
    task_name = request.form['task_name']

    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    # submit task and start time is 15min multiple
    time_segment = int((now_ts - now_date_ts) / 900) + 1
    trans_ts = now_date_ts + time_segment * 900

    uid_list = []
    for line in upload_data.split('\n'):
        uid = line[:10]
        if len(uid) == 10:
            uid_list.append(uid)

    input_data = {
        'submit_date': trans_ts,
        'task_name': task_name,
        'uid_list': uid_list,
        'status': 1,  # status show the track task is doing or end; doing 1, end 0
        'count': len(uid_list),
    }
    status = submit_track_task(input_data)
    return json.dumps(status)
def ajax_upload_track_file():
    """Create a tracking task from an uploaded list of uids.

    Reads ``upload_data`` and ``task_name`` from the posted form.  Every line
    of ``upload_data`` contributes its first ten characters as a uid when the
    line is at least ten characters long.  The task's ``submit_date`` is the
    current time rounded up to the next multiple of 15 minutes counted from
    today's midnight.  The assembled task is handed to ``submit_track_task``.

    The form's ``state`` value is not read: it was never used, and the way it
    was accessed (``request.args.form``) raised before anything else ran.

    :return: JSON encoding of whatever ``submit_track_task`` returns.
    """
    upload_data = request.form['upload_data']
    task_name = request.form['task_name']

    now_ts = time.time()
    midnight_ts = datetime2ts(ts2datetime(now_ts))
    # submit task and start time is 15min multiple
    quarter_index = int((now_ts - midnight_ts) / 900) + 1
    trans_ts = midnight_ts + quarter_index * 900

    candidates = [line[:10] for line in upload_data.split('\n')]
    uid_list = [uid for uid in candidates if len(uid) == 10]

    input_data = {
        'submit_date': trans_ts,
        'task_name': task_name,
        'uid_list': uid_list,
        'status': 1,  # status show the track task is doing or end; doing 1, end 0
        'count': len(uid_list),
    }
    status = submit_track_task(input_data)
    return json.dumps(status)
def influence_distribute():
    """Count users per influence bracket for seven consecutive days.

    The days are the seven ending one day before the fixed date
    ``'2013-09-08'``.  For each day the dash-stripped date string is used as
    the *field name* of a range filter, and for each adjacent pair of
    boundaries in ``row`` the number of documents in index
    ``'copy_sensitive_user_portrait'`` (doc type ``"user"``) with
    ``low <= field < high`` is counted.

    :return: ``[row, result]`` where ``row`` is the list of bracket
        boundaries and ``result`` holds one list of six counts per day,
        oldest day first.
    """
    row = [0, 200, 500, 700, 900, 1100, 10000]
    brackets = list(zip(row[:-1], row[1:]))
    one_day = 3600 * 24

    # test: window anchored to a fixed date
    base_ts = datetime2ts('2013-09-08') - 8 * one_day

    result = []
    for day_number in range(1, 8):
        date = ts2datetime(base_ts + day_number * one_day).replace('-', '')
        detail = []
        for low_limit, upper_limit in brackets:
            range_filter = {date: {"gte": low_limit, "lt": upper_limit}}
            query_body = {"query": {"filtered": {"filter": {"range": range_filter}}}}
            response = es.count(index='copy_sensitive_user_portrait', doc_type="user",
                                body=query_body)
            detail.append(response['count'])
        result.append(detail)
    return [row, result]
def show_detect_task(submit_user):
    """List the detect tasks submitted by one user, newest first.

    Searches ``group_index_name`` for documents whose ``task_type`` matches
    ``'detect'`` and whose ``submit_user`` equals the argument, sorted by
    ``submit_date`` descending and limited to ``MAX_VALUE`` hits.  A failing
    search yields an empty list.

    :param submit_user: value matched against the ``submit_user`` field.
    :return: list of
        ``[task_name, submit_user, submit_date, detect_type, state, process]``
        rows, with ``submit_date`` formatted by ``ts2datetime``.
    """
    query = [
        {'match': {'task_type': 'detect'}},
        {'term': {'submit_user': submit_user}},
    ]
    search_body = {
        'query': {'bool': {'must': query}},
        'sort': [{'submit_date': 'desc'}],
        'size': MAX_VALUE,
    }
    try:
        search_results = es_group_result.search(index=group_index_name,
                                                doc_type=group_index_type,
                                                body=search_body)['hits']['hits']
    except Exception:
        # Best effort: a failing search is reported as "no tasks".
        search_results = []

    results = []
    for group_item in search_results:
        source = group_item['_source']
        results.append([
            source['task_name'],
            source['submit_user'],
            ts2datetime(int(source['submit_date'])),
            source['detect_type'],
            source['state'],
            source['detect_process'],
        ])
    return results
def end_track_task(task_name):
    """Mark a tracking task as ended.

    Loads the task document ``task_name`` from ``index_name``.  If its
    ``status`` is already 0 (stored as number or string) nothing is changed.
    Otherwise ``status`` is set to 0 and ``end_date`` to the current time
    rounded up to the next multiple of 15 minutes counted from today's
    midnight (formatted by ``ts2date``); then ``change_user_count`` is called
    with the task's ``uid_list``, the document is re-indexed and
    ``delete_task_redis`` is called.

    :param task_name: id of the task document.
    :return: one of the status strings ``'task name not exist'``,
        ``'task have end'``, ``'change user task count fail'``,
        ``'delete task from redis fail'`` or
        ``'success change status to end'``.
    """
    try:
        task_exist = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except Exception:
        return 'task name not exist'

    # Check the STORED status; str() accepts both 0 and '0'.
    if str(task_exist['status']) == '0':
        return 'task have end'
    task_exist['status'] = 0

    # made end time
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    time_segment = int((now_ts - now_date_ts) / 900) + 1
    end_ts = now_date_ts + time_segment * 900
    task_exist['end_date'] = ts2date(end_ts)

    if change_user_count(task_exist['uid_list']) == 0:
        return 'change user task count fail'

    es.index(index=index_name, doc_type=index_type, id=task_name, body=task_exist)
    if delete_task_redis(task_name) == 0:
        return 'delete task from redis fail'
    return 'success change status to end'
def submit_attribute(attribute_name, attribute_value, submit_user, submit_date):
    """Create an attribute document unless one with that name already exists.

    The document id is ``attribute_name``.  When a document with that id can
    be fetched and carries a ``_source``, nothing is written.  Otherwise a new
    document is indexed with the attribute name, the value list (commas in
    ``attribute_value`` replaced by ``'&'``), the submitting user and the
    submit date.

    :param attribute_name: name and document id of the attribute.
    :param attribute_value: comma-separated values.
    :param submit_user: stored under ``user``.
    :param submit_date: stored under ``date``.
    :return: ``True`` when a new document was indexed, ``False`` when the
        attribute already existed.
    """
    # maybe there have to identify the user admitted to submit attribute
    try:
        # A single-document get answers with the document itself
        # ('_source', ...), not with a 'docs' list.
        attribute_exist = es.get(index=attribute_index_name, doc_type=attribute_index_type,
                                 id=attribute_name)
    except Exception:
        # Treated as "does not exist yet"; a real outage will surface on the
        # index call below.
        attribute_exist = {}

    if '_source' in attribute_exist:
        return False

    input_data = {
        'attribute_name': attribute_name,
        'attribute_value': '&'.join(attribute_value.split(',')),
        'user': submit_user,
        'date': submit_date,
    }
    es.index(index=attribute_index_name, doc_type=attribute_index_type,
             id=attribute_name, body=input_data)
    return True
def get_user_hashtag(uid):
    """Aggregate a user's hashtag counts over seven days.

    For each of the seven days before the fixed date ``'2013-09-08'`` the
    field ``uid`` is read from the ``r_cluster`` hashes ``'hashtag_<date>'``
    and ``'sensitive_hashtag_<date>'``.  Values are JSON objects
    (hashtag -> count) and are summed per hashtag.  The sensitive totals are
    then also added into the ordinary totals, so the first result covers
    both.

    :param uid: hash field to read (the user id).
    :return: ``[all_hashtags, sensitive_hashtags]``, two lists of
        ``(hashtag, total)`` pairs sorted by total, largest first.
    """
    def _add_counts(total, part):
        # Add every count in `part` onto `total`, in place.
        for hashtag, count in part.items():
            if hashtag in total:
                total[hashtag] += count
            else:
                total[hashtag] = count

    user_hashtag_dict = {}
    sensitive_user_hashtag_dict = {}

    # NOTE: the window is anchored to a fixed test date, not the current time.
    ts = datetime2ts('2013-09-08')
    for _ in range(7):
        ts -= 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        ordinary_raw = r_cluster.hget('hashtag_' + str(date), uid)
        sensitive_raw = r_cluster.hget('sensitive_hashtag_' + str(date), uid)
        if ordinary_raw:
            _add_counts(user_hashtag_dict, json.loads(ordinary_raw))
        if sensitive_raw:
            _add_counts(sensitive_user_hashtag_dict, json.loads(sensitive_raw))

    # The "total" view includes the sensitive hashtags as well.
    _add_counts(user_hashtag_dict, sensitive_user_hashtag_dict)

    sort_hashtag_dict = sorted(user_hashtag_dict.items(), key=lambda x: x[1], reverse=True)
    sort_sensitive_dict = sorted(sensitive_user_hashtag_dict.items(),
                                 key=lambda x: x[1], reverse=True)
    return [sort_hashtag_dict, sort_sensitive_dict]
def sort_sensitive_text(uid):
    """Build display rows for a user's sensitive texts.

    Takes the hits returned by ``search_sensitive_text(uid)``, skips those
    whose ``sensitive`` field is falsy, and produces one row per remaining
    hit.  The sentiment is 1, -1 or 0 depending on whether the entry under
    key ``"126"`` of the text's ``sentiment`` JSON has more, fewer or as many
    items as the entries under ``"127"``, ``"128"`` and ``"129"`` together.
    Retweet/comment numbers are looked up by ``mid`` in the day's ``bci``
    document of the text's author for message types 1 and 2; they are 0 for
    other types, when the lookup fails, or when the ``mid`` is not listed.

    :param uid: user whose texts are searched.
    :return: list of rows ``[ts, geo, text, sensitive_words, retweeted_link,
        sentiment, message_type, retweeted_number, comment_number]`` where
        ``text`` is UTF-8 encoded and ``retweeted_link`` is the result of
        ``extract_uname(text)``.
    """
    text_all = []
    for hit in search_sensitive_text(uid) or []:
        item = hit["_source"]
        if not item["sensitive"]:
            continue
        text = item["text"].encode("utf-8", "ignore")

        sentiment = 0
        sentiment_dict = json.loads(item["sentiment"])
        if sentiment_dict:
            positive = len(sentiment_dict.get("126", {}))
            negetive = sum(len(sentiment_dict.get(code, {}))
                           for code in ("127", "128", "129"))
            if positive > negetive:
                sentiment = 1
            elif positive < negetive:
                sentiment = -1

        ts = item["timestamp"]
        author_uid = item["uid"]
        mid = item["mid"]
        message_type = item.get("message_type", 0)
        date = ts2datetime(float(ts)).replace("-", "")

        retweeted_number = 0
        comment_number = 0
        try:
            bci_result = es.get(index=date, doc_type="bci", id=author_uid)["_source"]
            type_code = int(message_type)
            if type_code == 1:
                retweeted_number = bci_result["s_origin_weibo_retweeted_detail"].get(mid, 0)
                comment_number = bci_result["s_origin_weibo_comment_detail"].get(mid, 0)
            elif type_code == 2:
                retweeted_number = bci_result["s_retweeted_weibo_retweeted_detail"].get(mid, 0)
                comment_number = bci_result["s_retweeted_weibo_comment_detail"].get(mid, 0)
        except Exception:
            # Best effort: no usable bci data means "no interactions known".
            retweeted_number = 0
            comment_number = 0

        single_sw = item.get("sensitive_words", {})
        sw = list(json.loads(single_sw)) if single_sw else []

        geo = item["geo"]
        retweeted_link = extract_uname(text)
        text_all.append([ts, geo, text, sw, retweeted_link, sentiment,
                         message_type, retweeted_number, comment_number])
    return text_all
def ajax_show_sensitive_history_in():
    """Return the "in" history as JSON for one date or for the last 7 days.

    The ``date`` query argument defaults to today's date as produced by
    ``ts2datetime``.  With ``date=all`` the histories of today and the six
    preceding days are concatenated, newest day first; otherwise the history
    of the given date is returned.  A falsy history is sent as ``[]``.

    NOTE(review): ``show_in_history`` is called with two arguments here;
    confirm this matches the definition that is actually in scope.
    """
    now_date = ts2datetime(time.time())
    date = request.args.get('date', now_date)  # in date:2013-09-01

    if str(date) != "all":
        history = show_in_history(date, 1)  # history in, include status
    else:
        today_ts = datetime2ts(now_date)
        history = []
        for days_back in range(7):
            day = ts2datetime(today_ts - days_back * 24 * 3600)
            history.extend(show_in_history(day, 1))

    if not history:
        history = []
    return json.dumps(history)
def get_text_index(date):
    """Return the seven index names for ``date`` and the six steps before it.

    Each name is ``pre_text_index`` followed by the ``ts2datetime`` rendering
    of ``datetime2ts(date) - i * DAY`` for ``i`` from 0 to 6, so the list
    starts with the index of ``date`` itself.

    :param date: date accepted by ``datetime2ts``.
    :return: list of seven index names.
    """
    base_ts = datetime2ts(date)
    return [pre_text_index + ts2datetime(base_ts - step * DAY) for step in range(7)]
def get_activity_weibo(task_name, submit_user, start_ts, time_segment=FOUR_HOUR):
    """Return the texts a task's users posted in one time segment.

    The task document id is ``submit_user + task_name``; its ``uid_list``
    field names the users.  Their texts with
    ``start_ts <= timestamp < start_ts + time_segment`` are searched in the
    daily index of ``start_ts`` (``flow_text_index_name_pre`` plus the date),
    sorted by timestamp and limited to ``MAX_VALUE`` hits.  A failing search
    yields an empty list.

    :param task_name: task name, second half of the document id.
    :param submit_user: submitting user, first half of the document id.
    :param start_ts: inclusive start of the segment.
    :param time_segment: segment length; defaults to ``FOUR_HOUR``.
    :return: list of dicts with keys ``timestamp`` (formatted by ``ts2date``),
        ``ip``, ``text`` and ``geo``; or ``'task name invalid'`` when the task
        cannot be read, or ``'task uid list null'`` when it has no uids.
    """
    # step1: get task_name uid
    task_id = submit_user + task_name
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_id, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if uid_list == []:
        return 'task uid list null'

    # step2: search time_segment weibo
    end_ts = start_ts + time_segment
    flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_ts)
    query = [
        {'terms': {'uid': uid_list}},
        {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query}}, 'sort': 'timestamp', 'size': MAX_VALUE},
        )['hits']['hits']
    except Exception:
        flow_text_es_result = []

    results = []
    for item in flow_text_es_result:
        source = item['_source']
        geo = source['geo']
        if geo:
            # geo is split on '&' as in get_influence_content; joining a bare
            # string would put a tab between every character.  A value that
            # is already a sequence of parts is joined as-is.
            geo_parts = geo.split('&') if hasattr(geo, 'split') else geo
            geo_text = '\t'.join(geo_parts)
        else:
            geo_text = ''
        results.append({
            'timestamp': ts2date(source['timestamp']),
            'ip': source['ip'],
            'text': source['text'],
            'geo': geo_text,
        })
    return results
def ajax_show_influence_history_in():
    """Return the influence "in" history as JSON for one date or seven days.

    The ``date`` query argument defaults to today's date as produced by
    ``ts2datetime``.  With ``date=all`` the histories of the fixed date
    ``'2013-09-07'`` and the six preceding days are concatenated, newest day
    first; otherwise the history of the given date is returned.  Dashes are
    stripped from every date before the lookup.  A falsy history is sent as
    ``[]``.

    NOTE(review): the ``all`` branch passes ``1`` as second argument to
    ``show_in_history`` while the single-date branch passes ``0`` -- confirm
    which flag this endpoint is meant to use.
    """
    now_date = ts2datetime(time.time())
    date = request.args.get('date', now_date)

    if str(date) != "all":
        history = show_in_history(str(date).replace('-', ''), 0)  # history in, include status
    else:
        anchor_ts = datetime2ts('2013-09-07')
        history = []
        for days_back in range(7):
            day = ts2datetime(anchor_ts - days_back * 24 * 3600)
            history.extend(show_in_history(str(day).replace('-', ''), 1))

    if not history:
        history = []
    return json.dumps(history)
def ajax_show_influence_history_in():
    """Return the influence "in" history as JSON for one date or seven days.

    The ``date`` query argument defaults to today's date as produced by
    ``ts2datetime``.  With ``date=all`` the histories of the fixed date
    ``'2013-09-07'`` and the six preceding days are concatenated, newest day
    first; otherwise the history of the given date is returned.  Dashes are
    stripped from every date before the lookup.  A falsy history is sent as
    ``[]``.

    NOTE(review): the ``all`` branch passes ``1`` as second argument to
    ``show_in_history`` while the single-date branch passes ``0`` -- confirm
    which flag this endpoint is meant to use.
    """
    now_date = ts2datetime(time.time())
    requested = request.args.get('date', now_date)

    collected = []
    if str(requested) == "all":
        anchor_ts = datetime2ts('2013-09-07')
        for days_back in range(7):
            day_key = str(ts2datetime(anchor_ts - days_back * 24 * 3600)).replace('-', '')
            collected.extend(show_in_history(day_key, 1))
    else:
        day_key = str(requested).replace('-', '')
        collected = show_in_history(day_key, 0)  # history in, include status

    return json.dumps(collected) if collected else json.dumps([])
def show_in_history(date):
    """Return the detail rows of every user identified "in" on ``date``.

    Three redis hashes are read from ``r``: ``identify_in_sensitive_<date>``,
    ``identify_in_influence_<date>`` and ``identify_in_manual_<date>``, each
    mapping uid -> status.  For every non-empty hash the details of its uids
    are fetched with ``get_sensitive_user_detail`` for the day before
    ``date`` (flag 1 for the sensitive hash, 0 for the other two), and the
    stored status is appended to each detail row.

    :param date: date accepted by ``datetime2ts``; also used in the key names.
    :return: all rows from the three sources, sorted by their element at
        index 5, largest first.
    """
    print(date)
    suffix = str(date)
    sen_iden_in_results = r.hgetall("identify_in_sensitive_" + suffix)
    inf_iden_in_results = r.hgetall("identify_in_influence_" + suffix)
    man_iden_in_results = r.hgetall("identify_in_manual_" + suffix)

    work_date = ts2datetime(datetime2ts(date) - DAY)

    # (status-by-uid hash, flag handed to get_sensitive_user_detail)
    sources = (
        (sen_iden_in_results, 1),
        (inf_iden_in_results, 0),
        (man_iden_in_results, 0),
    )
    results = []
    for status_by_uid, detail_flag in sources:
        uid_list = status_by_uid.keys()
        if not uid_list:
            continue
        for row in get_sensitive_user_detail(uid_list, work_date, detail_flag):
            row.append(status_by_uid[row[0]])
            results.append(row)

    return sorted(results, key=lambda row: row[5], reverse=True)
def search_detect_task(task_name, submit_date, state, process, detect_type, submit_user):
    """Search detect tasks by any combination of optional criteria.

    Every truthy argument adds a condition; all conditions must hold:

    * ``task_name`` / ``state``: split on spaces, each word must occur as a
      substring (wildcard ``*word*``).
    * ``submit_date``: ``submit_date`` field within that day
      (``datetime2ts(submit_date)`` up to, excluding, ``+ DAY``).
    * ``process``: ``detect_process`` between ``int(process)`` and
      ``MAX_PROCESS``.
    * ``detect_type``: split on commas, at least one part must occur as a
      substring.
    * ``submit_user``: wildcard match on ``submit_user``.

    Results are sorted by ``submit_date`` descending and limited to
    ``MAX_VALUE`` hits.  A failing search yields an empty list.

    :return: list of
        ``[task_name, submit_user, submit_date, detect_type, state, process]``
        rows, with ``submit_date`` formatted by ``ts2datetime``.
    """
    query = [{'match': {'task_type': 'detect'}}]
    if task_name:
        for word in task_name.split(' '):
            query.append({'wildcard': {'task_name': '*' + word + '*'}})
    if submit_date:
        submit_date_from = datetime2ts(submit_date)
        submit_date_to = submit_date_from + DAY
        query.append({'range': {'submit_date': {'gte': submit_date_from,
                                                'lt': submit_date_to}}})
    if state:
        for word in state.split(' '):
            query.append({'wildcard': {'state': '*' + word + '*'}})
    if process:
        query.append({'range': {'detect_process': {'from': int(process),
                                                   'to': MAX_PROCESS}}})
    if detect_type:
        nest_body_list = [{'wildcard': {'detect_type': '*' + type_item + '*'}}
                          for type_item in detect_type.split(',')]
        query.append({'bool': {'should': nest_body_list}})
    if submit_user:
        # NOTE(review): the leading run of six '*' is kept exactly as found;
        # confirm whether a single '*' was intended.
        query.append({'wildcard': {'submit_user': '******' + submit_user + '*'}})

    search_body = {
        'query': {'bool': {'must': query}},
        'sort': [{'submit_date': {'order': 'desc'}}],
        'size': MAX_VALUE,
    }
    try:
        search_result = es_group_result.search(index=group_index_name,
                                               doc_type=group_index_type,
                                               body=search_body)['hits']['hits']
    except Exception:
        # Best effort: a failing search is reported as "no tasks".
        search_result = []

    # get group information table
    results = []
    for group_item in search_result:
        source = group_item['_source']
        results.append([
            source['task_name'],
            source['submit_user'],
            ts2datetime(int(source['submit_date'])),
            source['detect_type'],
            source['state'],
            source['detect_process'],
        ])
    return results
def lastest_identify_in():
    """Merge the ``history_in_<date>`` hashes of seven days into one dict.

    The days are the seven before the fixed date ``'2013-09-08'``, visited
    from the most recent to the oldest.  Every field of each hash (read from
    ``r``) is JSON-decoded and stored under its field name; a field present
    on several days keeps the value of the day visited last.

    :return: dict mapping hash field -> decoded value.
    """
    anchor_ts = datetime2ts('2013-09-08')
    merged = dict()
    for days_back in range(1, 8):
        day_key = ts2datetime(anchor_ts - days_back * 3600 * 24).replace('-', '')
        day_hash = r.hgetall('history_in_' + day_key)
        for field in day_hash:
            merged[field] = json.loads(day_hash[field])
    return merged
def sort_sensitive_text(uid):
    """Build display rows for a user's sensitive texts.

    Takes the hits returned by ``search_sensitive_text(uid)``, skips those
    whose ``sensitive`` field is falsy, and produces one row per remaining
    hit.  The sentiment is 1, -1 or 0 depending on whether the entry under
    key ``'126'`` of the text's ``sentiment`` JSON has more, fewer or as many
    items as the entries under ``'127'``, ``'128'`` and ``'129'`` together.
    Retweet/comment numbers are looked up by ``mid`` in the day's ``bci``
    document of the text's author for message types 1 and 2; they are 0 for
    other types, when the lookup fails, or when the ``mid`` is not listed.

    :param uid: user whose texts are searched.
    :return: list of rows ``[ts, geo, text, sensitive_words, retweeted_link,
        sentiment, message_type, retweeted_number, comment_number]`` where
        ``text`` is UTF-8 encoded and ``retweeted_link`` is the result of
        ``extract_uname(text)``.
    """
    text_all = []
    for hit in search_sensitive_text(uid) or []:
        item = hit['_source']
        if not item['sensitive']:
            continue
        text = item['text'].encode('utf-8', 'ignore')

        sentiment = 0
        sentiment_dict = json.loads(item['sentiment'])
        if sentiment_dict:
            positive = len(sentiment_dict.get('126', {}))
            negetive = sum(len(sentiment_dict.get(code, {}))
                           for code in ('127', '128', '129'))
            if positive > negetive:
                sentiment = 1
            elif positive < negetive:
                sentiment = -1

        ts = item['timestamp']
        author_uid = item['uid']
        mid = item['mid']
        message_type = item.get('message_type', 0)
        date = ts2datetime(float(ts)).replace('-', '')

        retweeted_number = 0
        comment_number = 0
        try:
            bci_result = es.get(index=date, doc_type='bci', id=author_uid)['_source']
            type_code = int(message_type)
            if type_code == 1:
                retweeted_number = bci_result['s_origin_weibo_retweeted_detail'].get(mid, 0)
                comment_number = bci_result['s_origin_weibo_comment_detail'].get(mid, 0)
            elif type_code == 2:
                retweeted_number = bci_result['s_retweeted_weibo_retweeted_detail'].get(mid, 0)
                comment_number = bci_result['s_retweeted_weibo_comment_detail'].get(mid, 0)
        except Exception:
            # Best effort: no usable bci data means "no interactions known".
            retweeted_number = 0
            comment_number = 0

        single_sw = item.get('sensitive_words', {})
        sw = list(json.loads(single_sw)) if single_sw else []

        geo = item['geo']
        retweeted_link = extract_uname(text)
        text_all.append([ts, geo, text, sw, retweeted_link, sentiment,
                         message_type, retweeted_number, comment_number])
    return text_all
def get_top_all_influence(key, ts):
    """Return the largest value of field ``key`` in a day's ``bci`` index.

    The index is ``"bci_"`` plus the dash-stripped date of ``ts``; when that
    index does not exist the date of ``ts - DAY`` is tried instead.  The index
    is searched for the single document with the highest ``key``.

    :param key: field to sort on and to read from the top document.
    :param ts: timestamp selecting the day.
    :return: the top document's ``key`` value, or ``2000`` when no index
        exists or the search returns no hits.
    """
    fallback = 2000
    top_one_query = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1,
    }

    index_name = "bci_" + ts2datetime(ts).replace('-', '')
    if not es.indices.exists(index=index_name):
        index_name = "bci_" + ts2datetime(ts - DAY).replace('-', '')

    if not es.indices.exists(index=index_name):
        return fallback

    hits = es.search(index=index_name, doc_type="bci", body=top_one_query)['hits']['hits']
    if not hits:
        return fallback
    return hits[0]['_source'][key]
def get_user_hashtag(uid):
    """Aggregate a user's hashtag counts over seven days.

    For each of the seven days before the fixed date ``'2013-09-08'`` the
    field ``uid`` is read from the ``r_cluster`` hashes ``'hashtag_<date>'``
    and ``'sensitive_hashtag_<date>'``.  Values are JSON objects
    (hashtag -> count) and are summed per hashtag.  The sensitive totals are
    then also added into the ordinary totals, so the first result covers
    both.

    :param uid: hash field to read (the user id).
    :return: ``[all_hashtags, sensitive_hashtags]``, two lists of
        ``(hashtag, total)`` pairs sorted by total, largest first.
    """
    def _merge_into(total, part):
        # Add every count in `part` onto `total`, in place.
        for hashtag, count in part.items():
            if hashtag in total:
                total[hashtag] += count
            else:
                total[hashtag] = count

    ordinary_counts = {}
    sensitive_counts = {}

    # NOTE: the window is anchored to a fixed test date, not the current time.
    ts = datetime2ts('2013-09-08')
    for _ in range(7):
        ts -= 3600 * 24
        date = ts2datetime(ts).replace('-', '')
        ordinary_raw = r_cluster.hget('hashtag_' + str(date), uid)
        sensitive_raw = r_cluster.hget('sensitive_hashtag_' + str(date), uid)
        if ordinary_raw:
            _merge_into(ordinary_counts, json.loads(ordinary_raw))
        if sensitive_raw:
            _merge_into(sensitive_counts, json.loads(sensitive_raw))

    # The "total" view includes the sensitive hashtags as well.
    _merge_into(ordinary_counts, sensitive_counts)

    sort_hashtag_dict = sorted(ordinary_counts.items(), key=lambda x: x[1], reverse=True)
    sort_sensitive_dict = sorted(sensitive_counts.items(), key=lambda x: x[1], reverse=True)
    return [sort_hashtag_dict, sort_sensitive_dict]
def show_in_history(date):
    """Return the detail rows of every user identified "in" on ``date``.

    Three redis hashes are read from ``r``: ``identify_in_sensitive_<date>``,
    ``identify_in_influence_<date>`` and ``identify_in_manual_<date>``, each
    mapping uid -> status.  For every non-empty hash the details of its uids
    are fetched with ``get_sensitive_user_detail`` for the day before
    ``date`` (flag 1 for the sensitive hash, 0 for the other two), and the
    stored status is appended to each detail row.

    :param date: date accepted by ``datetime2ts``; also used in the key names.
    :return: all rows from the three sources, sorted by their element at
        index 5, largest first.
    """
    print(date)
    date_text = str(date)
    sensitive_status = r.hgetall("identify_in_sensitive_" + date_text)
    influence_status = r.hgetall("identify_in_influence_" + date_text)
    manual_status = r.hgetall("identify_in_manual_" + date_text)

    work_date = ts2datetime(datetime2ts(date) - DAY)

    def _rows_with_status(status_by_uid, detail_flag):
        # Fetch the detail rows for one hash and tag each with its status.
        uid_list = status_by_uid.keys()
        if not uid_list:
            return []
        rows = get_sensitive_user_detail(uid_list, work_date, detail_flag)
        for row in rows:
            row.append(status_by_uid[row[0]])
        return list(rows)

    results = []
    results.extend(_rows_with_status(sensitive_status, 1))
    results.extend(_rows_with_status(influence_status, 0))
    results.extend(_rows_with_status(manual_status, 0))
    results.sort(key=lambda row: row[5], reverse=True)
    return results
def user_sentiment_trend(uid):
    """Summarise the sentiment of a user's indexed texts, overall and per day.

    Searches ``sensitive_user_text`` for documents with a matching ``uid``
    and groups each hit's JSON ``sentiment`` value by the day derived from
    its ``timestamp``. In a sentiment object the values under key '126' are
    summed as positive; those under '127', '128' and '129' as negative.

    Returns:
        ``[[positive, neutral, negetive], per_day]``. The first list counts
        texts by which side is larger (ties and empty sentiments count as
        neutral). ``per_day`` maps ``YYYYMMDD`` to a dict with keys
        'neutral' (number of empty sentiments that day), 'positive' and
        'negetive' (summed scores that day).
    """
    query_body = {"query": {"filtered": {"filter": {"term": {"uid": uid}}}}}
    hits = es.search(index='sensitive_user_text', doc_type='user',
                     body=query_body)['hits']['hits']

    per_day = {}
    for hit in hits:
        source = hit['_source']
        day = ts2datetime(float(source['timestamp'])).replace('-', '')
        per_day.setdefault(day, []).append(json.loads(source['sentiment']))

    total_positive = 0
    total_negative = 0
    total_neutral = 0
    sentiment_results = {}
    for day, details in per_day.items():
        day_positive = 0
        day_negative = 0
        day_neutral = 0
        for detail in details:
            if not detail:
                # No sentiment recorded for this text: neutral.
                day_neutral += 1
                total_neutral += 1
                continue
            positive = sum(detail.get('126', {}).values())
            negative = (sum(detail.get('127', {}).values())
                        + sum(detail.get('128', {}).values())
                        + sum(detail.get('129', {}).values()))
            day_positive += positive
            day_negative += negative
            if positive > negative:
                total_positive += 1
            elif positive < negative:
                total_negative += 1
            else:
                total_neutral += 1
        sentiment_results[day] = {
            'neutral': day_neutral,
            'positive': day_positive,
            'negetive': day_negative,
        }

    return [[total_positive, total_neutral, total_negative], sentiment_results]
def user_sentiment_trend(uid):
    """Return overall and per-day sentiment statistics for ``uid``.

    Documents are fetched from ``sensitive_user_text`` with a term filter
    on ``uid``. Each hit contributes its JSON-decoded ``sentiment`` field to
    the bucket of the day computed from its ``timestamp``. Key '126' holds
    positive scores; keys '127', '128' and '129' hold negative scores.

    Returns:
        ``[[positive, neutral, negetive], daily]``. The first list counts
        texts classified by comparing their positive and negative sums.
        ``daily`` maps ``YYYYMMDD`` to
        ``{'neutral': n, 'positive': p, 'negetive': q}``.
    """
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "term": {"uid": uid}
                }
            }
        }
    }
    response = es.search(index='sensitive_user_text', doc_type='user', body=query_body)

    grouped = {}
    for hit in response['hits']['hits']:
        doc = hit['_source']
        day = ts2datetime(float(doc['timestamp'])).replace('-', '')
        sentiment = json.loads(doc['sentiment'])
        if day in grouped:
            grouped[day].append(sentiment)
        else:
            grouped[day] = [sentiment]

    negative_codes = ('127', '128', '129')
    totals = [0, 0, 0]  # positive, neutral, negative
    daily = {}
    for day in grouped:
        empty_count = 0
        positive_sum = 0
        negative_sum = 0
        for sentiment in grouped[day]:
            if not sentiment:
                empty_count += 1
                totals[1] += 1
                continue
            positive = sum(sentiment.get('126', {}).values())
            negative = 0
            for code in negative_codes:
                negative += sum(sentiment.get(code, {}).values())
            positive_sum += positive
            negative_sum += negative
            if positive > negative:
                totals[0] += 1
            elif positive < negative:
                totals[2] += 1
            else:
                totals[1] += 1
        daily[day] = {}
        daily[day]['neutral'] = empty_count
        daily[day]['positive'] = positive_sum
        daily[day]['negetive'] = negative_sum

    return [totals, daily]
def ajax_full_text_search():
    """Request handler: run a full-text search and return the result as JSON.

    Query parameters read from ``request.args``:
        start_time, end_time: default to the current date (the fixed test
            date 2013-09-02 when ``RUN_TYPE`` is falsy).
        uid: defaults to ''.
        number: result size, defaults to 100.
        keywords: comma-separated keywords, defaults to ''.
    """
    ts = time.time() if RUN_TYPE else datetime2ts("2013-09-02")
    now_date = ts2datetime(ts)

    args = request.args
    start_time = args.get("start_time", now_date)  # e.g. 2013-09-01
    end_time = args.get("end_time", now_date)
    uid = args.get("uid", "")
    size = args.get("number", 100)
    keywords = args.get("keywords", "")  # comma-separated

    return json.dumps(full_text_search(keywords, uid, start_time, end_time, size))
def recommend_in_sensitive(date):
    """Return details of sensitive users recommended on ``date``.

    Candidates are the fields of the Redis hash
    ``recomment_<date>_sensitive`` that are not already fields of the
    ``compute`` hash. Details are fetched for the day before ``date``.

    Returns:
        The result of ``get_sensitive_user_detail`` for the remaining uids,
        or ``[]`` when there is no recommendation data or nothing remains.
    """
    recommended = r.hkeys("recomment_" + str(date) + "_sensitive")  # recommended sensitive users
    already_in = r.hkeys("compute")  # users already admitted
    if not recommended:
        return []  # no data for that day

    uid_list = list(set(recommended) - set(already_in))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    if not uid_list:
        return []
    return get_sensitive_user_detail(uid_list, work_date, 1)
def recommend_in_sensitive(date):
    """Look up the not-yet-admitted sensitive users recommended on ``date``.

    Reads the field names of the Redis hashes ``recomment_<date>_sensitive``
    and ``compute``; uids present in the latter are filtered out. The
    remaining uids are passed to ``get_sensitive_user_detail`` together
    with the previous day's date and the sensitive flag 1.

    Returns:
        The detail list, or ``[]`` if nothing was recommended or every
        recommended uid is already in ``compute``.
    """
    sensitive_name = "recomment_%s_sensitive" % str(date)
    recommended_uids = r.hkeys(sensitive_name)  # recommended sensitive users
    admitted_uids = r.hkeys("compute")  # users already admitted

    if recommended_uids:
        pending = list(set(recommended_uids).difference(admitted_uids))
        work_date = ts2datetime(datetime2ts(date) - DAY)
        return get_sensitive_user_detail(pending, work_date, 1) if pending else []

    # no data for that day
    return []
def ajax_submit_task():
    """Request handler: submit the task described by the JSON request body.

    The decoded body is stamped with today's date under 'submit_date' and
    handed to ``submit_task``; its return value is sent back as JSON.
    """
    input_data = request.get_json()
    input_data['submit_date'] = ts2datetime(time.time())
    return json.dumps(submit_task(input_data))
def search_mention(uid, sensitive):
    """Rank the users that ``uid`` mentioned (@) over a seven-day window.

    For each day the Redis hash ``at_<YYYYMMDD>`` is read for ``uid``, or
    ``sensitive_at_<YYYYMMDD>`` when ``sensitive`` is truthy. Each stored
    value is a JSON object mapping mentioned uid -> count. Counts are summed
    per mentioned uid across the window.

    Args:
        uid: the mentioning user.
        sensitive: selects the ``sensitive_at_`` hashes when truthy.

    Returns:
        ``[top, total]``. ``top`` holds up to 20
        ``(at_uid, [count, in_flag])`` pairs sorted by count descending;
        ``in_flag`` is '1' if ``identify_uid_list_in`` reports the uid and
        '0' otherwise. ``total`` is the number of distinct mentioned uids.
        ``[None, 0]`` is returned when nothing was found.
    """
    # NOTE: the window is still pinned to the 2013-09-07 test data set.
    test_ts = datetime2ts('2013-09-07')
    key_prefix = 'sensitive_at_' if sensitive else 'at_'

    stat_results = {}
    for i in range(7):
        date = ts2datetime(test_ts - i * 24 * 3600).replace('-', '')
        at_temp = r_cluster.hget(key_prefix + str(date), str(uid))
        if not at_temp:
            continue
        # Accumulate per *mentioned* uid. Keying by `uid` (the caller's own
        # id) would collapse every mention into a single entry.
        for at_uid, count in json.loads(at_temp).items():
            stat_results[at_uid] = stat_results.get(at_uid, 0) + count

    if not stat_results:
        return [None, 0]

    # Build the answer from the aggregated totals, not from whichever day
    # happened to be loaded last.
    in_status = identify_uid_list_in(list(stat_results.keys()))
    results = {}
    for at_uid, count in stat_results.items():
        results[at_uid] = [count, '1' if at_uid in in_status else '0']

    sorted_results = sorted(results.items(), key=lambda x: x[1][0], reverse=True)
    return [sorted_results[0:20], len(results)]
def ajax_task_sort():
    """Request handler: return the sorted task list as JSON.

    Query parameters read from ``request.args``:
        user, keyword (comma-separated), start_time, submit_time:
            default to ''.
        status: converted with ``int``; defaults to 2 (all, no limit).
        end_time: defaults to today's date.

    ``sort_task`` is called unconditionally, even when ``user`` is empty.
    """
    args = request.args
    now_date = ts2datetime(time.time())

    user = args.get('user', '')
    keyword = args.get("keyword", "")  # comma-separated
    start_time = args.get("start_time", "")
    end_time = args.get("end_time", now_date)
    submit_time = args.get('submit_time', "")
    status = int(args.get("status", 2))  # 2 for all, no limit

    return json.dumps(
        sort_task(user, keyword, status, start_time, end_time, submit_time))
def identify_in(date, words_list):
    """Admit recommended words into the sensitive-word list.

    For every entry of ``words_list`` the word is stored in the Redis hash
    ``sensitive_words`` and in ``history_in_<date>`` with the JSON value
    ``[level, category]``. It is then removed from the seven
    ``recommend_sensitive_words_<YYYYMMDD>`` hashes of the window.

    Args:
        date: date on which the new words were recommended; used as the
            suffix of the history hash.
        words_list: iterable of ``[word, level, category]``.

    Returns:
        The string '1'.
    """
    # NOTE: the recommendation window is still pinned to the 2013-09-07
    # test data set.
    ts = datetime2ts('2013-09-07')
    time_list = [
        ts2datetime(int(ts) - i * 24 * 3600).replace('-', '')
        for i in range(7)
    ]

    for item in words_list:
        word = item[0]
        payload = json.dumps([item[1], item[2]])
        r.hset('sensitive_words', word, payload)
        r.hset('history_in_' + date, word, payload)
        # Use a distinct loop name: reusing `date` here overwrote the
        # parameter, so later words were filed under the wrong history key.
        for recommend_date in time_list:
            r.hdel('recommend_sensitive_words_' + recommend_date, word)
    return '1'
def ajax_task_sort():
    """Request handler: return the sorted task list for a user as JSON.

    Query parameters read from ``request.args``:
        user, keyword (comma-separated), start_time, submit_time:
            default to ''.
        status: converted with ``int``; defaults to 2 (all, no limit).
        end_time: defaults to today's date.

    ``sort_task`` is only called when ``user`` is non-empty; otherwise an
    empty JSON list is returned.
    """
    params = request.args
    today = ts2datetime(time.time())

    user = params.get('user', '')
    keyword = params.get("keyword", "")  # comma-separated
    raw_status = params.get("status", 2)  # 2 for all, no limit
    start_time = params.get("start_time", "")
    end_time = params.get("end_time", today)
    submit_time = params.get('submit_time', "")
    status = int(raw_status)

    if not user:
        return json.dumps([])
    return json.dumps(
        sort_task(user, keyword, status, start_time, end_time, submit_time))
def user_sentiment_trend(uid):
    """Compute sentiment totals and a per-day breakdown for one user.

    All hits returned by a term-filtered search on ``uid`` in
    ``sensitive_user_text`` are bucketed by day (from ``timestamp``). Each
    hit's ``sentiment`` field is JSON-decoded. Within a sentiment object,
    key '126' is scored as positive; keys '127', '128', '129' as negative.

    Returns:
        ``[[positive, neutral, negetive], by_day]``. The first list counts
        texts; a text is neutral when its sentiment is empty or both sums
        are equal. ``by_day`` maps ``YYYYMMDD`` to a dict with 'neutral'
        (empty sentiments that day), 'positive' and 'negetive' (score sums).
    """

    def _scores(sentiment):
        # (positive sum, negative sum) of one decoded sentiment object.
        positive = sum(sentiment.get("126", {}).values())
        negative = (
            sum(sentiment.get("127", {}).values())
            + sum(sentiment.get("128", {}).values())
            + sum(sentiment.get("129", {}).values())
        )
        return positive, negative

    query_body = {"query": {"filtered": {"filter": {"term": {"uid": uid}}}}}
    search_results = es.search(
        index="sensitive_user_text", doc_type="user", body=query_body
    )["hits"]["hits"]

    sentiments_by_day = {}
    for hit in search_results:
        day = ts2datetime(float(hit["_source"]["timestamp"])).replace("-", "")
        decoded = json.loads(hit["_source"]["sentiment"])
        sentiments_by_day.setdefault(day, []).append(decoded)

    positive_texts = 0
    negative_texts = 0
    neutral_texts = 0
    by_day = {}
    for day, sentiments in sentiments_by_day.items():
        summary = {"neutral": 0, "positive": 0, "negetive": 0}
        for sentiment in sentiments:
            if not sentiment:
                summary["neutral"] += 1
                neutral_texts += 1
                continue
            positive, negative = _scores(sentiment)
            summary["positive"] += positive
            summary["negetive"] += negative
            if positive > negative:
                positive_texts += 1
            elif positive < negative:
                negative_texts += 1
            else:
                neutral_texts += 1
        by_day[day] = summary

    return [[positive_texts, neutral_texts, negative_texts], by_day]
def recommend_in_top_influence(date):
    """Return details of high-influence users recommended on ``date``.

    Candidates are the fields of the Redis hash
    ``recomment_<date>_influence`` minus the fields of ``compute``.
    Details are fetched for the day before ``date`` with the sensitive
    flag 0.

    Returns:
        The result of ``get_sensitive_user_detail``, or ``[]`` when there is
        no recommendation data or no candidate remains.
    """
    recommended = r.hkeys("recomment_" + date + "_influence")
    already_in = r.hkeys("compute")  # users already admitted
    if not recommended:
        return []

    uid_list = list(set(recommended) - set(already_in))
    work_date = ts2datetime(datetime2ts(date) - DAY)
    if not uid_list:
        return []
    return get_sensitive_user_detail(uid_list, work_date, 0)
def recommend_in_top_influence(date):
    """Look up the not-yet-admitted influential users recommended on ``date``.

    Reads the field names of the Redis hashes ``recomment_<date>_influence``
    and ``compute``; uids found in ``compute`` are dropped. The rest are
    passed to ``get_sensitive_user_detail`` with the previous day's date
    and the sensitive flag 0.

    Returns:
        The detail list, or ``[]`` if nothing was recommended or all
        recommended uids are already in ``compute``.
    """
    influence_name = "recomment_%s_influence" % date
    recommended_uids = r.hkeys(influence_name)
    admitted_uids = r.hkeys("compute")  # users already admitted

    if recommended_uids:
        pending = list(set(recommended_uids).difference(admitted_uids))
        work_date = ts2datetime(datetime2ts(date) - DAY)
        return get_sensitive_user_detail(pending, work_date, 0) if pending else []

    return []
def change_attribute(attribute_name, value, user, state):
    """Overwrite the value list of an existing custom attribute.

    The attribute document is fetched from Elasticsearch by
    ``attribute_name``. Its name, value, user and date fields are updated
    and the document is re-indexed under the same id.

    Args:
        attribute_name: id of the attribute document.
        value: comma-separated values; stored joined with '&'.
        user: stored in the document's 'user' field.
        state: accepted for interface compatibility; not used.

    Returns:
        True after the document was re-indexed. False when the attribute
        document could not be fetched (e.g. it does not exist).
    """
    # Only an already-registered custom attribute may be changed.
    try:
        result = es.get(index=attribute_index_name,
                        doc_type=attribute_index_type,
                        id=attribute_name)['_source']
    except Exception:
        # Best effort: a failed lookup is reported as "not changed". Unlike
        # a bare except, this lets KeyboardInterrupt/SystemExit through.
        return False

    result['attribute_name'] = attribute_name
    result['attribute_value'] = '&'.join(value.split(','))
    result['user'] = user
    result['date'] = ts2datetime(time.time())
    es.index(index=attribute_index_name, doc_type=attribute_index_type,
             id=attribute_name, body=result)
    return True
def upload_file():
    """Request handler: submit a task whose uid list is uploaded as text.

    Form fields read from ``request.form``: 'upload_data' (one entry per
    line), 'task_name' and 'state'. The first 10 characters of every line
    that is at least 10 characters long are taken as a uid. The task is
    stamped with today's date and passed to ``submit_task``; its return
    value is sent back as JSON.
    """
    form = request.form
    upload_data = form['upload_data']
    task_name = form['task_name']
    state = form['state']

    now_date = ts2datetime(time.time())
    uid_list = [line[:10] for line in upload_data.split('\n') if len(line) >= 10]

    input_data = {
        'submit_date': now_date,
        'task_name': task_name,
        'state': state,
        'uid_list': uid_list,
    }
    return json.dumps(submit_task(input_data))
def submit_attribute(attribute_name, attribute_value, submit_user, submit_date):
    """Register a new custom attribute (tag) and its mapping.

    The attribute document is indexed under ``attribute_name``. If the user
    index has no field mapping for ``tag-<attribute_name>`` yet, one is
    added (type 'string', analyzer 'my_analyzer').

    Args:
        attribute_name: id of the new attribute document.
        attribute_value: comma-separated values; stored joined with '&'.
        submit_user: stored in the document's 'user' field.
        submit_date: stored in the document's 'date' field.

    Returns:
        The string "tag exists" if an attribute with this name is already
        indexed, otherwise True.
    """
    # maybe there have to identify the user admitted to submit attribute
    if es.exists(index=attribute_index_name, doc_type=attribute_index_type,
                 id=attribute_name):
        return "tag exists"

    input_data = {
        'attribute_name': attribute_name,
        'attribute_value': '&'.join(attribute_value.split(',')),
        'user': submit_user,
        'date': submit_date,
    }
    es.index(index=attribute_index_name, doc_type=attribute_index_type,
             id=attribute_name, body=input_data)

    submit_tag = "tag-" + attribute_name
    exist_field = es_user_portrait.indices.get_field_mapping(
        index=user_index_name, doc_type=user_index_type, field=submit_tag)
    if not exist_field:
        print(es_user_portrait.indices.put_mapping(
            index=user_index_name, doc_type=user_index_type,
            body={'properties': {submit_tag: {'type': 'string',
                                              'analyzer': 'my_analyzer'}}},
            ignore=400))

    status = True
    print(status)
    return status
def influenced_people(uid, mid, influence_style, date, default_number=20):
    """Describe the users who retweeted or commented on one weibo.

    The weibo ``mid`` is fetched from the flow-text index of ``date``. A
    non-empty ``root_mid`` on it means it is not an original weibo, and the
    search is then made against that root weibo instead.

    Args:
        uid: author whose influence is examined; excluded from the result.
        mid: id of the weibo.
        influence_style: 0 for retweets (message_type 3), anything else for
            comments (message_type 2).
        date: 'YYYY-MM-DD' day selecting the flow-text and bci indices.
        default_number: maximum number of uids per group passed to
            ``get_user_url``.

    Returns:
        A dict with
        'influence_users': ``[in_portrait_urls, out_portrait_urls]``, and
        'influence_distribution': dict with the top five 'domian' (sic),
        'topic' and 'geo' entries, the average 'influence', and the
        'in_portrait_number' / 'out_portrait_number' counts.
    """
    index_flow_text = pre_text_index + date
    text_result = es.get(index=index_flow_text, doc_type=flow_text_index_type,
                         id=mid)["_source"]
    # A non-empty root_mid marks a non-original weibo.
    temp_mid = text_result.get("root_mid", '')

    retweet_style = int(influence_style) == 0
    if temp_mid:
        # Non-original weibo: match interactions directed at uid on the root.
        uid_field = "directed_uid"
        root_mid = temp_mid
    else:
        # Original weibo: retweets reference uid as root author, comments
        # reference uid as the directed user.
        uid_field = "root_uid" if retweet_style else "directed_uid"
        root_mid = mid
    message_type = 3 if retweet_style else 2

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {uid_field: uid}},
                            {"term": {"message_type": message_type}},
                            {"term": {"root_mid": root_mid}},
                        ]
                    }
                }
            }
        },
        "size": 30000
    }
    if RUN_TYPE:
        query_body["sort"] = {"user_fansnum": {"order": "desc"}}

    search_results = es.search(index=index_flow_text,
                               doc_type=flow_text_index_type,
                               body=query_body, _source=False,
                               fields=["uid"], timeout=30)["hits"]["hits"]

    # Distinct interacting uids, leaving out the author himself.
    results = list(set(
        hit["fields"]["uid"][0] for hit in search_results
        if int(hit["fields"]["uid"][0]) != int(uid)
    ))

    bci_index = "bci_" + date.replace('-', '')
    if results:
        portrait_results = es_user_portrait.mget(
            index=user_portrait, doc_type=portrait_index_type,
            body={"ids": results},
            fields=["domain", "topic_string", "activity_geo_dict",
                    "importance", "influence"])["docs"]
        bci_results = es_cluster.mget(index=bci_index, doc_type='bci',
                                      body={"ids": results},
                                      fields=['user_index'])['docs']
    else:
        portrait_results = []
        bci_results = []

    # Average user_index over every interacting uid; uids without a bci
    # document contribute 0.
    total_influence = 0
    for item in bci_results:
        if item['found']:
            total_influence += item['fields']['user_index'][0]
    average_influence = total_influence / len(results) if results else 0

    in_portrait = []
    out_portrait = []
    retweeted_domain = {}
    retweeted_topic = {}
    retweeted_geo = {}
    if portrait_results:
        for item in portrait_results:
            if not item["found"]:
                out_portrait.append(item['_id'])
                continue
            fields = item["fields"]
            in_portrait.append([item['_id'], fields["importance"][0]])
            temp_domain = fields["domain"][0].split('&')
            temp_topic = fields["topic_string"][0].split('&')
            # Only the last entry of the activity list is used.
            temp_geo = json.loads(fields["activity_geo_dict"][0])[-1].keys()
            retweeted_domain = aggregation(temp_domain, retweeted_domain)
            retweeted_topic = aggregation(temp_topic, retweeted_topic)
            retweeted_geo = aggregation(temp_geo, retweeted_geo)
        retweeted_domain = proportion(retweeted_domain)
        retweeted_topic = proportion(retweeted_topic)
        retweeted_geo = proportion(retweeted_geo)

    sorted_retweeted_domain = sorted(retweeted_domain.items(),
                                     key=lambda x: x[1], reverse=True)
    sorted_retweeted_topic = sorted(retweeted_topic.items(),
                                    key=lambda x: x[1], reverse=True)
    sorted_retweeted_geo = sorted(retweeted_geo.items(),
                                  key=lambda x: x[1], reverse=True)

    retweeted_results = dict()
    # The misspelled "domian" key is kept; consumers may read it by name.
    retweeted_results["domian"] = sorted_retweeted_domain[:5]
    retweeted_results["topic"] = sorted_retweeted_topic[:5]
    retweeted_results["geo"] = sorted_retweeted_geo[:5]
    retweeted_results["influence"] = average_influence

    # Users in the portrait library, most important first.
    in_portrait = sorted(in_portrait, key=lambda x: x[1], reverse=True)
    temp_list = [item[0] for item in in_portrait]
    retweeted_results['in_portrait_number'] = len(temp_list)
    retweeted_results['out_portrait_number'] = len(out_portrait)

    in_portrait_url = get_user_url(temp_list[:default_number])
    out_portrait_url = get_user_url(out_portrait[:default_number])

    return_results = dict()
    return_results["influence_users"] = [in_portrait_url, out_portrait_url]
    return_results["influence_distribution"] = retweeted_results
    return return_results
def sensitive_attribute(uid, date):
    """Assemble the sensitive-activity profile of one user.

    Combines the user's ``sensitive_user_portrait`` document, the 'bci'
    document of the reference day, the sentiment trend and the
    retweet / follower / mention searches into a single dict.

    Args:
        uid: user id.
        date: accepted for interface compatibility. The value passed in is
            not used; the reference day is pinned to the 20130907 test
            data.

    Returns:
        ``{'utype': 0}`` when ``user_type(uid)`` is falsy. Otherwise a dict
        with 'utype' 1 and the profile fields filled in below.
    """
    results = {}
    if not user_type(uid):
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid

    portrait_result = es.get(index='sensitive_user_portrait', doc_type='user',
                             id=uid)['_source']
    results['uname'] = portrait_result['uname']
    # A stored 0 is replaced by 'unknown'.
    if portrait_result['uname'] == 0:
        results['uname'] = 'unknown'
    if portrait_result['photo_url'] == 0:
        portrait_result['photo_url'] = 'unknown'
    if portrait_result['location'] == 0:
        portrait_result['location'] = 'unknown'
    results['photo_url'] = portrait_result['photo_url']

    # NOTE: reference day and window are pinned to the test data set.
    test_date = '20130907'
    window_start = datetime2ts('2013-09-08') - 8 * 24 * 3600
    week_dates = [
        ts2datetime(window_start + offset * 24 * 3600).replace('-', '')
        for offset in range(1, 8)
    ]

    # sensitive weibo number statistics
    try:
        influence_results = es.get(index=test_date, doc_type='bci', id=uid)['_source']
        results['sensitive_origin_weibo_number'] = influence_results.get(
            's_origin_weibo_number', 0)
        results['sensitive_retweeted_weibo_number'] = influence_results.get(
            's_retweeted_weibo_number', 0)
        results['sensitive_comment_weibo_number'] = int(
            influence_results.get('s_comment_weibo_number', 0))
        results['sensitive_retweeted_weibo_retweeted_total_number'] = influence_results.get(
            's_retweeted_weibo_retweeted_total_number', 0)
        results['sensitive_origin_weibo_retweeted_total_number'] = influence_results.get(
            's_origin_weibo_retweeted_total_number', 0)
        results['sensitive_origin_weibo_comment_total_number'] = influence_results.get(
            's_origin_weibo_comment_total_number', 0)
        results['sensitive_retweeted_weibo_comment_total_number'] = influence_results.get(
            's_retweeted_weibo_comment_total_number', 0)
    except Exception:
        # Best effort: without a usable bci document every counter is 0.
        results['sensitive_origin_weibo_number'] = 0
        results['sensitive_retweeted_weibo_number'] = 0
        results['sensitive_comment_weibo_number'] = 0
        results['sensitive_origin_weibo_retweeted_total_number'] = 0
        results['sensitive_origin_weibo_comment_total_number'] = 0
        results['sensitive_retweeted_weibo_retweeted_total_number'] = 0
        results['sensitive_retweeted_weibo_comment_total_number'] = 0

    try:
        item = es.get(index=test_date, doc_type='bci', id=uid)['_source']
    except Exception:
        item = {}
    # Totals = ordinary counters + sensitive counters.
    results['origin_weibo_total_number'] = item.get(
        'origin_weibo_number', 0) + results['sensitive_origin_weibo_number']
    results['retweeted_weibo_total_number'] = item.get(
        'retweeted_weibo_number', 0) + results['sensitive_retweeted_weibo_number']
    results['comment_weibo_total_number'] = int(
        item.get('comment_weibo_number', 0)) + int(
            results['sensitive_comment_weibo_number'])
    results['origin_weibo_retweeted_total_number'] = item.get(
        'origin_weibo_retweeted_total_number', 0) + results[
            'sensitive_origin_weibo_retweeted_total_number']
    results['origin_weibo_comment_total_number'] = item.get(
        'origin_weibo_comment_total_number', 0) + results[
            'sensitive_origin_weibo_comment_total_number']
    results['retweeted_weibo_retweeted_total_number'] = item.get(
        'retweeted_weibo_retweeted_total_number', 0) + results[
            'sensitive_retweeted_weibo_retweeted_total_number']
    results['retweeted_weibo_comment_total_number'] = item.get(
        'retweeted_weibo_comment_total_number', 0) + results[
            'sensitive_retweeted_weibo_comment_total_number']

    results['sensitive_text'] = sort_sensitive_text(uid)
    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag'] = []
    results['sensitive_words'] = []
    results['sensitive_hashtag_dict'] = []
    results['sensitive_words_dict'] = []
    results['sensitive_hashtag_description'] = ''

    sentiment_trend = user_sentiment_trend(uid)
    # emotion_number is [positive, neutral, negative] text counts.
    emotion_number = sentiment_trend[0]
    emotion_total = emotion_number[0] + emotion_number[1] + emotion_number[2]
    if emotion_total:
        results['negetive_index'] = float(emotion_number[2]) / emotion_total
        # NOTE(review): index 1 is the neutral count although the key says
        # "negetive"; kept as found, confirm the intended numerator.
        results['negetive_influence'] = float(emotion_number[1]) / emotion_total
    else:
        # A user without any sentiment record must not raise
        # ZeroDivisionError.
        results['negetive_index'] = 0.0
        results['negetive_influence'] = 0.0

    sentiment_dict = sentiment_trend[1]
    return_sentiment = dict()
    return_sentiment['positive'] = []
    return_sentiment['neutral'] = []
    return_sentiment['negetive'] = []
    for day in week_dates:
        temp = sentiment_dict.get(day, {})
        return_sentiment['positive'].append([temp.get('positive', 0), day])
        return_sentiment['negetive'].append([temp.get('negetive', 0), day])
        return_sentiment['neutral'].append([temp.get('neutral', 0), day])
    results['sentiment_trend'] = return_sentiment

    portrait_results = es.get(index="sensitive_user_portrait", doc_type='user',
                              id=uid)['_source']
    results['politics_trend'] = portrait_results['politics_trend']
    results['domain'] = portrait_results['domain']
    results['sensitive'] = portrait_results['sensitive']
    temp_hashtag = portrait_results['sensitive_hashtag_dict']
    temp_sensitive_words = portrait_results['sensitive_words_dict']
    temp_sensitive_geo = portrait_results['sensitive_geo_activity']

    if temp_sensitive_geo:
        sensitive_geo_dict = json.loads(temp_sensitive_geo)
        if len(sensitive_geo_dict) < 7:
            # Pad the missing days of the window with empty entries.
            for day in week_dates:
                if day not in sensitive_geo_dict:
                    sensitive_geo_dict[day] = {}
        sensitive_geo_list = []
        for day, geo_counts in sorted(sensitive_geo_dict.items(),
                                      key=lambda x: x[0], reverse=False):
            # Keep the two most frequent places of each day.
            sorted_geo = sorted(geo_counts.items(), key=lambda x: x[1],
                                reverse=True)[0:2]
            sensitive_geo_list.append([day, sorted_geo])
        results['sensitive_geo_distribute'] = sensitive_geo_list

    if temp_hashtag:
        hashtag_dict = json.loads(portrait_results['sensitive_hashtag_dict'])
        if len(hashtag_dict) < 7:
            for day in week_dates:
                if day in hashtag_dict:
                    hashtag_dict[day] = sorted(hashtag_dict[day].items(),
                                               key=lambda x: x[1], reverse=True)
                else:
                    hashtag_dict[day] = {}
        results['sensitive_hashtag_description'] = hashtag_description(hashtag_dict)
    else:
        hashtag_dict = {}

    if temp_sensitive_words:
        sensitive_words_dict = json.loads(temp_sensitive_words)
        if len(sensitive_words_dict) < 7:
            for day in week_dates:
                if day not in sensitive_words_dict:
                    sensitive_words_dict[day] = {}
    else:
        sensitive_words_dict = {}

    results['today_sensitive_words'] = sensitive_words_dict.get(test_date, {})

    all_hashtag_dict = {}
    for day in hashtag_dict:
        # NOTE(review): entries are read as (hashtag, count) pairs, which
        # only holds for the days sorted above - confirm for other days.
        for key in hashtag_dict[day]:
            if key[0] in all_hashtag_dict:
                all_hashtag_dict[key[0]] += key[1]
            else:
                all_hashtag_dict[key[0]] = key[1]

    all_sensitive_words_dict = {}
    for day in sensitive_words_dict:
        detail_words_dict = sensitive_words_dict[day]
        for key in detail_words_dict:
            if key in all_sensitive_words_dict:
                all_sensitive_words_dict[key] += detail_words_dict[key]
            else:
                all_sensitive_words_dict[key] = detail_words_dict[key]

    sorted_hashtag = sorted(all_hashtag_dict.items(), key=lambda x: x[1],
                            reverse=True)
    sorted_words = sorted(all_sensitive_words_dict.items(), key=lambda x: x[1],
                          reverse=True)
    sorted_hashtag_dict = sorted(hashtag_dict.items(), key=lambda x: x[0],
                                 reverse=False)
    sorted_words_dict = sorted(sensitive_words_dict.items(), key=lambda x: x[0],
                               reverse=False)
    new_sorted_dict = sort_sensitive_words(sorted_words)
    results['sensitive_hashtag'] = sorted_hashtag
    results['sensitive_words'] = new_sorted_dict
    results['sensitive_hashtag_dict'] = sorted_hashtag_dict
    results['sensitive_words_dict'] = sorted_words_dict

    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)
    return results