def add_attribute_portrait(uid, attribute_name, attribute_value, submit_user):
    """Attach a custom attribute to a user portrait document.

    Returns True on success, or an explanatory string when the user,
    the attribute, or the attribute value is unknown, or when the user
    already carries the attribute.  Permission checking for submit_user
    is intentionally not implemented yet.
    """
    status = False
    # The user document must already exist in the portrait index.
    try:
        user_doc = es.get(index=user_index_name, doc_type=user_index_type,
                          id=uid)['_source']
    except:
        return 'no user'
    # The attribute definition must exist in the attribute index.
    try:
        attribute_doc = es.get(index=attribute_index_name,
                               doc_type=attribute_index_type,
                               id=attribute_name)['_source']
    except:
        return 'no attribute'
    # Legal values are stored as a single '&'-separated string.
    if attribute_value not in attribute_doc['attribute_value'].split('&'):
        return 'no attribute value'
    # Refuse to overwrite an attribute the user already has.
    if attribute_name in user_doc:
        return 'attribute exist'
    es.update(index=user_index_name, doc_type=user_index_type, id=uid,
              body={'doc': {attribute_name: attribute_value}})
    status = True
    return status
def change_attribute_portrait(uid, attribute_name, attribute_value, submit_user):
    """Overwrite an existing user-portrait attribute with a new value.

    Returns True on success or an explanatory string when the user,
    attribute, or value cannot be found.  Whether submit_user is allowed
    to make the change is not verified yet.
    """
    status = False
    try:
        es.get(index=user_index_name, doc_type=user_index_type,
               id=uid)['_source']
    except:
        return 'no user'
    try:
        attribute_doc = es.get(index=attribute_index_name,
                               doc_type=attribute_index_type,
                               id=attribute_name)['_source']
    except:
        return 'no attribute'
    # Legal values are kept as one '&'-separated string.
    legal_values = attribute_doc['attribute_value'].split('&')
    if attribute_value not in legal_values:
        return 'no attribute value'
    es.update(index=user_index_name, doc_type=user_index_type, id=uid,
              body={'doc': {attribute_name: attribute_value}})
    status = True
    return status
def add_tag2group(uid_list, attribute_name, attribute_value):
    """Tag every user in uid_list with attribute_name=attribute_value.

    Users missing from the portrait index or already carrying the
    attribute are silently skipped.  Returns True once all users have
    been processed, or an explanatory string when the attribute or the
    value is unknown.
    """
    status = False
    try:
        attribute_doc = es.get(index=attribute_index_name,
                               doc_type=attribute_index_type,
                               id=attribute_name)['_source']
    except:
        return 'no attribute'
    if attribute_value not in attribute_doc['attribute_value'].split('&'):
        return 'no attribute value'
    for uid in uid_list:
        try:
            user_doc = es.get(index=user_index_name, doc_type=user_index_type,
                              id=uid)['_source']
        except:
            user_doc = {}
        # Only tag users that exist and do not already have the attribute.
        if user_doc and attribute_name not in user_doc:
            es.update(index=user_index_name, doc_type=user_index_type, id=uid,
                      body={'doc': {attribute_name: attribute_value}})
    status = True
    return status
def add_attribute_portrait(uid, attribute_name, attribute_value, submit_user):
    """Attach a custom attribute to a user portrait document.

    Returns True on success, or an explanatory string when the user,
    the attribute, or the attribute value is unknown, or when the user
    already carries the attribute.  Permission checking for submit_user
    is intentionally not implemented yet.
    """
    status = False
    # The user document must already exist in the portrait index.
    try:
        user_doc = es.get(index=user_index_name, doc_type=user_index_type,
                          id=uid)['_source']
    except:
        return 'no user'
    # The attribute definition must exist in the attribute index.
    try:
        attribute_doc = es.get(index=attribute_index_name,
                               doc_type=attribute_index_type,
                               id=attribute_name)['_source']
    except:
        return 'no attribute'
    # Legal values are stored as a single '&'-separated string.
    if attribute_value not in attribute_doc['attribute_value'].split('&'):
        return 'no attribute value'
    # Refuse to overwrite an attribute the user already has.
    if attribute_name in user_doc:
        return 'attribute exist'
    es.update(index=user_index_name, doc_type=user_index_type, id=uid,
              body={'doc': {attribute_name: attribute_value}})
    status = True
    return status
def user_type(uid):
    """Return the 'type' field of a user's sensitive portrait, or '' if absent."""
    try:
        doc = es.get(index='sensitive_user_portrait', doc_type='user', id=uid)
        return doc['_source']['type']
    except:
        return ''
def submit_task(input_data): status = 0 # mark it can not submit task_name = input_data['task_name'] try: result = es.get(index=index_name, doc_type=index_type, id=task_name) except: status = 1 if status != 0 and 'uid_file' not in input_data: r.lpush('group_task', json.dumps(input_data)) input_data['status'] = 0 # mark the task not compute count = len(input_data['uid_list']) input_data['count'] = count uid_list_string = json.dumps(input_data['uid_list']) es.index(index='group_result', doc_type='group', id=task_name, body=input_data) elif status != 0 and 'uid_file' in input_data: input_data['status'] = 0 # mark the task not compute uid_file = input_data['uid_file'] uid_list = read_uid_file(uid_file) input_data['count'] = len(uid_list) input_data['uid_list'] = json.dumps(uid_list) r.lpush('group_task', json.dumps(input_data)) es.index(index='group_result', doc_type='group', id=task_name, body=input_data) delete_status = delete_uid_file(uid_file) if delete_status == 0: print 'fail delete uid file' elif delete_status == 1: print 'success delete uid file' return status
def get_group_list(task_name):
    """Return [uid, uname, gender, location, importance, influence] rows
    for every member of the named group task.

    Fetches the task from es, mget-fetches each member's portrait and
    log-normalises importance/influence onto a 0-100 scale.  Members
    whose portrait is missing or incomplete contribute a bare [uid] row.
    Returns [] when the task does not exist.
    """
    results = []
    try:
        es_results = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except:
        return results
    #print 'es_result:', es_results['uid_list'], type(es_results['uid_list'])
    uid_list = es_results['uid_list']
    user_portrait_attribute = es.mget(index='user_portrait', doc_type='user', body={'ids':uid_list})['docs']
    evaluate_max = get_evaluate_max()
    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['uname']
            gender = source['gender']
            location = source['location']
            importance = source['importance']
            # NOTE(review): assumes importance/influence and the evaluate_max
            # values are floats -- if both sides are ints this is Python 2
            # integer division. TODO confirm the types stored in es.
            normal_importance = math.log(importance / evaluate_max['importance'] * 9 + 1, 10) * 100
            influence = source['influence']
            normal_influence = math.log(influence / evaluate_max['influence'] * 9 + 1, 10) * 100
            results.append([uid, uname, gender, location, normal_importance, normal_influence])
        except:
            # portrait missing or a field absent: emit uid only
            results.append([uid])
    return results
def identify_task(task_name):
    """Fetch the task document for task_name, or None when it does not exist."""
    try:
        return es.get(index=task_index_name, doc_type=task_index_type,
                      id=task_name)['_source']
    except:
        return None
def user_type(uid):
    """Return the 'type' field of a user's sensitive portrait, or '' if absent."""
    try:
        doc = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)
        return doc["_source"]["type"]
    except:
        return ""
def get_group_list(task_name):
    """Return [uid, uname, gender, location, importance, influence] rows
    for every member of the named group task.

    Fetches the task from es, mget-fetches each member's portrait and
    log-normalises importance/influence onto a 0-100 scale.  Members
    whose portrait is missing or incomplete contribute a bare [uid] row.
    Returns [] when the task does not exist.
    """
    results = []
    try:
        es_results = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
    except:
        return results
    #print 'es_result:', es_results['uid_list'], type(es_results['uid_list'])
    uid_list = es_results['uid_list']
    user_portrait_attribute = es.mget(index='user_portrait', doc_type='user', body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()
    for item in user_portrait_attribute:
        uid = item['_id']
        try:
            source = item['_source']
            uname = source['uname']
            gender = source['gender']
            location = source['location']
            importance = source['importance']
            # NOTE(review): assumes importance/influence and the evaluate_max
            # values are floats -- if both sides are ints this is Python 2
            # integer division. TODO confirm the types stored in es.
            normal_importance = math.log(
                importance / evaluate_max['importance'] * 9 + 1, 10) * 100
            influence = source['influence']
            normal_influence = math.log(
                influence / evaluate_max['influence'] * 9 + 1, 10) * 100
            results.append([
                uid, uname, gender, location, normal_importance,
                normal_influence
            ])
        except:
            # portrait missing or a field absent: emit uid only
            results.append([uid])
    return results
def end_track_task(task_name):
    """Mark a tracking task as ended.

    Sets the task's status to 0, stamps an end_date rounded up to the
    next 15-minute boundary, releases the task's users and removes the
    task from redis.  Returns an explanatory string.

    Bug fixed: the original tested `status == '0'` where `status` was the
    local int 0 -- never true in Python 2 (int vs str), so the
    "task have end" guard could not fire; the intended check is against
    the task's stored status field (stored as int 0 elsewhere in this
    file).
    """
    status = 0
    try:
        task_exist = es.get(index=index_name, doc_type=index_type,
                            id=task_name)['_source']
    except:
        return 'task name not exist'
    task_status = task_exist['status']
    if task_status == 0:
        return 'task have end'
    else:
        task_exist['status'] = 0
        # round the end time up to the next 900s (15 minute) segment boundary
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        now_date_ts = datetime2ts(now_date)
        time_segment = int((now_ts - now_date_ts) / 900) + 1
        end_ts = now_date_ts + time_segment * 900
        task_exist['end_date'] = ts2date(end_ts)
        task_user = task_exist['uid_list']
        status = change_user_count(task_user)
        if status == 0:
            return 'change user task count fail'
        else:
            es.index(index=index_name, doc_type=index_type, id=task_name,
                     body=task_exist)
            status = delete_task_redis(task_name)
            if status == 0:
                return 'delete task from redis fail'
            else:
                return 'success change status to end'
def submit_task(input_data): status = 0 # mark it can not submit task_name = input_data['task_name'] try: result = es.get(index=index_name, doc_type=index_type, id=task_name) except: status = 1 if status != 0 and 'uid_file' not in input_data: r.lpush('group_task', json.dumps(input_data)) input_data['status'] = 0 # mark the task not compute count = len(input_data['uid_list']) input_data['count'] = count uid_list_string = json.dumps(input_data['uid_list']) es.index(index='group_result', doc_type='group', id=task_name, body=input_data) elif status != 0 and 'uid_file' in input_data: input_data['status'] = 0 # mark the task not compute uid_file = input_data['uid_file'] uid_list = read_uid_file(uid_file) input_data['count'] = len(uid_list) input_data['uid_list'] = json.dumps(uid_list) r.lpush('group_task', json.dumps(input_data)) es.index(index='group_result', doc_type='group', id=task_name, body=input_data) delete_status = delete_uid_file(uid_file) if delete_status == 0: print 'fail delete uid file' elif delete_status == 1: print 'success delete uid file' return status
def submit_track_task(input_data):
    """Register a new tracking task.

    Steps: reject a duplicate task_name; index the task; add its users
    to the redis set; enqueue the task; record its start time under
    monitor_task_time_record.  Returns 'success submit' or a string
    describing the first failing step.
    """
    task_name = input_data['task_name']
    submit_date = input_data['submit_date']
    try:
        es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
        return 'task_name exist'
    except:
        es.index(index=index_name, doc_type=index_type, id=task_name,
                 body=input_data)
        task_user = input_data['uid_list']
        if add_user_set(task_user) == 0:
            return 'add user to redis set fail'
        if add_task_redis(task_name) == 0:
            return 'add task to redis fail'
        if add_task_record_time(task_name, submit_date) == 0:
            return 'add task record time fail'
        return 'success submit'
def end_track_task(task_name):
    """Mark a tracking task as ended.

    Sets the task's status to 0, stamps an end_date rounded up to the
    next 15-minute boundary, releases the task's users and removes the
    task from redis.  Returns an explanatory string.

    Bug fixed: the original tested `status == '0'` where `status` was the
    local int 0 -- never true in Python 2 (int vs str), so the
    "task have end" guard could not fire; the intended check is against
    the task's stored status field (stored as int 0 elsewhere in this
    file).
    """
    status = 0
    try:
        task_exist = es.get(index=index_name, doc_type=index_type,
                            id=task_name)['_source']
    except:
        return 'task name not exist'
    task_status = task_exist['status']
    if task_status == 0:
        return 'task have end'
    else:
        task_exist['status'] = 0
        # round the end time up to the next 900s (15 minute) segment boundary
        now_ts = time.time()
        now_date = ts2datetime(now_ts)
        now_date_ts = datetime2ts(now_date)
        time_segment = int((now_ts - now_date_ts) / 900) + 1
        end_ts = now_date_ts + time_segment * 900
        task_exist['end_date'] = ts2date(end_ts)
        task_user = task_exist['uid_list']
        status = change_user_count(task_user)
        if status == 0:
            return 'change user task count fail'
        else:
            es.index(index=index_name, doc_type=index_type, id=task_name,
                     body=task_exist)
            status = delete_task_redis(task_name)
            if status == 0:
                return 'delete task from redis fail'
            else:
                return 'success change status to end'
def identify_uid_in(uid):
    """Return the es 'found' flag for uid in the sensitive_user_portrait index.

    Removed the unused `result` local.
    NOTE(review): es.get raises for a missing document rather than
    returning found=False -- callers appear to rely on that behaviour,
    so no try/except was added here.
    """
    search_result = es.get(index='sensitive_user_portrait', doc_type="user",
                           id=uid)['found']
    return search_result
def submit_attribute(attribute_name, attribute_value, submit_user, submit_date):
    """Create a custom attribute definition unless it already exists.

    attribute_value is a comma-separated string and is stored
    '&'-joined.  Returns True when a new attribute was indexed, False
    when an attribute with this name already exists.  Permission
    checking for submit_user is intentionally not implemented yet.

    Bug fixed: the original read ['docs'] from es.get (a key only
    es.mget returns), so the existence check always failed and an
    existing attribute was silently overwritten; the check now inspects
    the fetched document's '_source'.  The unused now_ts/date locals
    were dropped.
    """
    status = False
    try:
        attribute_exist = es.get(index=attribute_index_name,
                                 doc_type=attribute_index_type,
                                 id=attribute_name)
    except:
        attribute_exist = {}
    try:
        source = attribute_exist['_source']
    except:
        # attribute not present: create it
        input_data = dict()
        input_data['attribute_name'] = attribute_name
        input_data['attribute_value'] = '&'.join(attribute_value.split(','))
        input_data['user'] = submit_user
        input_data['date'] = submit_date
        es.index(index=attribute_index_name, doc_type=attribute_index_type,
                 id=attribute_name, body=input_data)
        status = True
    return status
def submit_track_task(input_data):
    """Register a new tracking task.

    Steps: reject a duplicate task_name; index the task; add its users
    to the redis set; enqueue the task; record its start time under
    monitor_task_time_record.  Returns 'success submit' or a string
    describing the first failing step.
    """
    task_name = input_data['task_name']
    submit_date = input_data['submit_date']
    try:
        es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
        return 'task_name exist'
    except:
        es.index(index=index_name, doc_type=index_type, id=task_name,
                 body=input_data)
        task_user = input_data['uid_list']
        if add_user_set(task_user) == 0:
            return 'add user to redis set fail'
        if add_task_redis(task_name) == 0:
            return 'add task to redis fail'
        if add_task_record_time(task_name, submit_date) == 0:
            return 'add task record time fail'
        return 'success submit'
def delete_attribute(attribute_name): status = False try: result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source'] print 'result:', result except Exception, e: raise e return status
def sort_sensitive_text(uid):
    """Collect a user's sensitive weibo posts as detail rows.

    Each row is [ts, geo, text, sensitive_words, retweeted_link,
    sentiment, message_type, retweeted_number, comment_number].
    Sentiment is collapsed to 1 / -1 / 0 (positive / negative / tied or
    absent).  Despite the name, no sorting is performed here.
    """
    sensitive_text = search_sensitive_text(uid)
    text_all = []
    if sensitive_text:
        for item in sensitive_text:
            text_detail = []
            item = item["_source"]
            if not item["sensitive"]:
                continue  # only sensitive posts are reported
            text = item["text"].encode("utf-8", "ignore")
            sentiment_dict = json.loads(item["sentiment"])
            if not sentiment_dict:
                sentiment = 0
            else:
                # key "126" holds positive hits; "127"-"129" negative ones
                positive = len(sentiment_dict.get("126", {}))
                negetive = (
                    len(sentiment_dict.get("127", {})) + len(sentiment_dict.get("128", {})) + len(sentiment_dict.get("129", {}))
                )
                if positive > negetive:
                    sentiment = 1
                elif positive < negetive:
                    sentiment = -1
                else:
                    sentiment = 0
            ts = item["timestamp"]
            uid = item["uid"]
            mid = item["mid"]
            # message_type: presumably 1 = origin weibo, 2 = retweet -- TODO confirm
            message_type = item.get("message_type", 0)
            date = ts2datetime(float(ts)).replace("-", "")
            try:
                # the per-day bci index keeps retweet/comment counters keyed by mid
                bci_result = es.get(index=date, doc_type="bci", id=uid)["_source"]
                if int(message_type) == 1:
                    retweeted_number = bci_result["s_origin_weibo_retweeted_detail"].get(mid)
                    comment_number = bci_result["s_origin_weibo_comment_detail"].get(mid)
                elif int(message_type) == 2:
                    retweeted_number = bci_result["s_retweeted_weibo_retweeted_detail"].get(mid)
                    comment_number = bci_result["s_retweeted_weibo_comment_detail"].get(mid)
                else:
                    retweeted_number = 0
                    comment_number = 0
            except:
                # bci document missing for that day: default both counters to 0
                retweeted_number = 0
                comment_number = 0
            single_sw = item.get("sensitive_words", {})
            if single_sw:
                sw = json.loads(single_sw).keys()
            else:
                # print item
                sw = []
            geo = item["geo"]
            retweeted_link = extract_uname(text)
            text_detail.extend(
                [ts, geo, text, sw, retweeted_link, sentiment, message_type, retweeted_number, comment_number]
            )
            text_all.append(text_detail)
    return text_all
def user_type(uid):
    """Return the 'type' field of a user's sensitive portrait, or '' if absent."""
    try:
        doc = es.get(index='sensitive_user_portrait', doc_type="user", id=uid)
        return doc['_source']['type']
    except:
        return ''
def get_attribute_value(attribute_name): attribute_value_list = [] try: attribute_result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source'] except: return 'no attribute' print 'attribute_result:', attribute_result attribute_value_string = attribute_result['attribute_value'] attribute_value_list = attribute_value_string.split('&') return attribute_value_list
def delete_attribute(attribute_name): status = False try: result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source'] print 'result:', result except Exception, e: raise e return status
def get_attribute_value(attribute_name): attribute_value_list = [] try: attribute_result = es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source'] except: return 'no attribute' print 'attribute_result:', attribute_result attribute_value_string = attribute_result['attribute_value'] attribute_value_list = attribute_value_string.split('&') return attribute_value_list
def change_attribute_portrait(uid, attribute_name, attribute_value, submit_user):
    """Overwrite an existing user-portrait attribute with a new value.

    Returns True on success or an explanatory string when the user,
    attribute, or value cannot be found.  Whether submit_user is allowed
    to make the change is not verified yet.
    """
    status = False
    try:
        es.get(index=user_index_name, doc_type=user_index_type,
               id=uid)['_source']
    except:
        return 'no user'
    try:
        attribute_doc = es.get(index=attribute_index_name,
                               doc_type=attribute_index_type,
                               id=attribute_name)['_source']
    except:
        return 'no attribute'
    # Legal values are kept as one '&'-separated string.
    legal_values = attribute_doc['attribute_value'].split('&')
    if attribute_value not in legal_values:
        return 'no attribute value'
    es.update(index=user_index_name, doc_type=user_index_type, id=uid,
              body={'doc': {attribute_name: attribute_value}})
    status = True
    return status
def sort_sensitive_text(uid):
    """Collect a user's sensitive weibo posts as detail rows.

    Each row is [ts, geo, text, sensitive_words, retweeted_link,
    sentiment, message_type, retweeted_number, comment_number].
    Sentiment is collapsed to 1 / -1 / 0 (positive / negative / tied or
    absent).  Despite the name, no sorting is performed here.
    """
    sensitive_text = search_sensitive_text(uid)
    text_all = []
    if sensitive_text:
        for item in sensitive_text:
            text_detail = []
            item = item['_source']
            if not item['sensitive']:
                continue  # only sensitive posts are reported
            text = item['text'].encode('utf-8', 'ignore')
            sentiment_dict = json.loads(item['sentiment'])
            if not sentiment_dict:
                sentiment = 0
            else:
                # key '126' holds positive hits; '127'-'129' negative ones
                positive = len(sentiment_dict.get('126', {}))
                negetive = len(sentiment_dict.get('127', {})) + len(sentiment_dict.get('128', {})) + len(sentiment_dict.get('129', {}))
                if positive > negetive:
                    sentiment = 1
                elif positive < negetive:
                    sentiment = -1
                else:
                    sentiment = 0
            ts = item['timestamp']
            uid = item['uid']
            mid = item['mid']
            # message_type: presumably 1 = origin weibo, 2 = retweet -- TODO confirm
            message_type = item.get('message_type', 0)
            date = ts2datetime(float(ts)).replace('-', '')
            try:
                # the per-day bci index keeps retweet/comment counters keyed by mid
                bci_result = es.get(index=date, doc_type='bci', id=uid)['_source']
                if int(message_type) == 1:
                    retweeted_number = bci_result['s_origin_weibo_retweeted_detail'].get(mid)
                    comment_number = bci_result['s_origin_weibo_comment_detail'].get(mid)
                elif int(message_type) == 2:
                    retweeted_number = bci_result['s_retweeted_weibo_retweeted_detail'].get(mid)
                    comment_number = bci_result['s_retweeted_weibo_comment_detail'].get(mid)
                else:
                    retweeted_number = 0
                    comment_number = 0
            except:
                # bci document missing for that day: default both counters to 0
                retweeted_number = 0
                comment_number = 0
            single_sw = item.get('sensitive_words', {})
            if single_sw:
                sw = json.loads(single_sw).keys()
            else:
                # print item
                sw = []
            geo = item['geo']
            retweeted_link = extract_uname(text)
            text_detail.extend([ts, geo, text, sw, retweeted_link, sentiment, message_type, retweeted_number, comment_number])
            text_all.append(text_detail)
    return text_all
def get_user_attribute_name(uid): result = [] user_result = es.get(index=user_index_name, doc_type=user_index_type, \ id=uid) print 'user_result:', user_result try: source = user_result['_source'] except: source = {} for key in source: if key not in identify_attribute_list: result.append(key) return result
def get_user_attribute_name(uid): result = [] user_result = es.get(index=user_index_name, doc_type=user_index_type, \ id=uid) print 'user_result:', user_result try: source = user_result['_source'] except: source = {} for key in source: if key not in identify_attribute_list: result.append(key) return result
def get_sensitive_word(task_name, timestamp):
    """Return the sensitive words recorded for a task at one timestamp,
    sorted by descending count, as (word, count) pairs.

    Returns 'the task is not exist' for an unknown task and None when no
    mid-result document exists for the given timestamp.

    Bug fixed: the original set `result = None` in the except branch but
    then fell through to use the undefined `task_mid_result`, raising
    NameError whenever the mid-result was missing; it now returns None.
    """
    task_exist = identify_task(task_name)
    if not task_exist:
        return 'the task is not exist'
    try:
        task_mid_result = es.get(index=monitor_index_name, doc_type=task_name,
                                 id=str(timestamp))['_source']
    except:
        return None
    sensitive_word_dict = json.loads(task_mid_result['sensitive_word'])
    return sorted(sensitive_word_dict.items(), key=lambda x: x[1],
                  reverse=True)
def add_tag2group(uid_list, attribute_name, attribute_value):
    """Tag every user in uid_list with attribute_name=attribute_value.

    Users missing from the portrait index or already carrying the
    attribute are silently skipped.  Returns True once all users have
    been processed, or an explanatory string when the attribute or the
    value is unknown.
    """
    status = False
    try:
        attribute_doc = es.get(index=attribute_index_name,
                               doc_type=attribute_index_type,
                               id=attribute_name)['_source']
    except:
        return 'no attribute'
    if attribute_value not in attribute_doc['attribute_value'].split('&'):
        return 'no attribute value'
    for uid in uid_list:
        try:
            user_doc = es.get(index=user_index_name, doc_type=user_index_type,
                              id=uid)['_source']
        except:
            user_doc = {}
        # Only tag users that exist and do not already have the attribute.
        if user_doc and attribute_name not in user_doc:
            es.update(index=user_index_name, doc_type=user_index_type, id=uid,
                      body={'doc': {attribute_name: attribute_value}})
    status = True
    return status
def get_network(task_exist):
    """Build the per-day top-5 inner-retweet user lists for a task.

    Walks backwards day by day from the (test-pinned) current date,
    capped at 7 days and at the task's submit date, collecting the
    'top1'..'top5' user/count pairs from the 'inner_<date>' mid-result
    documents, then computes the polarization abnormal index.  Returns
    [date_list, top_list_dict, abnormal_index].

    Bugs fixed: (1) when a day's mid-result document was missing, the
    original indexed the empty dict with task_date_result[field] and
    raised KeyError -- such days are now skipped; (2) iter_count was
    never incremented, so the intended 7-day cap was dead code.
    """
    task_name = task_exist['task_name']
    submit_date = task_exist['submit_date']
    submit_ts = date2ts(submit_date)
    time_segment = 24 * 3600
    now_ts = time.time()
    now_date = ts2datetime(now_ts)
    now_date_ts = datetime2ts(now_date)
    #test
    now_date_ts = datetime2ts('2013-09-07')
    iter_date_ts = now_date_ts
    iter_count = 1
    date_list = []
    top_list_dict = {}
    while True:
        if iter_count >= 8 or iter_date_ts < submit_ts:
            break
        iter_date = ts2datetime(iter_date_ts)
        date_list.append(iter_date)
        key = 'inner_' + str(iter_date)
        try:
            task_date_result = es.get(index=monitor_index_name,
                                      doc_type=task_name, id=key)['_source']
        except:
            task_date_result = {}
        if task_date_result:
            for field in ['top1', 'top2', 'top3', 'top4', 'top5']:
                # each field is a JSON-encoded [uid, count] pair
                user_count_item = json.loads(task_date_result[field])
                uid = user_count_item[0]
                uname = uid2uname(uid)
                count = user_count_item[1]
                try:
                    top_list_dict[field].append([uid, uname, count])
                except:
                    top_list_dict[field] = [[uid, uname, count]]
        iter_count += 1
        iter_date_ts -= time_segment
    # get inner-retweet group from es---field: inner_graph
    '''
    try:
        inner_graph = json.loads(task_date_result['inner_graph'])
    except:
        inner_graph = {}
    '''
    abnormal_index = compute_inner_polarization(top_list_dict)
    return [date_list, top_list_dict, abnormal_index]
def delete_attribute_portrait(uid, attribute_name, submit_user): status = False #identify the user exist #identify the attribute value exist in es_user_portrait #identify the submit_user have been admitted---without try: user_exist = es.get(index=user_index_name, doc_type=user_index_type, id=uid)['_source'] except: return 'no user' if attribute_name not in user_exist: return 'user have no attribtue' try: del_attribute_value = user_exist.pop(attribute_name) es.index(index=user_index_name, doc_type=user_index_type, id=uid, body=user_exist) status = True except Exception, e: raise e
def change_attribute(attribute_name, value, user, state):
    """Replace the value list of an existing custom attribute.

    value is a comma-separated string stored '&'-joined; the submitting
    user and today's date are recorded on the document.  Returns True on
    success, False when the attribute does not exist.  (`state` is
    currently unused.)
    """
    try:
        doc = es.get(index=attribute_index_name, doc_type=attribute_index_type,
                     id=attribute_name)['_source']
    except:
        return False
    doc['attribute_name'] = attribute_name
    doc['attribute_value'] = '&'.join(value.split(','))
    doc['user'] = user
    doc['date'] = ts2datetime(time.time())
    es.index(index=attribute_index_name, doc_type=attribute_index_type,
             id=attribute_name, body=doc)
    return True
def get_group_tag(group_name):
    """Fetch the user tags of a group task's members.

    NOTE(review): this function looks unfinished -- it mget-fetches the
    members' portrait documents but never aggregates them into `result`
    / `order_result` (both unused) and implicitly returns None on the
    success path; confirm the intended "statistic tag" step before
    relying on it.
    """
    result = {}
    order_result = []
    #get group task uid list
    #get user tag
    #statistic tag
    try:
        group_task_result = es.get(index=group_index_name, doc_type=group_index_type, id=group_name)
    except:
        return 'no group task'
    try:
        uid_list = group_task_result['_source']['uid_list']
    except:
        return 'no user'
    try:
        user_result = es.mget(index=user_index_name, doc_type=user_index_type, body={'ids': uid_list})['docs']
    except Exception, e:
        raise e
def delete_track_task(task_name):
    """Delete a tracking task from es and redis.

    The per-user task-count bookkeeping is currently disabled (the
    change_user_count call is commented out and status forced to 1), so
    the 'change user count fail' branch is effectively dead.  Returns an
    explanatory string.
    """
    try:
        task_exist = es.get(index=index_name, doc_type=index_type,
                            id=task_name)['_source']
    except:
        return 'task not exist'
    task_user = task_exist['uid_list']
    #change the user task_count in redis set
    #status = change_user_count(task_user)
    status = 1
    if status == 0:
        return 'change user count fail'
    #delete task from es
    result = es.delete(index=index_name, doc_type=index_type, id=task_name)
    if delete_task_redis(task_name) == 0:
        return 'delete task from redis fail'
    return 'success delete task'
def submit_attribute(attribute_name, attribute_value, submit_user, submit_date):
    """Create a custom attribute definition unless it already exists.

    attribute_value is a comma-separated string and is stored
    '&'-joined.  Returns True when a new attribute was indexed, False
    when an attribute with this name already exists.  Permission
    checking for submit_user is intentionally not implemented yet.

    Bug fixed: the original read ['docs'] from es.get (a key only
    es.mget returns), so the existence check always failed and an
    existing attribute was silently overwritten; the check now inspects
    the fetched document's '_source'.  The unused now_ts/date locals
    were dropped.
    """
    status = False
    try:
        attribute_exist = es.get(index=attribute_index_name,
                                 doc_type=attribute_index_type,
                                 id=attribute_name)
    except:
        attribute_exist = {}
    try:
        source = attribute_exist['_source']
    except:
        # attribute not present: create it
        input_data = dict()
        input_data['attribute_name'] = attribute_name
        input_data['attribute_value'] = '&'.join(attribute_value.split(','))
        input_data['user'] = submit_user
        input_data['date'] = submit_date
        es.index(index=attribute_index_name, doc_type=attribute_index_type,
                 id=attribute_name, body=input_data)
        status = True
    return status
def delete_attribute_portrait(uid, attribute_name, submit_user): status = False #identify the user exist #identify the attribute value exist in es_user_portrait #identify the submit_user have been admitted---without try: user_exist = es.get(index=user_index_name, doc_type=user_index_type, id=uid)['_source'] except: return 'no user' if attribute_name not in user_exist: return 'user have no attribtue' try: del_attribute_value = user_exist.pop(attribute_name) es.index(index=user_index_name, doc_type=user_index_type, id=uid, body=user_exist) status = True except Exception, e: raise e
def get_group_tag(group_name):
    """Fetch the user tags of a group task's members.

    NOTE(review): this function looks unfinished -- it mget-fetches the
    members' portrait documents but never aggregates them into `result`
    / `order_result` (both unused) and implicitly returns None on the
    success path; confirm the intended "statistic tag" step before
    relying on it.
    """
    result = {}
    order_result = []
    #get group task uid list
    #get user tag
    #statistic tag
    try:
        group_task_result = es.get(index=group_index_name, doc_type=group_index_type, id=group_name)
    except:
        return 'no group task'
    try:
        uid_list = group_task_result['_source']['uid_list']
    except:
        return 'no user'
    try:
        user_result = es.mget(index=user_index_name, doc_type=user_index_type, body={'ids': uid_list})['docs']
    except Exception, e:
        raise e
def delete_track_task(task_name):
    """Delete a tracking task from es and redis.

    The per-user task-count bookkeeping is currently disabled (the
    change_user_count call is commented out and status forced to 1), so
    the 'change user count fail' branch is effectively dead.  Returns an
    explanatory string.
    """
    try:
        task_exist = es.get(index=index_name, doc_type=index_type,
                            id=task_name)['_source']
    except:
        return 'task not exist'
    task_user = task_exist['uid_list']
    #change the user task_count in redis set
    #status = change_user_count(task_user)
    status = 1
    if status == 0:
        return 'change user count fail'
    #delete task from es
    result = es.delete(index=index_name, doc_type=index_type, id=task_name)
    if delete_task_redis(task_name) == 0:
        return 'delete task from redis fail'
    return 'success delete task'
def change_attribute(attribute_name, value, user, state):
    """Replace the value list of an existing custom attribute.

    value is a comma-separated string stored '&'-joined; the submitting
    user and today's date are recorded on the document.  Returns True on
    success, False when the attribute does not exist.  (`state` is
    currently unused.)
    """
    try:
        doc = es.get(index=attribute_index_name, doc_type=attribute_index_type,
                     id=attribute_name)['_source']
    except:
        return False
    doc['attribute_name'] = attribute_name
    doc['attribute_value'] = '&'.join(value.split(','))
    doc['user'] = user
    doc['date'] = ts2datetime(time.time())
    es.index(index=attribute_index_name, doc_type=attribute_index_type,
             id=attribute_name, body=doc)
    return True
def ajax_sort_sensitive_words(): level_order = request.args.get("level", "") # 0:all, 1:level 1, 2:level2, 3:level3 category_order = request.args.get("category", "") # '': all uid = request.args.get("uid", "") words_dict = es.get(index="sensitive_user_portrait", doc_type="user", id=uid)["_source"]["sensitive_words_dict"] words_dict = json.loads(words_dict) all_words_dict = dict() for v in words_dict.values(): for key in v: if all_words_dict.has_key(key): all_words_dict[key] += v[key] else: all_words_dict[key] = v[key] sorted_words = sorted(all_words_dict.items(), key=lambda x: x[1], reverse=True) new_words_list = sort_sensitive_words(sorted_words) print new_words_list if 1: level_1 = [] level_2 = [] level_3 = [] for item in new_words_list: if int(item[2]) == 1: if not category_order: level_1.append(item) else: if item[3] == category_order: level_1.append(item) else: pass elif int(item[2]) == 2: if not category_order: level_2.append(item) else: if item[3] == category_order: level_2.append(item) else: pass elif int(item[2]) == 3: if not category_order: level_3.append(item) else: if item[3] == category_order: level_3.append(item) else: pass new_list = [] if int(level_order) == 0: if not category_order: return json.dumps(new_words_list) else: new_list.extend(level_1) new_list.extend(level_2) new_list.extend(level_3) elif int(level_order) == 1: new_list = level_1 elif int(level_order) == 2: new_list = level_2 else: new_list = level_3 return json.dumps(new_list)
def identify_uid_in(uid):
    """Return the es 'found' flag for uid in the sensitive_user_portrait index.

    Removed the unused `result` local.
    NOTE(review): es.get raises for a missing document rather than
    returning found=False -- callers appear to rely on that behaviour,
    so no try/except was added here.
    """
    search_result = es.get(index='sensitive_user_portrait', doc_type="user",
                           id=uid)['found']
    return search_result
def sensitive_attribute(uid, date):
    """Assemble the sensitive-behavior profile of one user.

    Returns a dict with: basic info (uname/photo_url), per-day sensitive vs.
    total weibo counters from the daily 'bci' indices, sensitive text, geo /
    hashtag / sensitive-word distributions, sentiment trend, and sensitive
    retweet/follow/at relations. Returns {'utype': 0} when the user is not
    a sensitive user.

    NOTE: the `date` parameter is immediately overwritten below and has no
    effect — presumably a leftover; confirm against callers.
    """
    results = {}
    portrait = {}  # unused; kept as-is
    utype = user_type(uid)
    if not utype:
        # Not a sensitive user: short-circuit with just the type flag.
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid
    portrait_result = es.get(index='sensitive_user_portrait', doc_type='user', id=uid)['_source']
    results['uname'] = portrait_result['uname']
    # Missing fields are stored as 0 in the portrait index.
    if portrait_result['uname'] == 0:
        results['uname'] = 'unknown'
    if portrait_result['photo_url'] == 0:
        portrait_result['photo_url'] = 'unknown'
    if portrait_result['location'] == 0:
        portrait_result['location'] = 'unknown'
    results['photo_url'] = portrait_result['photo_url']
    # sensitive weibo number statistics
    date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
    date = '20130907'  # test: hard-coded test date overrides "yesterday"
    influence_results = []
    try:
        # Daily influence counters live in an index named after the date.
        influence_results = es.get(index=date, doc_type='bci', id=uid)['_source']
        results['sensitive_origin_weibo_number'] = influence_results.get('s_origin_weibo_number', 0)
        results['sensitive_retweeted_weibo_number'] = influence_results.get('s_retweeted_weibo_number', 0)
        results['sensitive_comment_weibo_number'] = int(influence_results.get('s_comment_weibo_number', 0))
        results['sensitive_retweeted_weibo_retweeted_total_number'] = influence_results.get('s_retweeted_weibo_retweeted_total_number', 0)
        results['sensitive_origin_weibo_retweeted_total_number'] = influence_results.get('s_origin_weibo_retweeted_total_number', 0)
        results['sensitive_origin_weibo_comment_total_number'] = influence_results.get('s_origin_weibo_comment_total_number', 0)
        results['sensitive_retweeted_weibo_comment_total_number'] = influence_results.get('s_retweeted_weibo_comment_total_number', 0)
    except:
        # No bci document for that day: zero out all sensitive counters.
        results['sensitive_origin_weibo_number'] = 0
        results['sensitive_retweeted_weibo_number'] = 0
        results['sensitive_comment_weibo_number'] = 0
        results['sensitive_origin_weibo_retweeted_total_number'] = 0
        results['sensitive_origin_weibo_comment_total_number'] = 0
        results['sensitive_retweeted_weibo_retweeted_total_number'] = 0
        results['sensitive_retweeted_weibo_comment_total_number'] = 0
    try:
        item = es.get(index=date, doc_type='bci', id=uid)['_source']
    except:
        item = {}
    # Totals = ordinary counters + the sensitive counters gathered above.
    results['origin_weibo_total_number'] = item.get('origin_weibo_number', 0) + results['sensitive_origin_weibo_number']
    results['retweeted_weibo_total_number'] = item.get('retweeted_weibo_number', 0) + results['sensitive_retweeted_weibo_number']
    results['comment_weibo_total_number'] = int(item.get('comment_weibo_number', 0)) + int(results['sensitive_comment_weibo_number'])
    results['origin_weibo_retweeted_total_number'] = item.get('origin_weibo_retweeted_total_number', 0) + results['sensitive_origin_weibo_retweeted_total_number']
    results['origin_weibo_comment_total_number'] = item.get('origin_weibo_comment_total_number', 0) + results['sensitive_origin_weibo_comment_total_number']
    results['retweeted_weibo_retweeted_total_number'] = item.get('retweeted_weibo_retweeted_total_number', 0) + results['sensitive_retweeted_weibo_retweeted_total_number']
    results['retweeted_weibo_comment_total_number'] = item.get('retweeted_weibo_comment_total_number', 0) + results['sensitive_retweeted_weibo_comment_total_number']
    results['sensitive_text'] = sort_sensitive_text(uid)
    # Defaults; filled in below when portrait data is present.
    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag'] = []
    results['sensitive_words'] = []
    results['sensitive_hashtag_dict'] = []
    results['sensitive_words_dict'] = []
    results['sensitive_hashtag_description'] = ''
    sentiment_trend = user_sentiment_trend(uid)
    # emotion_number indexing: presumably [positive, neutral, negative] —
    # TODO confirm against user_sentiment_trend.
    emotion_number = sentiment_trend[0]
    results['negetive_index'] = float(emotion_number[2]) / (emotion_number[2] + emotion_number[1] + emotion_number[0])
    results['negetive_influence'] = float(emotion_number[1]) / (emotion_number[2] + emotion_number[1] + emotion_number[0])
    sentiment_dict = sentiment_trend[1]
    datetime = ts2datetime(time.time()).replace('-', '')  # unused; shadows any datetime import
    return_sentiment = dict()
    return_sentiment['positive'] = []
    return_sentiment['neutral'] = []
    return_sentiment['negetive'] = []
    ts = time.time()
    ts = datetime2ts('2013-09-08') - 8 * 24 * 3600  # hard-coded test window
    # Build a 7-day sentiment series ending at the fixed test date.
    for i in range(1, 8):
        ts = ts + 24 * 3600
        date = ts2datetime(ts).replace('-', '')
        temp = sentiment_dict.get(date, {})
        return_sentiment['positive'].append([temp.get('positive', 0), date])
        return_sentiment['negetive'].append([temp.get('negetive', 0), date])
        return_sentiment['neutral'].append([temp.get('neutral', 0), date])
    results['sentiment_trend'] = return_sentiment
    if 1:
        portrait_results = es.get(index="sensitive_user_portrait", doc_type='user', id=uid)['_source']
        results['politics_trend'] = portrait_results['politics_trend']
        results['domain'] = portrait_results['domain']
        results['sensitive'] = portrait_results['sensitive']
        temp_hashtag = portrait_results['sensitive_hashtag_dict']
        temp_sensitive_words = portrait_results['sensitive_words_dict']
        temp_sensitive_geo = portrait_results['sensitive_geo_activity']
        if temp_sensitive_geo:
            sensitive_geo_dict = json.loads(temp_sensitive_geo)
            if len(sensitive_geo_dict) < 7:
                # Pad missing days of the 7-day window with empty dicts.
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
                for i in range(7):
                    ts = ts + 24 * 3600
                    date = ts2datetime(ts).replace('-', '')
                    if sensitive_geo_dict.has_key(date):
                        pass
                    else:
                        sensitive_geo_dict[date] = {}
            sorted_sensitive_geo = sorted(sensitive_geo_dict.items(), key=lambda x: x[0], reverse=False)
            sensitive_geo_list = []
            for k, v in sorted_sensitive_geo:
                temp_list = []
                # Top-2 locations per day.
                sorted_geo = sorted(v.items(), key=lambda x: x[1], reverse=True)[0:2]
                # print sorted_geo
                temp_list.extend([k, sorted_geo])
                sensitive_geo_list.append(temp_list)
            results['sensitive_geo_distribute'] = sensitive_geo_list
        if temp_hashtag:
            hashtag_dict = json.loads(portrait_results['sensitive_hashtag_dict'])
            if len(hashtag_dict) < 7:
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
                for i in range(7):
                    ts = ts + 24 * 3600
                    date = ts2datetime(ts).replace('-', '')
                    if hashtag_dict.has_key(date):
                        # Replace the day's dict with a sorted (tag, count) list.
                        hashtag_dict_detail = hashtag_dict[date]
                        hashtag_dict[date] = sorted(hashtag_dict_detail.items(), key=lambda x: x[1], reverse=True)
                    else:
                        hashtag_dict[date] = {}
            results['sensitive_hashtag_description'] = hashtag_description(hashtag_dict)
        else:
            hashtag_dict = {}
        if temp_sensitive_words:
            sensitive_words_dict = json.loads(temp_sensitive_words)
            if len(sensitive_words_dict) < 7:
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
                for i in range(7):
                    ts = ts + 24 * 3600
                    date = ts2datetime(ts).replace('-', '')
                    if sensitive_words_dict.has_key(date):
                        pass
                    else:
                        sensitive_words_dict[date] = {}
        else:
            sensitive_words_dict = {}
        date = ts2datetime(time.time() - 24 * 3600).replace('-', '')
        date = '20130907'  # test date again
        today_sensitive_words = sensitive_words_dict.get(date, {})
        results['today_sensitive_words'] = today_sensitive_words
        # Aggregate hashtags across days. After the <7 padding above, each
        # day's value may be a (tag, count) list, so key[0]/key[1] index pairs.
        all_hashtag_dict = {}
        for item in hashtag_dict:
            detail_hashtag_dict = hashtag_dict[item]
            for key in detail_hashtag_dict:
                if all_hashtag_dict.has_key(key[0]):
                    all_hashtag_dict[key[0]] += key[1]
                else:
                    all_hashtag_dict[key[0]] = key[1]
        # Aggregate sensitive words across days (per-day dicts of word:count).
        all_sensitive_words_dict = {}
        for item in sensitive_words_dict:
            detail_words_dict = sensitive_words_dict[item]
            for key in detail_words_dict:
                if all_sensitive_words_dict.has_key(key):
                    all_sensitive_words_dict[key] += detail_words_dict[key]
                else:
                    all_sensitive_words_dict[key] = detail_words_dict[key]
        sorted_hashtag = sorted(all_hashtag_dict.items(), key=lambda x: x[1], reverse=True)
        sorted_words = sorted(all_sensitive_words_dict.items(), key=lambda x: x[1], reverse=True)
        sorted_hashtag_dict = sorted(hashtag_dict.items(), key=lambda x: x[0], reverse=False)
        sorted_words_dict = sorted(sensitive_words_dict.items(), key=lambda x: x[0], reverse=False)
        new_sorted_dict = sort_sensitive_words(sorted_words)
        results['sensitive_hashtag'] = sorted_hashtag
        results['sensitive_words'] = new_sorted_dict
        results['sensitive_hashtag_dict'] = sorted_hashtag_dict
        results['sensitive_words_dict'] = sorted_words_dict
        results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)
    return results
def imagine(uid, query_fields_dict, index_name="sensitive_user_portrait", doctype='user'):
    """
    uid: search users relate to uid
    query_fields_dict: defined search field weight
    fields: domain, topic, keywords, psycho_status, psycho_feature,
            activity_geo, hashtag
    for example: "domain": 2
    domain, psycho_feature

    Finds users similar to `uid` by building a function_score query: each
    requested field contributes wildcard clauses over the uid's own '&'-split
    values (boosted by the field's weight), and the score is biased by one of
    activeness/importance/sensitive/influence via field_value_factor.

    NOTE: this function mutates `query_fields_dict` in place (pops 'field',
    'size', and any field the user lacks) — callers must not reuse the dict.
    Returns [] when the uid has none of the requested fields; otherwise
    [own_info_row, match_row..., total_match_count].
    """
    personal_info = es.get(index="sensitive_user_portrait", doc_type="user", id=uid, _source=True)['_source']
    keys_list = query_fields_dict.keys()
    keys_list.remove('field')
    keys_list.remove('size')
    search_dict = {}
    iter_list = []
    for iter_key in keys_list:
        if iter_key not in personal_info or personal_info[iter_key] == '':
            # The uid has no data for this field: drop it from the query.
            query_fields_dict.pop(iter_key)
        else:
            iter_list.append(iter_key)
            temp = personal_info[iter_key]
            # Portrait fields store multiple values '&'-joined.
            search_dict[iter_key] = temp.split('&')
    if len(iter_list) == 0:
        return []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': [
                        ]
                    }
                },
                "field_value_factor": {
                }
            }
        }
    }
    # Score bias: log1p(field * factor); factors normalize each metric's scale.
    score_standard = {}
    score_standard["modifier"] = "log1p"
    if query_fields_dict['field'] == "activeness":
        score_standard['field'] = "activeness"
        score_standard['factor'] = 100
    elif query_fields_dict['field'] == "importance":
        score_standard['field'] = "importance"
        score_standard['factor'] = 0.01
    elif query_fields_dict['field'] == "sensitive":
        score_standard['field'] = "sensitive"
        score_standard['factor'] = 100
    elif query_fields_dict['field'] == 'influence':
        score_standard['field'] = "influence"
        score_standard['factor'] = 0.1
    else:
        # Unknown ranking field: factor 0 neutralizes the bias.
        score_standard['field'] = "influence"
        score_standard['factor'] = 0
    query_body['query']['function_score']['boost_mode'] = "sum"
    query_body['query']['function_score']['field_value_factor'] = score_standard
    query_fields_dict.pop('field')
    # Count matches before adding the wildcard clauses — NOTE(review): at this
    # point the bool.must list is still empty, so this counts all documents.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 100  # default number fetched from ES
    query_number = query_fields_dict['size']  # required number returned
    query_fields_dict.pop('size')
    # One bool.should group per field: any of the uid's values may match.
    for (k, v) in query_fields_dict.items():
        temp = {}
        temp_list = []
        for iter_key in search_dict[k]:
            temp_list.append({'wildcard': {k: {'wildcard': '*' + iter_key + '*', 'boost': v}}})
        query_body['query']['function_score']['query']['bool']['must'].append({'bool': {'should': temp_list}})
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence', 'sensitive']
    return_list = []
    count = 0
    for item in result:
        if uid == item['_id']:
            # Skip the query user himself.
            score = item['_score']
            continue
        info = []
        if not item['_source']['uname']:
            item['_source']['uname'] = 'unknown'
        for field in field_list:
            info.append(item['_source'][field])
        info.append(item['_score'])
        # Values this match shares with the query user, per field,
        # keyed by the display name from shift_dict.
        common_dict = dict()
        for iter_key in iter_list:
            iter_common_list = item['_source'][iter_key].split('&')
            search_common_list = list(set(iter_common_list) & set(search_dict[iter_key]))
            iter_key = shift_dict[iter_key]
            common_dict[iter_key] = search_common_list
        info.append(common_dict)
        return_list.append(info)
        count += 1
        if count == query_number:
            break
    return_list.append(number)
    # Prepend the query user's own row.
    temp_list = []
    for field in field_list:
        temp_list.append(personal_info[field])
    results = []
    results.append(temp_list)
    results.extend(return_list)
    return results
def search_attribute_portrait(uid):
    """Build the full portrait view of one user from the
    sensitive_user_portrait index.

    Collects basic info, keywords, social relations, geo / topic / domain /
    online-pattern distributions, ranks (importance, activeness, influence,
    sensitive) computed as count-of-users-above via range queries, and a
    7-day influence trend from the daily 'bci' indices.

    Returns the assembled dict, or None when the uid has no portrait document.
    """
    return_results = {}
    index_name = "sensitive_user_portrait"
    index_type = "user"
    try:
        search_result = es.get(index=index_name, doc_type=index_type, id=uid)
    except:
        return None
    results = search_result['_source']
    #return_results = results
    user_sensitive = user_type(uid)
    if user_sensitive:
        #return_results.update(sensitive_attribute(uid))
        return_results['user_type'] = 1
        return_results['sensitive'] = 1
    else:
        return_results['user_type'] = 0
        return_results['sensitive'] = 0
    # Missing fields are stored as 0 in the portrait index.
    if results['photo_url'] == 0:
        results['photo_url'] = 'unknown'
    if results['location'] == 0:
        results['location'] = 'unknown'
    return_results['photo_url'] = results['photo_url']
    return_results['uid'] = results['uid']
    return_results['uname'] = results['uname']
    if return_results['uname'] == 0:
        return_results['uname'] = 'unknown'
    return_results['location'] = results['location']
    return_results['fansnum'] = results['fansnum']
    return_results['friendsnum'] = results['friendsnum']
    return_results['gender'] = results['gender']
    return_results['psycho_status'] = json.loads(results['psycho_status'])
    keyword_list = []  # unused; kept as-is
    if results['keywords']:
        keywords_dict = json.loads(results['keywords'])
        sort_word_list = sorted(keywords_dict.items(), key=lambda x: x[1], reverse=True)
        return_results['keywords'] = sort_word_list
    else:
        return_results['keywords'] = []
    # Social relations (flag 0 = ordinary, non-sensitive relations).
    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)
    if results['ip'] and results['geo_activity']:
        ip_dict = json.loads(results['ip'])
        geo_dict = json.loads(results['geo_activity'])
        geo_description = active_geo_description(ip_dict, geo_dict)
        return_results['geo_description'] = geo_description
    else:
        return_results['geo_description'] = ''
    geo_top = []
    temp_geo = {}
    if results['geo_activity']:
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            # Pad missing days of the hard-coded 7-day test window.
            ts = time.time()
            ts = datetime2ts('2013-09-08') - 8*24*3600
            for i in range(7):
                ts = ts + 24*3600
                date = ts2datetime(ts).replace('-', '')
                if geo_dict.has_key(date):
                    pass
                else:
                    geo_dict[date] = {}
        activity_geo_list = sorted(geo_dict.items(), key=lambda x: x[0], reverse=False)
        geo_list = geo_dict.values()  # unused; kept as-is
        for k, v in activity_geo_list:
            # Top-2 locations per day plus a running overall tally.
            sort_v = sorted(v.items(), key=lambda x: x[1], reverse=True)
            top_geo = [item[0] for item in sort_v]
            geo_top.append([k, top_geo[0:2]])
            for iter_key in v.keys():
                if temp_geo.has_key(iter_key):
                    temp_geo[iter_key] += v[iter_key]
                else:
                    temp_geo[iter_key] = v[iter_key]
        sort_geo_dict = sorted(temp_geo.items(), key=lambda x: x[1], reverse=True)
        return_results['top_activity_geo'] = sort_geo_dict
        return_results['activity_geo_distribute'] = geo_top
    else:
        return_results['top_activity_geo'] = []
        return_results['activity_geo_distribute'] = geo_top
    hashtag_dict = get_user_hashtag(uid)[0]
    return_results['hashtag'] = hashtag_dict
    '''
    emotion_result = {}
    emotion_conclusion_dict = {}
    if results['emotion_words']:
        emotion_words_dict = json.loads(results['emotion_words'])
        for word_type in emotion_mark_dict:
            try:
                word_dict = emotion_words_dict[word_type]
                if word_type=='126' or word_type=='127':
                    emotion_conclusion_dict[word_type] = word_dict
                sort_word_dict = sorted(word_dict.items(), key=lambda x:x[1], reverse=True)
                word_list = sort_word_dict[:5]
            except:
                results['emotion_words'] = emotion_result
            emotion_result[emotion_mark_dict[word_type]] = word_list
    return_results['emotion_words'] = emotion_result
    '''
    # topic
    if results['topic']:
        topic_dict = json.loads(results['topic'])
        sort_topic_dict = sorted(topic_dict.items(), key=lambda x: x[1], reverse=True)
        return_results['topic'] = sort_topic_dict[:5]
    else:
        return_results['topic'] = []
    # domain
    if results['domain']:
        domain_string = results['domain']
        domain_list = domain_string.split('_')
        return_results['domain'] = domain_list
    else:
        return_results['domain'] = []
    '''
    # emoticon
    if results['emotion']:
        emotion_dict = json.loads(results['emotion'])
        sort_emotion_dict = sorted(emotion_dict.items(), key=lambda x:x[1], reverse=True)
        return_results['emotion'] = sort_emotion_dict[:5]
    else:
        return_results['emotion'] = []
    '''
    # on_line pattern
    if results['online_pattern']:
        online_pattern_dict = json.loads(results['online_pattern'])
        sort_online_pattern_dict = sorted(online_pattern_dict.items(), key=lambda x: x[1], reverse=True)
        return_results['online_pattern'] = sort_online_pattern_dict[:5]
    else:
        return_results['online_pattern'] = []
    '''
    #psycho_feature
    if results['psycho_feature']:
        psycho_feature_list = results['psycho_feature'].split('_')
        return_results['psycho_feature'] = psycho_feature_list
    else:
        return_results['psycho_feature'] = []
    '''
    # self_state
    try:
        profile_result = es_user_profile.get(index='weibo_user', doc_type='user', id=uid)
        self_state = profile_result['_source'].get('description', '')
        return_results['description'] = self_state
    except:
        return_results['description'] = ''
    # Rank = number of users whose metric is >= this user's (range count).
    if results['importance']:
        query_body = {
            'query': {
                'range': {
                    'importance': {
                        'from': results['importance'],
                        'to': 100000
                    }
                }
            }
        }
        importance_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if importance_rank['_shards']['successful'] != 0:
            return_results['importance_rank'] = importance_rank['count']
        else:
            return_results['importance_rank'] = 0
    else:
        return_results['importance_rank'] = 0
    return_results['importance'] = results['importance']
    if results['activeness']:
        query_body = {
            'query': {
                'range': {
                    'activeness': {
                        'from': results['activeness'],
                        'to': 10000
                    }
                }
            }
        }
        activeness_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if activeness_rank['_shards']['successful'] != 0:
            return_results['activeness_rank'] = activeness_rank['count']
        else:
            return_results['activeness_rank'] = 0
    else:
        return_results['activeness_rank'] = 0
    return_results['activeness'] = results['activeness']
    if results['influence']:
        query_body = {
            'query': {
                'range': {
                    'influence': {
                        'from': results['influence'],
                        'to': 100000
                    }
                }
            }
        }
        influence_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if influence_rank['_shards']['successful'] != 0:
            return_results['influence_rank'] = influence_rank['count']
        else:
            return_results['influence_rank'] = 0
    else:
        return_results['influence_rank'] = 0
    return_results['influence'] = results['influence']
    if results['sensitive']:
        query_body = {
            'query': {
                'range': {
                    'sensitive': {
                        'from': results['sensitive'],
                        'to': 100000
                    }
                }
            }
        }
        influence_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if influence_rank['_shards']['successful'] != 0:
            return_results['sensitive_rank'] = influence_rank['count']
        else:
            return_results['sensitive_rank'] = 0
    else:
        return_results['sensitive_rank'] = 0
    return_results['sensitive'] = results['sensitive']
    query_body = {
        'query': {
            "match_all": {}
        }
    }
    all_count = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print 'es_sensitive_user_portrait error'
        return_results['all_count'] = 0
    # link
    link_ratio = results['link']
    return_results['link'] = link_ratio
    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend
    # user influence trend
    influence_detail = []
    influence_value = []
    attention_value = []
    ts = time.time()
    ts = datetime2ts('2013-09-08') - 8*24*3600  # hard-coded test window
    for i in range(1, 8):
        date = ts2datetime(ts + i*24*3600).replace('-', '')
        detail = [0]*10
        try:
            item = es.get(index=date, doc_type='bci', id=uid)['_source']
            '''
            if return_results['utype']:
                detail[0] = item.get('s_origin_weibo_number', 0)
                detail[1] = item.get('s_retweeted_weibo_number', 0)
                detail[2] = item.get('s_origin_weibo_retweeted_total_number', 0) + item.get('s_retweeted_weibo_retweeted_total_number', 0)
                detail[3] = item.get('s_origin_weibo_comment_total_number', 0) + item.get('s_retweeted_weibo_comment_total_number', 0)
            else:
            '''
            if 1:
                detail[0] = item.get('origin_weibo_number', 0)
                detail[1] = item.get('retweeted_weibo_number', 0)
                detail[2] = item.get('origin_weibo_retweeted_total_number', 0) + item.get('retweeted_weibo_retweeted_total_number', 0)
                detail[3] = item.get('origin_weibo_comment_total_number', 0) + item.get('retweeted_weibo_comment_total_number', 0)
            # detail layout: [origin#, retweeted#, retweets, comments,
            #   top_retweeted_id, its text, top_retweeted#, top_comment_id,
            #   its text, top_comment#]
            retweeted_id = item.get('origin_weibo_top_retweeted_id', '0')
            detail[4] = retweeted_id
            if retweeted_id:
                try:
                    detail[5] = es.get(index='sensitive_user_text', doc_type='user', id=retweeted_id)['_source']['text']
                except:
                    detail[5] = ''
            else:
                detail[5] = ''
            detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
            detail[7] = item.get('origin_weibo_top_comment_id', '0')
            if detail[7]:
                try:
                    detail[8] = es.get(index='sensitive_user_text', doc_type='user', id=detail[7])['_source']['text']
                except:
                    detail[8] = ''
            else:
                detail[8] = ''
            detail[9] = item.get('origin_weibo_comment_top_number', 0)
            # Attention degree: logistic squash of (retweets + comments) to [0, 1).
            attention_number = detail[2] + detail[3]
            attention = 2/(1+math.exp(-0.005*attention_number)) - 1
            influence_value.append([date, item['user_index']])
            influence_detail.append([date, detail])
            attention_value.append(attention)
        except:
            # No bci document for that day.
            influence_value.append([date, 0])
            influence_detail.append([date, detail])
            attention_value.append(0)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value
    return return_results
def get_group_results(task_name, module):
    """Fetch one module of a group-analysis task's stored results.

    Parameters:
        task_name: task id (ES document id in the group-task index).
        module: one of 'overview', 'basic', 'activity', 'social', 'think',
                'text', 'influence' — selects which slice to decode.

    Returns a module-specific list, [] for an unknown module, or None when
    the task document does not exist.
    """
    result = []
    try:
        es_result = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
        #print 'result:', result
    except:
        return None
    #basic module: gender, count, verified
    if module == 'overview':
        task_name = es_result['task_name']
        submit_date = es_result['submit_date']
        state = es_result['state']
        tightness = es_result['tightness']
        activeness = es_result['activeness']
        importance = es_result['importance']
        influence = es_result['influence']
        result = [
            task_name, submit_date, state, tightness, activeness, importance,
            influence
        ]
    if module == 'basic':
        gender_dict = json.loads(es_result['gender'])
        count = es_result['count']
        verified = es_result['verified']
        if verified:
            # NOTE(review): verified_dict is computed but the raw `verified`
            # string is what gets returned — possibly intended to be the dict.
            verified_dict = json.loads(verified)
        result = [gender_dict, count, verified]
    if module == 'activity':
        activity_geo_dict = json.loads(es_result['activity_geo'])
        sort_activity_geo = sorted(activity_geo_dict.items(), key=lambda x: x[1], reverse=True)
        activity_geo = sort_activity_geo[:50]
        activity_trend = json.loads(es_result['activity_trend'])
        online_pattern_dict = json.loads(es_result['online_pattern'])
        sort_online_pattern = sorted(online_pattern_dict.items(), key=lambda x: x[1], reverse=True)
        online_pattern = sort_online_pattern[:50]
        geo_track = json.loads(es_result['geo_track'])
        result = [activity_geo, activity_trend, online_pattern, geo_track]
    if module == 'social':
        #degree_his = json.loads(es_result['degree_his'])
        density = es_result['density']
        retweet_weibo_count = es_result['retweet_weibo_count']
        retweet_user_count = es_result['retweet_user_count']
        retweet_relation = json.loads(es_result['retweet_relation'])
        # mget both endpoints of every relation in one request;
        # uid_list order is [src0, tgt0, src1, tgt1, ...].
        uid_list = []
        for relation in retweet_relation:
            uid_list.append(relation[0])
            uid_list.append(relation[1])
        es_portrait_result = es.mget(index='user_portrait', doc_type='user', body={'ids': uid_list})['docs']
        es_count = 0
        new_retweet_relation = []
        # NOTE(review): es_count advances only once per relation but two docs
        # were fetched per relation — from the second relation on, the cursor
        # looks misaligned with uid_list. Confirm against live data.
        for relation in retweet_relation:
            source_uid = relation[0]
            source_item = es_portrait_result[es_count]
            try:
                source = source_item['_source']
                source_uname = source['uname']
            except:
                source_uname = ''
            target_uid = relation[1]
            es_count += 1
            target_item = es_portrait_result[es_count]
            try:
                source = target_item['_source']
                target_uname = source['uname']
            except:
                target_uname = ''
            count = relation[2]
            new_retweet_relation.append(
                [source_uid, source_uname, target_uid, target_uname, count])
        uid_list = []
        out_beretweet_relation = json.loads(
            es_result['out_beretweet_relation'])
        uid_list = []
        uid_list = [item[0] for item in out_beretweet_relation]
        es_portrait_result = es.mget(index='user_portrait',
                                     doc_type='user',
                                     body={'ids': uid_list})['docs']
        es_count = 0
        new_out_beretweet_relation = []
        # Prepend [uid, uname] to each out-beretweet record.
        for i in range(len(uid_list)):
            item = es_portrait_result[i]
            uid = item['_id']
            try:
                source = item['_source']
                uname = source['uname']
            except:
                uname = ''
            out_relation_item = out_beretweet_relation[i][1:]
            a = [uid, uname]
            a.extend(out_relation_item)
            #print 'add_item:', add_item
            new_out_beretweet_relation.append(a)
        result = [
            new_retweet_relation, density, retweet_weibo_count,
            retweet_user_count, new_out_beretweet_relation
        ]
    if module == 'think':
        domain_dict = json.loads(es_result['domain'])
        topic_dict = json.loads(es_result['topic'])
        psycho_status = json.loads(es_result['psycho_status'])
        psycho_feature = json.loads(es_result['psycho_feature'])
        result = [domain_dict, topic_dict, psycho_status, psycho_feature]
    if module == 'text':
        hashtag_dict = json.loads(es_result['hashtag'])
        sort_hashtag = sorted(hashtag_dict.items(), key=lambda x: x[1], reverse=True)
        hashtag = sort_hashtag[:50]
        emoticon_dict = json.loads(es_result['emoticon'])
        sort_emoticon = sorted(emoticon_dict.items(), key=lambda x: x[1], reverse=True)
        emoticon = sort_emoticon[:5]
        keyword_dict = json.loads(es_result['keywords'])
        sort_keyword = sorted(keyword_dict.items(), key=lambda x: x[1], reverse=True)
        keyword = sort_keyword[:50]
        result = [hashtag, keyword, emoticon]
    if module == 'influence':
        importance_dis = json.loads(es_result['importance_his'])
        activeness_his = json.loads(es_result['activeness_his'])
        influence_his = json.loads(es_result['influence_his'])
        user_influence_list = json.loads(es_result['user_influence_list'])
        user_influence_result = []
        # Items 5..8 of each user row are (number, mid) pairs; expand each
        # into (number, mid, weibolink).
        for user_item in user_influence_list:
            uid = user_item[0]
            result_item = user_item[:5]
            for i in range(5, 9):
                item = user_item[i]
                mid = item[1]
                number = item[0]
                if mid != 0 and uid:
                    weibolink = weiboinfo2url(uid, mid)
                else:
                    weibolink = None
                result_item.append((number, mid, weibolink))
            user_influence_result.append(result_item)
        '''
        origin_max_retweeted_number =es_result['origin_max_retweeted_number']
        origin_max_retweeted_id = es_result['origin_max_retweeted_id']
        origin_max_retweeted_user = es_result['origin_max_retweeted_user']
        if origin_max_retweeted_id != 0 and origin_max_retweeted_user != 0:
            origin_max_retweeted_weibolink = weiboinfo2url(origin_max_retweeted_user, origin_max_retweeted_id)
        else:
            origin_max_retweeted_weibolink = None
        origin_max_comment_number = es_result['origin_max_comment_number']
        origin_max_comment_id = es_result['origin_max_comment_id']
        origin_max_comment_user = es_result['origin_max_comment_user']
        if origin_max_comment_id !=0 and origin_max_comment_user != 0:
            origin_max_comment_weibolink = weiboinfo2url(origin_max_comment_user, origin_max_comment_id)
        else:
            origin_max_comment_weibolink = None
        retweet_max_retweeted_number = es_result['retweet_max_retweeted_number']
        retweet_max_retweeted_id = es_result['retweet_max_retweeted_id']
        retweet_max_retweeted_user = es_result['retweet_max_retweeted_user']
        if retweet_max_retweeted_id != 0 and retweet_max_retweeted_user != 0:
            retweet_max_retweeted_weibolink = weiboinfo2url(retweet_max_retweeted_user, retweet_max_retweeted_id)
        else:
            retweet_max_retweeted_weibolink = None
        retweet_max_comment_number = es_result['retweet_max_comment_number']
        retweet_max_comment_id = es_result['retweet_max_comment_id']
        retweet_max_comment_user = es_result['retweet_max_comment_user']
        if retweet_max_comment_id != 0 and retweet_max_comment_user != 0:
            retweet_max_comment_weibolink = weiboinfo2url(retweet_max_comment_user, retweet_max_comment_id)
        else:
            retweet_max_comment_weibolink = None
        '''
        result = [
            importance_dis, activeness_his, influence_his,
            user_influence_result
        ]
    #print result
    return result
def ajax_portrait_related():
    """Return a JSON summary of a user's portrait: domain, topic, politics,
    custom tags, top sensitive words, keywords, most active location, and top
    hashtag. Empty uid yields '{}'.
    """
    uid = request.args.get('uid', '')  # uid
    results = dict()
    if uid:
        portrait_result = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid)['_source']
        results["domain"] = portrait_result['domain']
        results["topic"] = portrait_result["topic_string"].replace("&", " ")
        results["politics"] = portrait_result["politics"]
        # Custom tags are stored as 'tag-<name>' fields on the document.
        custom_tag = []
        for key in portrait_result:
            if "tag-" in key:
                tag_value = portrait_result[key]
                temp_list = key.split("-")
                key = "-".join(temp_list[1:])  # strip the 'tag-' prefix
                custom_string = key + "-" + tag_value
                custom_tag.append([key, tag_value])
        if custom_tag:
            results["tag_detail"] = custom_tag
            # NOTE(review): only the last tag's string survives the loop;
            # confirm whether tag_string should join all tags.
            results["tag_string"] = custom_string
        else:
            results["tag_detail"] = []
            results["tag_string"] = ""
        sensitive_words_dict = json.loads(portrait_result["sensitive_words_dict"])
        if sensitive_words_dict:
            sorted_sensitive_words = sorted(sensitive_words_dict.items(), key=lambda x: x[1], reverse=True)
            tmp = sorted_sensitive_words[:3]
            sensitive_words_list = [item[0] for item in tmp]
            results["sensitive_words_string"] = " ".join(sensitive_words_list)
            results["sensitive_words_detail"] = sorted_sensitive_words
        else:
            results["sensitive_words_string"] = ""
            results["sensitive_words_detail"] = []
        # NOTE(review): the [:3] slice implies keywords_dict is a LIST of
        # [word, weight] pairs despite the '_dict' name — a real dict would
        # raise TypeError here; verify the stored format.
        keywords_dict = json.loads(portrait_result["keywords_dict"])
        results["keywords_detail"] = keywords_dict
        if keywords_dict:
            tmp = keywords_dict[:3]
            keywords_list = [item[0] for item in tmp]
            results["keywords_string"] = " ".join(keywords_list)
        else:
            results["keywords_string"] = ""
        # activity_geo_dict iterates as per-day dicts of location -> count;
        # merge them into one overall tally.
        activity_geo_dict = json.loads(portrait_result["activity_geo_dict"])
        geo_dict = {}
        for item in activity_geo_dict:
            for k, v in item.iteritems():
                if geo_dict.has_key(k):
                    geo_dict[k] += v
                else:
                    geo_dict[k] = v
        if geo_dict:
            sorted_geo_dict = sorted(geo_dict.items(), key=lambda x: x[1], reverse=True)
            results["geo_activity"] = sorted_geo_dict[0][0]
            results["geo_activity_detail"] = sorted_geo_dict
        else:
            results["geo_activity"] = ""
            results["geo_activity_detail"] = []
        hashtag_dict = json.loads(portrait_result["hashtag_dict"])
        sorted_hashtag_dict = sorted(hashtag_dict.items(), key=lambda x: x[1], reverse=True)
        if sorted_hashtag_dict:
            results["hashtag"] = sorted_hashtag_dict[0][0]
            results["hashtag_detail"] = sorted_hashtag_dict
        else:
            results["hashtag"] = ""
            results["hashtag_detail"] = []
    return json.dumps(results)
def get_group_results(task_name, module):
    """Fetch a stored group-analysis document and render one of its modules.

    Parameters:
        task_name -- ES document id of the group-analysis task.
        module    -- one of 'overview', 'basic', 'activity', 'social',
                     'think', 'text', 'influence'; selects which slice of the
                     stored result is decoded and returned.
    Returns a module-specific list, [] for an unknown module, or None when the
    task document cannot be fetched.
    """
    result = []
    try:
        # index_name / index_type come from module-level globals.
        es_result = es.get(index=index_name, doc_type=index_type, id=task_name)['_source']
        #print 'result:', result
    except:
        return None
    #basic module: gender, count, verified
    if module=='overview':
        # Headline task metadata and the four group-level scores.
        task_name = es_result['task_name']
        submit_date = es_result['submit_date']
        state = es_result['state']
        tightness = es_result['tightness']
        activeness = es_result['activeness']
        importance = es_result['importance']
        influence = es_result['influence']
        result = [task_name, submit_date, state, tightness, activeness, importance, influence]
    if module=='basic':
        gender_dict = json.loads(es_result['gender'])
        count = es_result['count']
        verified = es_result['verified']
        if verified:
            # NOTE(review): verified_dict is computed but never used; the raw
            # JSON string `verified` is what ends up in the result — possibly
            # verified_dict was intended. Left as-is.
            verified_dict = json.loads(verified)
        result = [gender_dict, count, verified]
    if module=='activity':
        activity_geo_dict = json.loads(es_result['activity_geo'])
        sort_activity_geo = sorted(activity_geo_dict.items(), key=lambda x:x[1], reverse=True)
        activity_geo = sort_activity_geo[:50]
        activity_trend = json.loads(es_result['activity_trend'])
        online_pattern_dict = json.loads(es_result['online_pattern'])
        sort_online_pattern = sorted(online_pattern_dict.items(), key=lambda x:x[1], reverse=True)
        online_pattern = sort_online_pattern[:50]
        geo_track = json.loads(es_result['geo_track'])
        result = [activity_geo, activity_trend, online_pattern, geo_track]
    if module=='social':
        #degree_his = json.loads(es_result['degree_his'])
        density = es_result['density']
        retweet_weibo_count = es_result['retweet_weibo_count']
        retweet_user_count = es_result['retweet_user_count']
        # retweet_relation: presumably a list of [source_uid, target_uid, count]
        # triples — TODO confirm against the writer of this index.
        retweet_relation = json.loads(es_result['retweet_relation'])
        uid_list = []
        for relation in retweet_relation:
            uid_list.append(relation[0])
            uid_list.append(relation[1])
        # One mget for all endpoints; es_count walks the docs in the same
        # source,target,source,target,... order uid_list was built in.
        es_portrait_result = es.mget(index='user_portrait', doc_type='user', body={'ids':uid_list})['docs']
        es_count = 0
        new_retweet_relation = []
        for relation in retweet_relation:
            source_uid = relation[0]
            source_item = es_portrait_result[es_count]
            try:
                source = source_item['_source']
                source_uname = source['uname']
            except:
                source_uname = ''  # uid not in the portrait index
            target_uid = relation[1]
            es_count += 1
            target_item = es_portrait_result[es_count]
            try:
                source = target_item['_source']
                target_uname = source['uname']
            except:
                target_uname = ''
            count = relation[2]
            new_retweet_relation.append([source_uid, source_uname, target_uid, target_uname, count])
        uid_list = []
        out_beretweet_relation = json.loads(es_result['out_beretweet_relation'])
        uid_list = []
        uid_list = [item[0] for item in out_beretweet_relation]
        # Decorate the out-of-group retweeted users with unames the same way.
        es_portrait_result = es.mget(index='user_portrait', doc_type='user', body={'ids':uid_list})['docs']
        es_count = 0
        new_out_beretweet_relation = []
        for i in range(len(uid_list)):
            item = es_portrait_result[i]
            uid = item['_id']
            try:
                source = item['_source']
                uname = source['uname']
            except:
                uname = ''
            out_relation_item = out_beretweet_relation[i][1:]
            a = [uid, uname]
            a.extend(out_relation_item)
            #print 'add_item:', add_item
            new_out_beretweet_relation.append(a)
        result = [new_retweet_relation, density, retweet_weibo_count, retweet_user_count, new_out_beretweet_relation]
    if module=='think':
        domain_dict = json.loads(es_result['domain'])
        topic_dict = json.loads(es_result['topic'])
        psycho_status = json.loads(es_result['psycho_status'])
        psycho_feature = json.loads(es_result['psycho_feature'])
        result = [domain_dict, topic_dict, psycho_status, psycho_feature]
    if module=='text':
        hashtag_dict = json.loads(es_result['hashtag'])
        sort_hashtag = sorted(hashtag_dict.items(), key=lambda x:x[1], reverse=True)
        hashtag = sort_hashtag[:50]
        emoticon_dict = json.loads(es_result['emoticon'])
        sort_emoticon = sorted(emoticon_dict.items(), key=lambda x:x[1], reverse=True)
        emoticon = sort_emoticon[:5]
        keyword_dict = json.loads(es_result['keywords'])
        sort_keyword = sorted(keyword_dict.items(), key=lambda x:x[1], reverse=True)
        keyword = sort_keyword[:50]
        result = [hashtag, keyword, emoticon]
    if module=='influence':
        importance_dis = json.loads(es_result['importance_his'])
        activeness_his = json.loads(es_result['activeness_his'])
        influence_his = json.loads(es_result['influence_his'])
        user_influence_list = json.loads(es_result['user_influence_list'])
        user_influence_result = []
        for user_item in user_influence_list:
            uid = user_item[0]
            result_item = user_item[:5]
            # Items 5..8 are presumably (number, mid) pairs for top weibos;
            # each is expanded into (number, mid, weibolink) — TODO confirm.
            for i in range(5,9):
                item = user_item[i]
                mid = item[1]
                number = item[0]
                if mid != 0 and uid:
                    weibolink = weiboinfo2url(uid, mid)
                else:
                    weibolink = None
                result_item.append((number, mid, weibolink))
            user_influence_result.append(result_item)
        # NOTE(review): dead code kept as a triple-quoted string below.
        '''
        origin_max_retweeted_number =es_result['origin_max_retweeted_number']
        origin_max_retweeted_id = es_result['origin_max_retweeted_id']
        origin_max_retweeted_user = es_result['origin_max_retweeted_user']
        if origin_max_retweeted_id != 0 and origin_max_retweeted_user != 0:
            origin_max_retweeted_weibolink = weiboinfo2url(origin_max_retweeted_user, origin_max_retweeted_id)
        else:
            origin_max_retweeted_weibolink = None
        origin_max_comment_number = es_result['origin_max_comment_number']
        origin_max_comment_id = es_result['origin_max_comment_id']
        origin_max_comment_user = es_result['origin_max_comment_user']
        if origin_max_comment_id !=0 and origin_max_comment_user != 0:
            origin_max_comment_weibolink = weiboinfo2url(origin_max_comment_user, origin_max_comment_id)
        else:
            origin_max_comment_weibolink = None
        retweet_max_retweeted_number = es_result['retweet_max_retweeted_number']
        retweet_max_retweeted_id = es_result['retweet_max_retweeted_id']
        retweet_max_retweeted_user = es_result['retweet_max_retweeted_user']
        if retweet_max_retweeted_id != 0 and retweet_max_retweeted_user != 0:
            retweet_max_retweeted_weibolink = weiboinfo2url(retweet_max_retweeted_user, retweet_max_retweeted_id)
        else:
            retweet_max_retweeted_weibolink = None
        retweet_max_comment_number = es_result['retweet_max_comment_number']
        retweet_max_comment_id = es_result['retweet_max_comment_id']
        retweet_max_comment_user = es_result['retweet_max_comment_user']
        if retweet_max_comment_id != 0 and retweet_max_comment_user != 0:
            retweet_max_comment_weibolink = weiboinfo2url(retweet_max_comment_user, retweet_max_comment_id)
        else:
            retweet_max_comment_weibolink = None
        '''
        result = [importance_dis, activeness_his, influence_his, user_influence_result]
    #print result
    return result
def search_attribute_portrait(uid):
    """Build the full portrait-detail payload for one sensitive-portrait user.

    Collects profile basics, keyword/topic/domain/geo/hashtag summaries,
    ranking positions (importance/activeness/influence/sensitive, via ES
    count queries over the 'sensitive_user_portrait' index), and a 7-day
    influence trend read from per-day 'bci' indices.
    Returns a dict, or None when the uid is not in the portrait index.
    NOTE(review): Python 2 code; contains debug prints and dead code blocks.
    """
    return_results = {}
    # Local names shadow the module-level index_name/index_type globals.
    index_name = "sensitive_user_portrait"
    index_type = "user"
    try:
        search_result = es.get(index=index_name, doc_type=index_type, id=uid)
    except:
        return None
    results = search_result['_source']
    #return_results = results
    user_sensitive = user_type(uid)
    if user_sensitive:
        #return_results.update(sensitive_attribute(uid))
        return_results['user_type'] = 1
        return_results['sensitive'] = 1
    else:
        return_results['user_type'] = 0
        return_results['sensitive'] = 0
    # Missing profile fields are stored as 0; normalize to 'unknown'.
    if results['photo_url'] == 0:
        results['photo_url'] = 'unknown'
    if results['location'] == 0:
        results['location'] = 'unknown'
    return_results['photo_url'] = results['photo_url']
    return_results['uid'] = results['uid']
    return_results['uname'] = results['uname']
    if return_results['uname'] == 0:
        return_results['uname'] = 'unknown'
    return_results['location'] = results['location']
    return_results['fansnum'] = results['fansnum']
    return_results['friendsnum'] = results['friendsnum']
    return_results['gender'] = results['gender']
    return_results['psycho_status'] = json.loads(results['psycho_status'])
    keyword_list = []  # NOTE(review): unused local
    if results['keywords']:
        keywords_dict = json.loads(results['keywords'])
        sort_word_list = sorted(keywords_dict.items(), key=lambda x: x[1], reverse=True)
        return_results['keywords'] = sort_word_list
    else:
        return_results['keywords'] = []
    # Relationship panels (second arg 0 = non-sensitive variant; cf.
    # sensitive_attribute which passes 1 — TODO confirm flag meaning).
    return_results['retweet'] = search_retweet(uid, 0)
    return_results['follow'] = search_follower(uid, 0)
    return_results['at'] = search_mention(uid, 0)
    if results['ip'] and results['geo_activity']:
        ip_dict = json.loads(results['ip'])
        geo_dict = json.loads(results['geo_activity'])
        geo_description = active_geo_description(ip_dict, geo_dict)
        return_results['geo_description'] = geo_description
    else:
        return_results['geo_description'] = ''
    geo_top = []
    temp_geo = {}
    if results['geo_activity']:
        # geo_activity: {date('YYYYMMDD'): {place: count}} JSON.
        geo_dict = json.loads(results['geo_activity'])
        if len(geo_dict) < 7:
            # Pad out to a fixed 7-day window; hard-coded to the 2013-09-01..07
            # test window (the time.time() value is immediately overwritten).
            ts = time.time()
            ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
            for i in range(7):
                ts = ts + 24 * 3600
                date = ts2datetime(ts).replace('-', '')
                if geo_dict.has_key(date):
                    pass
                else:
                    geo_dict[date] = {}
        activity_geo_list = sorted(geo_dict.items(), key=lambda x: x[0], reverse=False)
        geo_list = geo_dict.values()  # NOTE(review): unused local
        for k, v in activity_geo_list:
            # Per day: top-2 places; also accumulate the overall tally.
            sort_v = sorted(v.items(), key=lambda x: x[1], reverse=True)
            top_geo = [item[0] for item in sort_v]
            geo_top.append([k, top_geo[0:2]])
            for iter_key in v.keys():
                if temp_geo.has_key(iter_key):
                    temp_geo[iter_key] += v[iter_key]
                else:
                    temp_geo[iter_key] = v[iter_key]
        sort_geo_dict = sorted(temp_geo.items(), key=lambda x: x[1], reverse=True)
        return_results['top_activity_geo'] = sort_geo_dict
        return_results['activity_geo_distribute'] = geo_top
    else:
        return_results['top_activity_geo'] = []
        return_results['activity_geo_distribute'] = geo_top
    hashtag_dict = get_user_hashtag(uid)[0]
    return_results['hashtag'] = hashtag_dict
    '''
    emotion_result = {}
    emotion_conclusion_dict = {}
    if results['emotion_words']:
        emotion_words_dict = json.loads(results['emotion_words'])
        for word_type in emotion_mark_dict:
            try:
                word_dict = emotion_words_dict[word_type]
                if word_type=='126' or word_type=='127':
                    emotion_conclusion_dict[word_type] = word_dict
                sort_word_dict = sorted(word_dict.items(), key=lambda x:x[1], reverse=True)
                word_list = sort_word_dict[:5]
            except:
                results['emotion_words'] = emotion_result
            emotion_result[emotion_mark_dict[word_type]] = word_list
    return_results['emotion_words'] = emotion_result
    '''
    # topic
    if results['topic']:
        topic_dict = json.loads(results['topic'])
        sort_topic_dict = sorted(topic_dict.items(), key=lambda x: x[1], reverse=True)
        return_results['topic'] = sort_topic_dict[:5]
    else:
        return_results['topic'] = []
    # domain
    if results['domain']:
        domain_string = results['domain']
        domain_list = domain_string.split('_')
        return_results['domain'] = domain_list
    else:
        return_results['domain'] = []
    '''
    # emoticon
    if results['emotion']:
        emotion_dict = json.loads(results['emotion'])
        sort_emotion_dict = sorted(emotion_dict.items(), key=lambda x:x[1], reverse=True)
        return_results['emotion'] = sort_emotion_dict[:5]
    else:
        return_results['emotion'] = []
    '''
    # on_line pattern
    if results['online_pattern']:
        online_pattern_dict = json.loads(results['online_pattern'])
        sort_online_pattern_dict = sorted(online_pattern_dict.items(), key=lambda x: x[1], reverse=True)
        return_results['online_pattern'] = sort_online_pattern_dict[:5]
    else:
        return_results['online_pattern'] = []
    '''
    #psycho_feature
    if results['psycho_feature']:
        psycho_feature_list = results['psycho_feature'].split('_')
        return_results['psycho_feature'] = psycho_feature_list
    else:
        return_results['psycho_feature'] = []
    '''
    # self_state: the user's self-description from the profile index.
    try:
        profile_result = es_user_profile.get(index='weibo_user', doc_type='user', id=uid)
        self_state = profile_result['_source'].get('description', '')
        return_results['description'] = self_state
    except:
        return_results['description'] = ''
    # Rank = number of users whose score is >= this user's (range count).
    if results['importance']:
        query_body = {
            'query': {
                'range': {
                    'importance': {
                        'from': results['importance'],
                        'to': 100000
                    }
                }
            }
        }
        importance_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if importance_rank['_shards']['successful'] != 0:
            return_results['importance_rank'] = importance_rank['count']
        else:
            return_results['importance_rank'] = 0
    else:
        return_results['importance_rank'] = 0
    return_results['importance'] = results['importance']
    if results['activeness']:
        query_body = {
            'query': {
                'range': {
                    'activeness': {
                        'from': results['activeness'],
                        'to': 10000
                    }
                }
            }
        }
        activeness_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        print activeness_rank  # NOTE(review): debug print left in production code
        if activeness_rank['_shards']['successful'] != 0:
            return_results['activeness_rank'] = activeness_rank['count']
        else:
            return_results['activeness_rank'] = 0
    else:
        return_results['activeness_rank'] = 0
    return_results['activeness'] = results['activeness']
    if results['influence']:
        query_body = {
            'query': {
                'range': {
                    'influence': {
                        'from': results['influence'],
                        'to': 100000
                    }
                }
            }
        }
        influence_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if influence_rank['_shards']['successful'] != 0:
            return_results['influence_rank'] = influence_rank['count']
        else:
            return_results['influence_rank'] = 0
    else:
        return_results['influence_rank'] = 0
    return_results['influence'] = results['influence']
    if results['sensitive']:
        query_body = {
            'query': {
                'range': {
                    'sensitive': {
                        'from': results['sensitive'],
                        'to': 100000
                    }
                }
            }
        }
        influence_rank = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
        if influence_rank['_shards']['successful'] != 0:
            return_results['sensitive_rank'] = influence_rank['count']
        else:
            return_results['sensitive_rank'] = 0
    else:
        return_results['sensitive_rank'] = 0
    return_results['sensitive'] = results['sensitive']
    # Total portrait population, for rendering "rank / all_count".
    query_body = {'query': {"match_all": {}}}
    all_count = es.count(index='sensitive_user_portrait', doc_type='user', body=query_body)
    if all_count['_shards']['successful'] != 0:
        return_results['all_count'] = all_count['count']
    else:
        print 'es_sensitive_user_portrait error'
        return_results['all_count'] = 0
    # link
    link_ratio = results['link']
    return_results['link'] = link_ratio
    weibo_trend = get_user_trend(uid)[0]
    return_results['time_description'] = active_time_description(weibo_trend)
    return_results['time_trend'] = weibo_trend
    # user influence trend: one per-day 'bci' document per day of the fixed
    # 2013-09-01..07 test window (index name == 'YYYYMMDD' date).
    influence_detail = []
    influence_value = []
    attention_value = []
    ts = time.time()
    ts = datetime2ts('2013-09-08') - 8 * 24 * 3600
    for i in range(1, 8):
        date = ts2datetime(ts + i * 24 * 3600).replace('-', '')
        detail = [0] * 10
        try:
            item = es.get(index=date, doc_type='bci', id=uid)['_source']
            '''
            if return_results['utype']:
                detail[0] = item.get('s_origin_weibo_number', 0)
                detail[1] = item.get('s_retweeted_weibo_number', 0)
                detail[2] = item.get('s_origin_weibo_retweeted_total_number', 0) + item.get('s_retweeted_weibo_retweeted_total_number', 0)
                detail[3] = item.get('s_origin_weibo_comment_total_number', 0) + item.get('s_retweeted_weibo_comment_total_number', 0)
            else:
            '''
            if 1:
                # detail[0..3]: weibo counts and total retweet/comment volume.
                detail[0] = item.get('origin_weibo_number', 0)
                detail[1] = item.get('retweeted_weibo_number', 0)
                detail[2] = item.get(
                    'origin_weibo_retweeted_total_number', 0) + item.get(
                        'retweeted_weibo_retweeted_total_number', 0)
                detail[3] = item.get(
                    'origin_weibo_comment_total_number', 0) + item.get(
                        'retweeted_weibo_comment_total_number', 0)
            # detail[4..6]: top-retweeted weibo id, its text, its retweet count.
            retweeted_id = item.get('origin_weibo_top_retweeted_id', '0')
            detail[4] = retweeted_id
            if retweeted_id:
                try:
                    detail[5] = es.get(index='sensitive_user_text', doc_type='user', id=retweeted_id)['_source']['text']
                except:
                    detail[5] = ''
            else:
                detail[5] = ''
            detail[6] = item.get('origin_weibo_retweeted_top_number', 0)
            # detail[7..9]: top-commented weibo id, its text, its comment count.
            detail[7] = item.get('origin_weibo_top_comment_id', '0')
            if detail[7]:
                try:
                    detail[8] = es.get(index='sensitive_user_text', doc_type='user', id=detail[7])['_source']['text']
                except:
                    detail[8] = ''
            else:
                detail[8] = ''
            detail[9] = item.get('origin_weibo_comment_top_number', 0)
            # Squash retweet+comment volume into [0,1) via a logistic curve.
            attention_number = detail[2] + detail[3]
            attention = 2 / (1 + math.exp(-0.005 * attention_number)) - 1
            influence_value.append([date, item['user_index']])
            influence_detail.append([date, detail])
            attention_value.append(attention)
        except:
            # Missing day: zero influence/attention, zeroed detail row.
            influence_value.append([date, 0])
            influence_detail.append([date, detail])
            attention_value.append(0)
    return_results['influence_trend'] = influence_value
    return_results['common_influence_detail'] = influence_detail
    return_results['attention_degree'] = attention_value
    return return_results
def search_full_text(uid, date):
    """Return one day's weibos for a user, with retweet/comment counts.

    Queries the 'sensitive_user_text' index for up to 200 weibos of `uid`
    whose timestamp falls inside `date` ('YYYY-MM-DD'), then looks up
    per-weibo retweet/comment counts in that day's 'bci' document
    (sensitive `s_*` detail fields when the weibo is sensitive, plain
    fields otherwise).

    Returns: list of [sensitive, message_type, 'HH:MM:SS', geo, text,
    retweeted_number, comment_number] rows.

    NOTE(review): this definition is shadowed by a later function of the
    same name in this file; only the later one is reachable at runtime.

    Fixes vs. original:
    - the existence checks for the sensitive/non-sensitive retweeted-weibo
      comment detail used misspelled keys ('s_retweetd_…', 'retweetd_…')
      while the load used the correctly spelled keys, so comment counts of
      retweeted weibos were silently dropped;
    - removed an unguarded debug `print weibo_bci['origin_weibo_retweeted_detail']`
      that raised KeyError (and crashed the whole request) whenever the
      field was absent.
    """
    result = []
    ts = datetime2ts(date)
    next_ts = ts + 24 * 3600
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [{
                            "term": {
                                "uid": uid
                            }
                        }, {
                            "range": {
                                "timestamp": {
                                    "gte": ts,
                                    "lt": next_ts
                                }
                            }
                        }]
                    }
                }
            }
        },
        "size": 200
    }
    search_results = es.search(index='sensitive_user_text', doc_type="user", body=query_body)['hits']['hits']
    for item in search_results:
        detail = []
        source = item['_source']
        detail.append(source['sensitive'])
        detail.append(source['message_type'])
        ts = source['timestamp']
        re_time = time.strftime('%H:%M:%S', time.localtime(float(ts)))
        detail.append(re_time)
        # geo is '/t'-separated; keep only the last two components (e.g.
        # city/district) when there are three or more.
        geo_string = source['geo']
        geo_list = geo_string.split('/t')
        if len(geo_list) >= 3:
            geo = '/t'.join(geo_list[-2:])
        else:
            geo = geo_string
        detail.append(geo)
        detail.append(source['text'])
        date = date.replace('-', '')  # bci index names are 'YYYYMMDD'
        mid = source['mid']
        try:
            weibo_bci = es.get(index=date, doc_type='bci', id=uid)['_source']
        except:
            weibo_bci = {}
        retweeted_number = 0
        comment_number = 0
        if source['sensitive']:
            # Sensitive weibo: counts live in the 's_*' detail fields.
            if int(source['message_type']) == 1:  # original weibo
                if weibo_bci:
                    if weibo_bci.get('s_origin_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(
                            weibo_bci['s_origin_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    if weibo_bci.get('s_origin_weibo_comment_detail', {}):
                        comment_detail = json.loads(
                            weibo_bci['s_origin_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            elif int(source['message_type']) == 2:  # retweeted weibo
                if weibo_bci:
                    if weibo_bci.get('s_retweeted_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(
                            weibo_bci['s_retweeted_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    # BUGFIX: check the same (correctly spelled) key that is loaded.
                    if weibo_bci.get('s_retweeted_weibo_comment_detail', {}):
                        comment_detail = json.loads(
                            weibo_bci['s_retweeted_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            else:
                pass
        else:
            # Non-sensitive weibo: plain detail fields.
            if int(source['message_type']) == 1:
                if weibo_bci:
                    if weibo_bci.get('origin_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(
                            weibo_bci['origin_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    if weibo_bci.get('origin_weibo_comment_detail', {}):
                        comment_detail = json.loads(
                            weibo_bci['origin_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            elif int(source['message_type']) == 2:
                if weibo_bci:
                    if weibo_bci.get('retweeted_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(
                            weibo_bci['retweeted_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    # BUGFIX: check the same (correctly spelled) key that is loaded.
                    if weibo_bci.get('retweeted_weibo_comment_detail', {}):
                        comment_detail = json.loads(
                            weibo_bci['retweeted_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            else:
                pass
        detail.append(retweeted_number)
        detail.append(comment_number)
        result.append(detail)
    return result
def sort_sensitive_text(uid):
    """Collect a user's sensitive weibos as display rows.

    For every sensitive text returned by search_sensitive_text(uid), builds
    [ts, geo, text, sensitive_words, retweeted_link, sentiment, message_type,
    retweeted_number, comment_number].  Sentiment is a coarse -1/0/1 vote
    from the stored per-category sentiment dict; retweet/comment counts come
    from that day's 'bci' document and fall back to 0 on any failure.
    """
    sensitive_text = search_sensitive_text(uid)
    text_all = []
    if sensitive_text:
        for item in sensitive_text:
            text_detail = []
            item = item['_source']
            if not item['sensitive']:
                continue  # only sensitive weibos belong in this panel
            text = item['text'].encode('utf-8', 'ignore')
            sentiment_dict = json.loads(item['sentiment'])
            if not sentiment_dict:
                sentiment = 0
            else:
                # Category '126' counts as positive; '127'-'129' as negative
                # — presumably project-wide sentiment codes, TODO confirm.
                positive = len(sentiment_dict.get('126', {}))
                negetive = len(sentiment_dict.get('127', {})) + len(
                    sentiment_dict.get('128', {})) + len(
                        sentiment_dict.get('129', {}))
                if positive > negetive:
                    sentiment = 1
                elif positive < negetive:
                    sentiment = -1
                else:
                    sentiment = 0
            ts = item['timestamp']
            uid = item['uid']  # NOTE(review): rebinds the function parameter
            mid = item['mid']
            message_type = item.get('message_type', 0)
            date = ts2datetime(float(ts)).replace('-', '')
            try:
                # NOTE(review): unlike search_full_text, the s_* detail fields
                # are indexed directly (no json.loads) here — assumes they are
                # already dicts in this document; TODO confirm.
                bci_result = es.get(index=date, doc_type='bci', id=uid)['_source']
                if int(message_type) == 1:
                    retweeted_number = bci_result[
                        's_origin_weibo_retweeted_detail'].get(mid)
                    comment_number = bci_result[
                        's_origin_weibo_comment_detail'].get(mid)
                elif int(message_type) == 2:
                    retweeted_number = bci_result[
                        's_retweeted_weibo_retweeted_detail'].get(mid)
                    comment_number = bci_result[
                        's_retweeted_weibo_comment_detail'].get(mid)
                else:
                    retweeted_number = 0
                    comment_number = 0
            except:
                retweeted_number = 0
                comment_number = 0
            single_sw = item.get('sensitive_words', {})
            if single_sw:
                sw = json.loads(single_sw).keys()
            else:
                # print item
                sw = []
            geo = item['geo']
            retweeted_link = extract_uname(text)
            text_detail.extend([
                ts, geo, text, sw, retweeted_link, sentiment, message_type,
                retweeted_number, comment_number
            ])
            text_all.append(text_detail)
    return text_all
def sensitive_attribute(uid, date):
    """Build the sensitive-side portrait payload for one user.

    Aggregates sensitive weibo statistics from the daily 'bci' index,
    sensitive texts, a 7-day sentiment trend, and the sensitive
    geo/hashtag/words histories stored on the portrait document.
    Returns {'utype': 0} immediately when user_type(uid) is falsy.

    NOTE(review): the `date` parameter is never used — it is overwritten
    with a hard-coded test date ('20130907') before first use.
    """
    results = {}
    portrait = {}  # NOTE(review): unused local
    utype = user_type(uid)
    if not utype:
        results['utype'] = 0
        return results
    results['utype'] = 1
    results['uid'] = uid
    portrait_result = es.get(index='sensitive_user_portrait', doc_type='user', id=uid)['_source']
    # Missing profile fields are stored as 0; normalize to 'unknown'.
    results['uname'] = portrait_result['uname']
    if portrait_result['uname'] == 0:
        results['uname'] = 'unknown'
    if portrait_result['photo_url'] == 0:
        portrait_result['photo_url'] = 'unknown'
    if portrait_result['location'] == 0:
        portrait_result['location'] = 'unknown'
    results['photo_url'] = portrait_result['photo_url']
    # sensitive weibo number statistics
    date = ts2datetime(time.time()-24*3600).replace('-', '')
    date = '20130907' # test: hard-coded day overrides "yesterday"
    influence_results = []
    try:
        influence_results = es.get(index=date, doc_type='bci', id=uid)['_source']
        results['sensitive_origin_weibo_number'] = influence_results.get('s_origin_weibo_number', 0)
        results['sensitive_retweeted_weibo_number'] = influence_results.get('s_retweeted_weibo_number', 0)
        results['sensitive_comment_weibo_number'] = int(influence_results.get('s_comment_weibo_number', 0))
        results['sensitive_retweeted_weibo_retweeted_total_number'] = influence_results.get('s_retweeted_weibo_retweeted_total_number', 0)
        results['sensitive_origin_weibo_retweeted_total_number'] = influence_results.get('s_origin_weibo_retweeted_total_number', 0)
        results['sensitive_origin_weibo_comment_total_number'] = influence_results.get('s_origin_weibo_comment_total_number', 0)
        results['sensitive_retweeted_weibo_comment_total_number'] = influence_results.get('s_retweeted_weibo_comment_total_number', 0)
    except:
        # Day document missing: zero every sensitive counter.
        results['sensitive_origin_weibo_number'] = 0
        results['sensitive_retweeted_weibo_number'] = 0
        results['sensitive_comment_weibo_number'] = 0
        results['sensitive_origin_weibo_retweeted_total_number'] = 0
        results['sensitive_origin_weibo_comment_total_number'] = 0
        results['sensitive_retweeted_weibo_retweeted_total_number'] = 0
        results['sensitive_retweeted_weibo_comment_total_number'] = 0
    # Second fetch of the same document for the non-sensitive counters;
    # totals = plain counter + sensitive counter.
    try:
        item = es.get(index=date, doc_type='bci', id=uid)['_source']
    except:
        item = {}
    results['origin_weibo_total_number'] = item.get('origin_weibo_number', 0) + results['sensitive_origin_weibo_number']
    results['retweeted_weibo_total_number'] = item.get('retweeted_weibo_number', 0) + results['sensitive_retweeted_weibo_number']
    results['comment_weibo_total_number'] = int(item.get('comment_weibo_number', 0)) + int(results['sensitive_comment_weibo_number'])
    results['origin_weibo_retweeted_total_number'] = item.get('origin_weibo_retweeted_total_number', 0) + results['sensitive_origin_weibo_retweeted_total_number']
    results['origin_weibo_comment_total_number'] = item.get('origin_weibo_comment_total_number', 0) + results['sensitive_origin_weibo_comment_total_number']
    results['retweeted_weibo_retweeted_total_number'] = item.get('retweeted_weibo_retweeted_total_number', 0)+ results['sensitive_retweeted_weibo_retweeted_total_number']
    results['retweeted_weibo_comment_total_number'] = item.get('retweeted_weibo_comment_total_number', 0) + results['sensitive_retweeted_weibo_comment_total_number']
    results['sensitive_text'] = sort_sensitive_text(uid)
    # Defaults; several are overwritten from the portrait document below.
    results['sensitive_geo_distribute'] = []
    results['sensitive_time_distribute'] = get_user_trend(uid)[1]
    results['sensitive_hashtag'] = []
    results['sensitive_words'] = []
    results['sensitive_hashtag_dict'] = []
    results['sensitive_words_dict'] = []
    results['sensitive_hashtag_description'] = ''
    # Sentiment: emotion_number presumably holds [positive, neutral, negative]
    # style counts — TODO confirm index meaning against user_sentiment_trend.
    sentiment_trend = user_sentiment_trend(uid)
    emotion_number = sentiment_trend[0]
    results['negetive_index'] = float(emotion_number[2])/(emotion_number[2]+emotion_number[1]+emotion_number[0])
    results['negetive_influence'] = float(emotion_number[1])/(emotion_number[2]+emotion_number[1]+emotion_number[0])
    sentiment_dict = sentiment_trend[1]
    datetime = ts2datetime(time.time()).replace('-', '')  # NOTE(review): unused; shadows any `datetime` import
    return_sentiment = dict()
    return_sentiment['positive'] = []
    return_sentiment['neutral'] = []
    return_sentiment['negetive'] = []
    # Fixed 7-day test window 2013-09-01..07 (time.time() overwritten).
    ts = time.time()
    ts = datetime2ts('2013-09-08') - 8*24*3600
    for i in range(1,8):
        ts = ts + 24*3600
        date = ts2datetime(ts).replace('-', '')
        temp = sentiment_dict.get(date, {})
        return_sentiment['positive'].append([temp.get('positive', 0), date])
        return_sentiment['negetive'].append([temp.get('negetive', 0), date])
        return_sentiment['neutral'].append([temp.get('neutral', 0), date])
    results['sentiment_trend'] = return_sentiment
    if 1:
        portrait_results = es.get(index="sensitive_user_portrait", doc_type='user', id=uid)['_source']
        results['politics_trend'] = portrait_results['politics_trend']
        results['domain'] = portrait_results['domain']
        results['sensitive'] = portrait_results['sensitive']
        temp_hashtag = portrait_results['sensitive_hashtag_dict']
        temp_sensitive_words = portrait_results['sensitive_words_dict']
        temp_sensitive_geo = portrait_results['sensitive_geo_activity']
        if temp_sensitive_geo:
            # {date: {place: count}} — pad to the 7-day window, then list
            # each day's top-2 places.
            sensitive_geo_dict = json.loads(temp_sensitive_geo)
            if len(sensitive_geo_dict) < 7:
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8*24*3600
                for i in range(7):
                    ts = ts + 24*3600
                    date = ts2datetime(ts).replace('-', '')
                    if sensitive_geo_dict.has_key(date):
                        pass
                    else:
                        sensitive_geo_dict[date] = {}
            sorted_sensitive_geo = sorted(sensitive_geo_dict.items(), key=lambda x:x[0], reverse=False)
            sensitive_geo_list = []
            for k,v in sorted_sensitive_geo:
                temp_list = []
                sorted_geo = sorted(v.items(), key=lambda x:x[1], reverse=True)[0:2]
                # print sorted_geo
                temp_list.extend([k, sorted_geo])
                sensitive_geo_list.append(temp_list)
            results['sensitive_geo_distribute'] = sensitive_geo_list
        if temp_hashtag:
            # Pad hashtag history to 7 days; present days get their
            # {tag: count} replaced by a count-sorted pair list.
            hashtag_dict = json.loads(portrait_results['sensitive_hashtag_dict'])
            if len(hashtag_dict) < 7:
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8*24*3600
                for i in range(7):
                    ts = ts + 24*3600
                    date = ts2datetime(ts).replace('-', '')
                    if hashtag_dict.has_key(date):
                        hashtag_dict_detail = hashtag_dict[date]
                        hashtag_dict[date] = sorted(hashtag_dict_detail.items(), key=lambda x:x[1], reverse=True)
                    else:
                        hashtag_dict[date] = {}
            results['sensitive_hashtag_description'] = hashtag_description(hashtag_dict)
        else:
            hashtag_dict = {}
        if temp_sensitive_words:
            sensitive_words_dict = json.loads(temp_sensitive_words)
            if len(sensitive_words_dict) < 7:
                ts = time.time()
                ts = datetime2ts('2013-09-08') - 8*24*3600
                for i in range(7):
                    ts = ts + 24*3600
                    date = ts2datetime(ts).replace('-', '')
                    if sensitive_words_dict.has_key(date):
                        pass
                    else:
                        sensitive_words_dict[date] = {}
        else:
            sensitive_words_dict = {}
        date = ts2datetime(time.time()-24*3600).replace('-', '')
        date = '20130907'  # hard-coded test day again
        today_sensitive_words = sensitive_words_dict.get(date,{})
        results['today_sensitive_words'] = today_sensitive_words
        # Overall hashtag tally: per-day values are (tag, count) pairs here
        # (after the sort above), hence key[0]/key[1].
        all_hashtag_dict = {}
        for item in hashtag_dict:
            detail_hashtag_dict = hashtag_dict[item]
            for key in detail_hashtag_dict:
                if all_hashtag_dict.has_key(key[0]):
                    all_hashtag_dict[key[0]] += key[1]
                else:
                    all_hashtag_dict[key[0]] = key[1]
        # Overall sensitive-word tally: per-day values are {word: count}.
        all_sensitive_words_dict = {}
        for item in sensitive_words_dict:
            detail_words_dict = sensitive_words_dict[item]
            for key in detail_words_dict:
                if all_sensitive_words_dict.has_key(key):
                    all_sensitive_words_dict[key] += detail_words_dict[key]
                else:
                    all_sensitive_words_dict[key] = detail_words_dict[key]
        sorted_hashtag = sorted(all_hashtag_dict.items(), key = lambda x:x[1], reverse=True)
        sorted_words = sorted(all_sensitive_words_dict.items(), key = lambda x:x[1], reverse=True)
        sorted_hashtag_dict = sorted(hashtag_dict.items(), key = lambda x:x[0], reverse=False)
        sorted_words_dict = sorted(sensitive_words_dict.items(), key = lambda x:x[0], reverse=False)
        new_sorted_dict = sort_sensitive_words(sorted_words)
        results['sensitive_hashtag'] = sorted_hashtag
        results['sensitive_words'] = new_sorted_dict
        results['sensitive_hashtag_dict'] = sorted_hashtag_dict
        results['sensitive_words_dict'] = sorted_words_dict
    # Relationship panels (second arg 1 = sensitive variant).
    results['sensitive_retweet'] = search_retweet(uid, 1)
    results['sensitive_follow'] = search_follower(uid, 1)
    results['sensitive_at'] = search_mention(uid, 1)
    return results
def search_full_text(uid, date):
    """Return up to 200 of a user's weibos from the daily flow-text index.

    Queries ``flow_text_index_name_pre + date`` for all weibos of `uid`
    (newest first) and attaches per-weibo retweet/comment counts from that
    day's 'bci' document — sensitive `s_*` detail fields when the weibo is
    flagged sensitive, plain detail fields otherwise.

    Returns: list of [sensitive, message_type, 'HH:MM:SS', geo, text,
    retweeted_number, comment_number] rows.

    NOTE(review): this redefinition shadows the earlier search_full_text in
    this file; consider removing/renaming one of them.

    Fix vs. original: the existence checks for the retweeted-weibo comment
    detail used misspelled keys ('s_retweetd_…' / 'retweetd_…') while the
    load used the correctly spelled keys, so those comment counts were
    silently reported as 0 whenever the field was present.
    """
    index_flow_text = flow_text_index_name_pre + date
    doctype_flow_text = flow_text_index_type
    result = []
    # ts/next_ts are not used by the query below (kept from the older
    # timestamp-filtered variant of this function).
    ts = datetime2ts(date)
    next_ts = ts + 24*3600
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"uid": uid}}
                        ]
                    }
                }
            }
        },
        "size": 200,
        "sort": {"timestamp": {"order": "desc"}}
    }
    search_results = es.search(index=index_flow_text, doc_type=doctype_flow_text, body=query_body)['hits']['hits']
    for item in search_results:
        detail = []
        source = item['_source']
        detail.append(source.get('sensitive', 0))
        detail.append(source['message_type'])
        ts = source['timestamp']
        re_time = time.strftime('%H:%M:%S', time.localtime(float(ts)))
        detail.append(re_time)
        # geo is '/t'-separated; keep only the last two components when
        # there are three or more.
        geo_string = source['geo']
        geo_list = geo_string.split('/t')
        if len(geo_list) >= 3:
            geo = '/t'.join(geo_list[-2:])
        else:
            geo = geo_string
        detail.append(geo)
        detail.append(source['text'])
        date = date.replace('-', '')  # bci index names are 'YYYYMMDD'
        mid = source['mid']
        try:
            weibo_bci = es.get(index=date, doc_type='bci', id=uid)['_source']
        except:
            weibo_bci = {}
        retweeted_number = 0
        comment_number = 0
        if source.get('sensitive', 0):
            # Sensitive weibo: counts live in the 's_*' detail fields.
            if int(source['message_type']) == 1:  # original weibo
                if weibo_bci:
                    if weibo_bci.get('s_origin_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(weibo_bci['s_origin_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    if weibo_bci.get('s_origin_weibo_comment_detail', {}):
                        comment_detail = json.loads(weibo_bci['s_origin_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            elif int(source['message_type']) == 2:  # retweeted weibo
                if weibo_bci:
                    if weibo_bci.get('s_retweeted_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(weibo_bci['s_retweeted_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    # BUGFIX: check the same (correctly spelled) key that is loaded.
                    if weibo_bci.get('s_retweeted_weibo_comment_detail', {}):
                        comment_detail = json.loads(weibo_bci['s_retweeted_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            else:
                pass
        else:
            # Non-sensitive weibo: plain detail fields.
            if int(source['message_type']) == 1:
                if weibo_bci:
                    if weibo_bci.get('origin_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(weibo_bci['origin_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    if weibo_bci.get('origin_weibo_comment_detail', {}):
                        comment_detail = json.loads(weibo_bci['origin_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            elif int(source['message_type']) == 2:
                if weibo_bci:
                    if weibo_bci.get('retweeted_weibo_retweeted_detail', {}):
                        retweeted_detail = json.loads(weibo_bci['retweeted_weibo_retweeted_detail'])
                    else:
                        retweeted_detail = {}
                    retweeted_number = retweeted_detail.get(mid, 0)
                    # BUGFIX: check the same (correctly spelled) key that is loaded.
                    if weibo_bci.get('retweeted_weibo_comment_detail', {}):
                        comment_detail = json.loads(weibo_bci['retweeted_weibo_comment_detail'])
                    else:
                        comment_detail = {}
                    comment_number = comment_detail.get(mid, 0)
            else:
                pass
        detail.append(retweeted_number)
        detail.append(comment_number)
        result.append(detail)
    return result
def ajax_sort_sensitive_words():
    """Flask handler: a user's sensitive words, filtered by level/category.

    Query-string parameters:
        level    -- '0' all levels, '1'/'2'/'3' a single level (levels other
                    than 1-3 fall through to level 3).
        category -- '' for all categories, otherwise an exact category match.
        uid      -- user id whose 'sensitive_words_dict' is aggregated.

    Sums the per-day {word: count} dicts stored on the portrait document,
    sorts by count, decorates via sort_sensitive_words() (items carry level
    at index 2 and category at index 3), then returns the filtered list as
    JSON.

    Fixes vs. original: removed a leftover debug `print new_words_list` and
    the no-op `if 1:` wrapper.
    """
    level_order = request.args.get('level', '')  # 0:all, 1:level 1, 2:level2, 3:level3
    category_order = request.args.get('category', '')  # '': all
    uid = request.args.get('uid', '')
    words_dict = es.get(index='sensitive_user_portrait', doc_type='user', id=uid)['_source']['sensitive_words_dict']
    words_dict = json.loads(words_dict)
    # Fold the per-day dicts into one overall {word: count} tally.
    all_words_dict = dict()
    for v in words_dict.values():
        for key in v:
            if all_words_dict.has_key(key):
                all_words_dict[key] += v[key]
            else:
                all_words_dict[key] = v[key]
    sorted_words = sorted(all_words_dict.items(), key=lambda x: x[1], reverse=True)
    new_words_list = sort_sensitive_words(sorted_words)
    # Bucket by level, applying the optional category filter.
    level_1 = []
    level_2 = []
    level_3 = []
    for item in new_words_list:
        if int(item[2]) == 1:
            if not category_order:
                level_1.append(item)
            else:
                if item[3] == category_order:
                    level_1.append(item)
                else:
                    pass
        elif int(item[2]) == 2:
            if not category_order:
                level_2.append(item)
            else:
                if item[3] == category_order:
                    level_2.append(item)
                else:
                    pass
        elif int(item[2]) == 3:
            if not category_order:
                level_3.append(item)
            else:
                if item[3] == category_order:
                    level_3.append(item)
                else:
                    pass
    new_list = []
    if int(level_order) == 0:
        if not category_order:
            # No filter at all: short-circuit with the full decorated list.
            return json.dumps(new_words_list)
        else:
            new_list.extend(level_1)
            new_list.extend(level_2)
            new_list.extend(level_3)
    elif int(level_order) == 1:
        new_list = level_1
    elif int(level_order) == 2:
        new_list = level_2
    else:
        new_list = level_3
    return json.dumps(new_list)