def save_detect_event_task(input_dict):
    """Save an event-detect task to ES and the detect queue.

    Returns True on success, False when either store fails, or the string
    'task name invalid' when a task with the same id already exists.
    """
    info = input_dict['task_information']
    task_name = info['task_name']
    submit_user = info['submit_user']
    task_id = submit_user + '-' + task_name
    #step1: the task id must not already exist in the group index
    try:
        existing = es_group_result.get(index=group_index_name,
                                       doc_type=group_index_type, id=task_id)
    except:
        existing = {}
    if existing != {}:
        return 'task name invalid'
    #step2: save to es
    es_ok = save_detect2es(input_dict)
    #step3: save to redis
    redis_ok = save_detect2redis(input_dict)
    # both stores must succeed for the submission to count
    return es_ok == True and redis_ok == True
def get_group_member_name(task_name, submit_user): results = [] task_id = submit_user + '-' + task_name #print es_group_result,group_index_name,group_index_type try: group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\ id=task_id)['_source'] except: return results uid_list = group_result['uid_list'] print len(uid_list) try: user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\ body={'ids':uid_list})['docs'] except: return results print len(user_portrait_result) for item in user_portrait_result: uid = item['_id'] if item['found'] == True: source = item['_source'] uname = source['uname'] else: uname = 'unknown' #results[uid] = uname dic = {} dic['ID'] = uid dic['name'] = uname results.append(dic) return results
def search_group_sentiment_weibo(task_name, start_ts, sentiment):
    """Search one day of group weibos filtered by sentiment.

    sentiment == '2' selects the SENTIMENT_SECOND set; any other value is
    matched exactly.  Returns a list of weibo dicts sorted by timestamp, or
    an error string when the task or its uid list is missing.
    """
    #step1: get the task uid list
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_name, _source=False,
                                           fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: map uid to uname from the portrait index
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        if item['found'] == True:
            uid2uname[item['_id']] = item['fields']['uname'][0]
    #step3: query one day of flow text for the group
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    if sentiment != '2':
        query_body = [{'terms': {'uid': uid_list}},
                      {'term': {'sentiment': sentiment}},
                      {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}}]
    else:
        query_body = [{'terms': {'uid': uid_list}},
                      {'terms': {'sentiment': SENTIMENT_SECOND}},
                      {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}}]
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query_body}},
                  'sort': [{'timestamp': {'order': 'asc'}}],
                  'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_result = []
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        # fix: a uid absent from the portrait index (found == False above)
        # used to raise KeyError here; default to an empty name instead
        weibo['uname'] = uid2uname.get(weibo['uid'], '')
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except:
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)
    return weibo_list
def get_group_list(task_name, submit_user):
    """Return [uid, uname, gender, location, importance, influence] rows
    for every member of the group task; blank fields when the per-user
    portrait processing fails."""
    rows = []
    task_id = submit_user + '-' + task_name
    try:
        es_results = es_group_result.get(index=group_index_name,
                                         doc_type=group_index_type,
                                         id=task_id)['_source']
    except:
        return rows
    uid_list = es_results['uid_list']
    user_portrait_attribute = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()
    for doc in user_portrait_attribute:
        uid = doc['_id']
        try:
            source = doc['_source']
            # log-normalize evaluation scores onto a 0-100 scale
            norm_importance = math.log(source['importance'] / evaluate_max['importance'] * 9 + 1, 10) * 100
            norm_influence = math.log(source['influence'] / evaluate_max['influence'] * 9 + 1, 10) * 100
            rows.append([uid, source['uname'], source['gender'],
                         source['location'], norm_importance, norm_influence])
        except:
            rows.append([uid, '', '', '', '', ''])
    return rows
def detect2analysis(input_data):
    """Promote a finished detect task to an analysis task.

    Overwrites the stored uid list, resets the compute status, saves the
    refreshed record to the compute ES index and pushes it onto the compute
    queue.  Returns True/False, or an error string when the task is missing.
    """
    task_name = input_data['task_name']
    uid_list = input_data['uid_list']
    #step1: the task must already exist in the group index
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_name)['_source']
    except:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exsit'
    #step2/3: refresh the task record for the analysis phase
    task_exist_result['uid_list'] = uid_list
    task_exist_result['status'] = 0  # mark the compute status
    task_exist_result['count'] = len(uid_list)
    task_exist_result['task_type'] = 'analysis'
    task_information_dict = {
        'task_name': task_name,
        'uid_list': uid_list,
        'status': 0,
        'count': len(uid_list),
        'task_type': 'analysis',
        'submit_user': task_exist_result['submit_user'],
        'submit_date': task_exist_result['submit_date'],
        'detect_type': task_exist_result['detect_type'],
        'detect_process': task_exist_result['detect_process'],
        'state': task_exist_result['state'],
    }
    add_es_dict = {'task_information': task_information_dict,
                   'query_condition': task_exist_result['query_condition']}
    es_status = save_compute2es(add_es_dict)
    #step4: queue the task for analysis
    redis_status = save_compute2redis(task_exist_result)
    return es_status == True and redis_status == True
def save_detect_attribute_task(input_dict, task_max_count):
    """Validate and enqueue an attribute-detect task.

    Rejects duplicate task ids ('task name invalid') and users who already
    have task_max_count pending detect tasks ('more than limit'); otherwise
    saves to ES and the detect queue and returns True/False.
    """
    info = input_dict['task_information']
    task_name = info['task_name']  # read kept for parity with the original
    submit_user = info['submit_user']
    task_id = info['task_id']
    #step1: the detect task id must not already exist in the group index
    try:
        existing = es_group_result.get(index=group_index_name,
                                       doc_type=group_index_type, id=task_id)
    except:
        existing = {}
    if existing != {}:
        return 'task name invalid'
    #step2: enforce the per-user pending-task limit
    if identify_exist_detect_task_count(submit_user) >= task_max_count:
        return 'more than limit'
    #step3: persist to ES, then push onto the detect queue
    es_ok = save_detect2es(input_dict)
    redis_ok = save_detect2redis(input_dict)
    return es_ok == True and redis_ok == True
def submit_sensing(input_dict):
    """Submit a sensing task: validate the task id, then store it in the
    compute ES index and push it onto the compute queue.

    Returns True/False, or 'task name invalid' when the id already exists.
    """
    info = input_dict['task_information']
    task_id = info['submit_user'] + '-' + info['task_name']
    #step1: the task id must be unused
    try:
        existing = es_group_result.get(index=group_index_name,
                                       doc_type=group_index_type, id=task_id)
    except:
        existing = {}
    if existing != {}:
        return 'task name invalid'
    #step2: stamp submission metadata and save to the compute ES index
    info['submit_date'] = int(time.time())
    info['count'] = len(info['uid_list'])
    info['status'] = 0
    info['detect_type'] = 'sensing'
    info['task_id'] = task_id
    # fix: removed the original's no-op self-assignments of 'state' and
    # 'task_type' (each key was assigned its own value)
    es_status = save_compute2es(input_dict)
    #step3: push onto the compute queue
    redis_status = save_compute2redis(info)
    return es_status == True and redis_status == True
def submit_task(input_data):
    """Submit an analysis task keyed by bare task_name.

    Returns 1 when the name is free and the task was queued, 0 when a task
    with that name already exists (or an uploaded uid_file is present).
    """
    status = 0  # 0 => cannot submit (name already taken)
    task_name = input_data['task_name']
    try:
        es_group_result.get(index=group_index_name,
                            doc_type=group_index_type, id=task_name)['_source']
    except:
        status = 1  # get failed => the name is free
    if status != 0 and 'uid_file' not in input_data:
        input_data['status'] = 0  # mark the task not compute
        input_data['count'] = len(input_data['uid_list'])
        input_data['task_type'] = 'analysis'
        input_data['submit_user'] = '******'
        input_data['detect_type'] = ''
        input_data['detect_process'] = ''
        # fix: removed the unused add_es_dict wrapper the original built but
        # never used -- input_data itself is what gets indexed and queued
        es_group_result.index(index=group_index_name,
                              doc_type=group_index_type, id=task_name,
                              body=input_data)
        r.lpush(group_analysis_queue_name, json.dumps(input_data))
    return status
def submit_sensing(input_dict):
    """Submit a sensing task keyed by bare task_name (no submit_user prefix).

    Returns True/False, or 'task name invalid' when the name already exists.
    """
    info = input_dict['task_information']
    task_name = info['task_name']
    #step1: the task name must be unused
    try:
        existing = es_group_result.get(index=group_index_name,
                                       doc_type=group_index_type,
                                       id=task_name)
    except:
        existing = {}
    if existing != {}:
        return 'task name invalid'
    #step2: stamp submission metadata and save to the compute ES index
    info['submit_date'] = int(time.time())
    info['count'] = len(info['uid_list'])
    info['status'] = 0
    info['detect_type'] = 'sensing'
    # fix: removed the original's no-op self-assignments of 'state' and
    # 'task_type' (each key was assigned its own value)
    es_status = save_compute2es(input_dict)
    #step3: push onto the compute queue
    redis_status = save_compute2redis(info)
    return es_status == True and redis_status == True
def save_detect_attribute_task(input_dict): status = True #step1: identify the detect task name id valid---is not in group es task_information = input_dict['task_information'] task_name = task_information['task_name'] submit_user = task_information['submit_user'] task_id = task_information['task_id'] try: task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id) except: task_exist_result = {} if task_exist_result != {}: return 'task name invalid' #print 'input_dict:', input_dict #step2: save to es es_status = save_detect2es(input_dict) print 'es_status:', es_status #step3: save to redis redis_status = save_detect2redis(input_dict) print 'redis_status:', redis_status #identify the operation status if es_status == True and redis_status == True: status = True else: status = False return status
def search_group_sentiment_weibo(task_name, start_ts, sentiment):
    """Search one day of group weibos filtered by sentiment.

    sentiment == '2' selects the SENTIMENT_SECOND set; any other value is
    matched exactly.  Returns a timestamp-sorted list of weibo dicts, or an
    error string when the task or its uid list is missing.
    """
    #step1: get the task uid list
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_name, _source=False,
                                           fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: map uid to uname from the portrait index
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        if item['found'] == True:
            uid2uname[item['_id']] = item['fields']['uname'][0]
    #step3: query one day of flow text for the group
    iter_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + str(iter_date)
    if sentiment != '2':
        query_body = [{'terms': {'uid': uid_list}},
                      {'term': {'sentiment': sentiment}},
                      {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}}]
    else:
        query_body = [{'terms': {'uid': uid_list}},
                      {'terms': {'sentiment': SENTIMENT_SECOND}},
                      {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}}]
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query_body}},
                  'sort': [{'timestamp': {'order': 'asc'}}],
                  'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_result = []
    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        # fix: uids absent from the portrait index (found == False above)
        # used to raise KeyError here; fall back to an empty name
        weibo['uname'] = uid2uname.get(weibo['uid'], '')
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except:
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)
    return weibo_list
def save_detect_event_task(input_dict):
    """Save an event-detect task (id taken from task_information['task_id'])
    to ES and the detect queue.  Returns True/False or 'task name invalid'."""
    info = input_dict['task_information']
    task_name = info['task_name']      # read kept for parity with original
    submit_user = info['submit_user']  # read kept for parity with original
    task_id = info['task_id']
    #step1: the task id must not already exist in the group index
    try:
        existing = es_group_result.get(index=group_index_name,
                                       doc_type=group_index_type, id=task_id)
    except:
        existing = {}
    if existing != {}:
        return 'task name invalid'
    #step2: save to es
    es_status = save_detect2es(input_dict)
    #step3: save to redis
    redis_status = save_detect2redis(input_dict)
    return es_status == True and redis_status == True
def get_group_list(task_name, submit_user):
    """Return [uid, uname, gender, location, importance, influence,
    activeness, sensitive] rows (scores log-normalized to 0-100) for every
    member of the group task; blank fields when processing a user fails."""
    rows = []
    task_id = submit_user + '-' + task_name
    try:
        es_results = es_group_result.get(index=group_index_name,
                                         doc_type=group_index_type,
                                         id=task_id)['_source']
    except:
        return rows
    uid_list = es_results['uid_list']
    user_portrait_attribute = es_user_portrait.mget(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'ids': uid_list})['docs']
    evaluate_max = get_evaluate_max()

    def _norm(value, key):
        # log-normalize an evaluation score onto a 0-100 scale
        return math.log(value / evaluate_max[key] * 9 + 1, 10) * 100

    for doc in user_portrait_attribute:
        uid = doc['_id']
        try:
            source = doc['_source']
            rows.append([uid, source['uname'], source['gender'],
                         source['location'],
                         _norm(source['importance'], 'importance'),
                         _norm(source['influence'], 'influence'),
                         _norm(source['activeness'], 'activeness'),
                         _norm(source['sensitive'], 'sensitive')])
        except:
            rows.append([uid, '', '', '', '', '', '', ''])
    return rows
def save_detect_multi_task(input_dict, extend_mark, task_max_count):
    """Save a multi-seed detect task.

    extend_mark '1' queues a detect (extension) task; '0' queues a compute
    (analysis) task directly.  Returns (status, out_user_list) where
    out_user_list holds seed uids missing from user_portrait, or an error
    string on validation failure.
    """
    info = input_dict['task_information']
    #step1: split seeds into in-portrait / out-of-portrait users
    in_user_list, out_user_list = identify_user_out(info['uid_list'])
    if len(in_user_list) == 0:
        return 'invalid seed user', out_user_list
    info['uid_list'] = in_user_list
    #step2: the task id must not already exist
    submit_user = info['submit_user']
    task_id = info['task_id']
    try:
        existing = es_group_result.get(index=group_index_name,
                                       doc_type=group_index_type,
                                       id=task_id)['_source']
    except:
        existing = {}
    if existing != {}:
        return 'task name invalid'
    #step3: route by extend_mark
    # fix: default both flags so an unexpected extend_mark value yields
    # (False, out_user_list) instead of an UnboundLocalError
    es_status = redis_status = False
    if extend_mark == '1':
        #enforce the per-user pending detect-task limit
        if identify_exist_detect_task_count(submit_user) >= task_max_count:
            return 'more than limit'
        es_status = save_detect2es(input_dict)
        redis_status = save_detect2redis(input_dict)  # detect redis queue
    elif extend_mark == '0':
        info['status'] = 0
        info['count'] = len(info['uid_list'])
        #enforce the per-user pending analysis-task limit
        if identify_exist_analysis_task_count(submit_user) >= task_max_count:
            return 'more than limit'
        es_status = save_compute2es(input_dict)
        redis_status = save_compute2redis(info)  # compute redis queue
    status = es_status == True and redis_status == True
    return status, out_user_list
def submit_task(input_data):
    """Submit an analysis task keyed by submit_user + '-' + task_name.

    Rejects users whose count of uncomputed (status == 0) tasks reaches
    task_max_count ('more than limit').  Returns 1 when the task was
    queued, 0 when the task id already exists.
    """
    status = 0  # 0 => cannot submit (id already taken)
    task_name = input_data['task_name']
    submit_user = input_data['submit_user']
    task_id = submit_user + '-' + task_name
    # per-user limit on pending compute tasks (defaults to 0 when absent)
    try:
        task_max_count = input_data['task_max_count']
    except:
        task_max_count = 0
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'submit_user': submit_user}},
                            {'term': {'status': 0}}
                        ]
                    }
                }
            }
        }
    }
    exist_compute_result = es_group_result.search(
        index=group_index_name, doc_type=group_index_type,
        body=query_body)['hits']['hits']
    if len(exist_compute_result) >= task_max_count:
        return 'more than limit'
    # the task id must be free
    try:
        es_group_result.get(index=group_index_name,
                            doc_type=group_index_type, id=task_id)['_source']
    except:
        status = 1  # get failed => the id is free
    if status != 0 and 'uid_file' not in input_data:
        input_data['status'] = 0  # mark the task not compute
        input_data['count'] = len(input_data['uid_list'])
        input_data['task_type'] = 'analysis'
        input_data['submit_user'] = submit_user
        input_data['detect_type'] = ''
        input_data['detect_process'] = ''
        input_data['task_id'] = task_id
        # fix: removed the leftover debug prints and the unused add_es_dict
        # wrapper -- input_data itself is what gets indexed and queued
        es_group_result.index(index=group_index_name,
                              doc_type=group_index_type, id=task_id,
                              body=input_data)
        r.lpush(group_analysis_queue_name, json.dumps(input_data))
    return status
def get_activity_weibo(task_name, start_ts):
    """Return weibos posted by the group's members within FOUR_HOUR of
    start_ts, or an error string when the task / uid list is missing."""
    results = []
    #step1: get the task uid list
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_name, _source=False,
                                           fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: map uid to uname (built but not used below; kept for parity)
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        if item['found'] == True:
            uid2uname[item['_id']] = item['fields']['uname'][0]
    #step3: search the FOUR_HOUR time segment
    end_ts = start_ts + FOUR_HOUR
    time_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + time_date
    query = [{'terms': {'uid': uid_list}},
             {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}}]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query}}, 'sort': 'timestamp',
                  'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_es_result = []
    for item in flow_text_es_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        if source['geo']:
            # fix: geo is an '&'-separated string (the sibling search
            # functions split it); joining the raw string tab-separated
            # every single character -- TODO confirm geo is never a list
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        else:
            weibo['geo'] = ''
        results.append(weibo)
    return results
def edit_state(task_name, submit_user, new_state):
    """Update the 'state' field of a group task.

    Returns True on success, or the string 'group no exist' when the task
    record cannot be fetched.
    """
    task_id = submit_user + '-' + task_name
    try:
        es_group_result.get(index=group_index_name,
                            doc_type=group_index_type, id=task_id)['_source']
    except:
        return 'group no exist'
    es_group_result.update(index=group_index_name,
                           doc_type=group_index_type, id=task_id,
                           body={'doc': {'state': new_state}})
    return True
def get_activity_weibo(task_name, start_ts):
    """Return weibos posted by the group's members within FOUR_HOUR of
    start_ts, or an error string when the task / uid list is missing."""
    results = []
    #step1: get the task uid list
    try:
        group_result = es_group_result.get(index=group_index_name,
                                           doc_type=group_index_type,
                                           id=task_name, _source=False,
                                           fields=['uid_list'])
    except:
        group_result = {}
    if group_result == {}:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except:
        uid_list = []
    if uid_list == []:
        return 'task uid list null'
    #step2: map uid to uname (built but not used below; kept for parity)
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except:
        user_portrait_result = []
    for item in user_portrait_result:
        if item['found'] == True:
            uid2uname[item['_id']] = item['fields']['uname'][0]
    #step3: search the FOUR_HOUR time segment
    end_ts = start_ts + FOUR_HOUR
    time_date = ts2datetime(start_ts)
    flow_text_index_name = flow_text_index_name_pre + time_date
    query = [{'terms': {'uid': uid_list}},
             {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}}]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query}}, 'sort': 'timestamp',
                  'size': MAX_VALUE})['hits']['hits']
    except:
        flow_text_es_result = []
    for item in flow_text_es_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        if source['geo']:
            # fix: geo is an '&'-separated string (the sibling search
            # functions split it); joining the raw string tab-separated
            # every single character -- TODO confirm geo is never a list
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        else:
            weibo['geo'] = ''
        results.append(weibo)
    return results
def save_detect_multi_task(input_dict, extend_mark, task_max_count):
    """Save a multi-seed detect task.

    extend_mark '1' queues a detect (extension) task; '0' queues a compute
    (analysis) task directly.  Returns (status, out_user_list) where
    out_user_list holds seed uids missing from user_portrait, or an error
    string on validation failure.
    """
    task_information = input_dict['task_information']
    #step1: split seeds into in-portrait / out-of-portrait users
    in_user_list, out_user_list = identify_user_out(task_information['uid_list'])
    if len(in_user_list) == 0:
        return 'invalid seed user', out_user_list
    task_information['uid_list'] = in_user_list
    #step2: the task id must not already exist
    submit_user = task_information['submit_user']
    task_id = task_information['task_id']
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except:
        task_exist_result = {}
    if task_exist_result != {}:
        return 'task name invalid'
    #step3: route by extend_mark
    # fix: initialize both flags so an unexpected extend_mark value yields
    # (False, out_user_list) instead of an UnboundLocalError
    es_status = redis_status = False
    if extend_mark == '1':
        #enforce the per-user pending detect-task limit
        if identify_exist_detect_task_count(submit_user) >= task_max_count:
            return 'more than limit'
        es_status = save_detect2es(input_dict)
        redis_status = save_detect2redis(input_dict)  # detect redis queue
    elif extend_mark == '0':
        task_information['status'] = 0
        task_information['count'] = len(task_information['uid_list'])
        #enforce the per-user pending analysis-task limit
        if identify_exist_analysis_task_count(submit_user) >= task_max_count:
            return 'more than limit'
        es_status = save_compute2es(input_dict)
        redis_status = save_compute2redis(task_information)  # compute redis queue
    status = es_status == True and redis_status == True
    return status, out_user_list
def delete_group_results(task_name, submit_user): task_id = submit_user + '-' + task_name #step1: get group uid list try: group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\ id=task_id)['_source'] except: return False uid_list = group_result['uid_list'] #step2: update group_tag in user_portrait query_body = {'query': {'term': {'group': task_id}}} try: user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\ body={'ids': uid_list})['docs'] except: user_portrait_result = [] bulk_action = [] for item in user_portrait_result: uid = item['_id'] if item['found'] == True: try: source = item['_source'] except: source = {} try: group_tag = source['group'] except: group_tag = '' if group_tag != '': new_group_tag_list = [] group_tag_list = group_tag.split('&') for group_tag_item in group_tag_list: if group_tag_item != task_id and group_tag_item != '[email protected]': new_group_tag_list.append(group_tag_item) new_group_tag = '&'.join(new_group_tag_list) else: new_group_tag = '' action = {'update': {'_id': uid}} bulk_action.extend([action, {'doc': {'group': new_group_tag}}]) if bulk_action: print 'bulk_action:', bulk_action es_user_portrait.bulk(bulk_action, index=portrait_index_name, doc_type=portrait_index_type) #step3: delete group results in group_manage try: print 'yes delete' result = es.delete(index=index_name, doc_type=index_type, id=task_id) except: return False return True
def show_detect_result(task_name, submit_user):
    """List detected users with log-normalized (0-100) evaluation scores,
    sorted by influence descending.

    Returns rows [uid, uname, activeness, importance, influence], or the
    string 'task name is not exist' when the task record is missing.
    Unknown users get the placeholder u'未知' in every score column.
    """
    user_result = []
    #step1: the task must exist
    task_id = submit_user + '-' + task_name
    try:
        task_exist_result = es_group_result.get(index=group_index_name,
                                                doc_type=group_index_type,
                                                id=task_id)['_source']
    except:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exist'
    #step2: uid_list is stored as a JSON string
    uid_list = json.loads(task_exist_result['uid_list'])
    #step3: fetch portraits in DETECT_ITER_COUNT batches
    # fix: get_evaluate_max() is loop-invariant -- hoisted out of the
    # per-user loop where the original called it for every found user
    evaluate_max = get_evaluate_max()
    iter_count = 0
    uid_count = len(uid_list)
    while iter_count < uid_count:
        iter_user_list = uid_list[iter_count:iter_count + DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'ids': iter_user_list}, _source=True)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found'] == True:
                source = item['_source']
                uname = source['uname']
                activeness = math.log(source['activeness'] / evaluate_max['activeness'] * 9 + 1, 10) * 100
                importance = math.log(source['importance'] / evaluate_max['importance'] * 9 + 1, 10) * 100
                influence = math.log(source['influence'] / evaluate_max['influence'] * 9 + 1, 10) * 100
            else:
                uname = u'未知'
                activeness = u'未知'
                importance = u'未知'
                influence = u'未知'
            user_result.append([uid, uname, activeness, importance, influence])
        iter_count += DETECT_ITER_COUNT
    return sorted(user_result, key=lambda x: x[4], reverse=True)
def delete_group_results(task_name, submit_user): task_id = submit_user + '-' + task_name #step1: get group uid list try: group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\ id=task_id)['_source'] except: return False uid_list = group_result['uid_list'] #step2: update group_tag in user_portrait query_body = {'query':{'term':{'group': task_id}}} try: user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,\ body={'ids': uid_list})['docs'] except: user_portrait_result = [] bulk_action = [] for item in user_portrait_result: uid = item['_id'] if item['found'] == True: try: source = item['_source'] except: source = {} try: group_tag = source['group'] except: group_tag = '' if group_tag != '': new_group_tag_list = [] group_tag_list = group_tag.split('&') for group_tag_item in group_tag_list: if group_tag_item != task_id and group_tag_item != '[email protected]': new_group_tag_list.append(group_tag_item) new_group_tag = '&'.join(new_group_tag_list) else: new_group_tag = '' action = {'update':{'_id': uid}} bulk_action.extend([action, {'doc': {'group': new_group_tag}}]) if bulk_action: print 'bulk_action:', bulk_action es_user_portrait.bulk(bulk_action, index=portrait_index_name, doc_type=portrait_index_type) #step3: delete group results in group_manage try: print 'yes delete' result = es.delete(index=index_name, doc_type=index_type, id=task_id) except: return False return True
def show_vary_detail(task_name, submit_user, vary_pattern):
    """Return [[uid, uname, start_date, end_date], ...] for one geo-vary
    pattern ('-'-separated in the API, '&'-joined in storage), or an error
    string when the task is missing, unfinished, or has no vary detail."""
    task_id = submit_user + '-' + task_name
    #the task must exist
    try:
        source = es_group_result.get(index=group_index_name,
                                     doc_type=group_index_type,
                                     id=task_id)['_source']
    except:
        return 'group task is not exist'
    #the task must be computed (status == 1)
    if source['status'] != 1:
        return 'group task is not completed'
    #vary detail geo is stored as a JSON string
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except:
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'
    #translate the request pattern to the storage key
    vary_pattern_key = '&'.join(vary_pattern.split('-'))
    # fix: the original indexed with the undefined name 'vary_pattern_dict'
    # (NameError); the computed-but-unused vary_pattern_key is the intended key
    uid_ts_list = vary_detail_geo[vary_pattern_key]
    uid_list = [item[0] for item in uid_ts_list]
    #resolve user names, falling back to the uid itself
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uname_dict[uid] = uid
    #build the detail rows with human-readable dates
    new_detail = []
    for vary_item in uid_ts_list:
        new_detail.append([vary_item[0], uname_dict[vary_item[0]],
                           ts2datetime(vary_item[1]),
                           ts2datetime(vary_item[2])])
    return new_detail
def show_vary_detail(task_name, submit_user, vary_pattern):
    """Return [[uid, uname, start_date, end_date], ...] for one geo-vary
    pattern ('-'-separated in the API, '&'-joined in storage), or an error
    string when the task is missing, unfinished, or has no vary detail."""
    task_id = submit_user + '-' + task_name
    #the task must exist
    try:
        source = es_group_result.get(index=group_index_name,
                                     doc_type=group_index_type,
                                     id=task_id)['_source']
    except:
        return 'group task is not exist'
    #the task must be computed (status == 1)
    if source['status'] != 1:
        return 'group task is not completed'
    #vary detail geo is stored as a JSON string
    try:
        vary_detail_geo = json.loads(source['vary_detail_geo'])
    except:
        vary_detail_geo = {}
    if vary_detail_geo == {}:
        return 'vary detail geo none'
    #translate the request pattern to the storage key
    vary_pattern_key = '&'.join(vary_pattern.split('-'))
    # fix: the original indexed with the undefined name 'vary_pattern_dict'
    # (NameError); the computed-but-unused vary_pattern_key is the intended key
    uid_ts_list = vary_detail_geo[vary_pattern_key]
    uid_list = [item[0] for item in uid_ts_list]
    #resolve user names, falling back to the uid itself
    try:
        user_portrait_result = es_user_portrait.mget(
            index=portrait_index_name, doc_type=portrait_index_type,
            body={'ids': uid_list})['docs']
    except:
        user_portrait_result = []
    uname_dict = {}
    for portrait_item in user_portrait_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uname_dict[uid] = uid
    #build the detail rows with human-readable dates
    new_detail = []
    for vary_item in uid_ts_list:
        new_detail.append([vary_item[0], uname_dict[vary_item[0]],
                           ts2datetime(vary_item[1]),
                           ts2datetime(vary_item[2])])
    return new_detail
def submit_task(input_data):
    """Submit an analysis task keyed by submit_user + '-' + task_name.

    Rejects users whose count of uncomputed (status == 0) tasks reaches
    task_max_count ('more than limit').  Returns 1 when the task was
    queued, 0 when the task id already exists.
    """
    status = 0  # 0 => cannot submit (id already taken)
    task_name = input_data['task_name']
    submit_user = input_data['submit_user']
    task_id = submit_user + '-' + task_name
    # per-user limit on pending compute tasks (defaults to 0 when absent)
    try:
        task_max_count = input_data['task_max_count']
    except:
        task_max_count = 0
    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [
                            {'term': {'submit_user': submit_user}},
                            {'term': {'status': 0}}
                        ]
                    }
                }
            }
        }
    }
    exist_compute_result = es_group_result.search(
        index=group_index_name, doc_type=group_index_type,
        body=query_body)['hits']['hits']
    if len(exist_compute_result) >= task_max_count:
        return 'more than limit'
    # the task id must be free
    try:
        es_group_result.get(index=group_index_name,
                            doc_type=group_index_type, id=task_id)['_source']
    except:
        status = 1  # get failed => the id is free
    if status != 0 and 'uid_file' not in input_data:
        input_data['status'] = 0  # mark the task not compute
        input_data['count'] = len(input_data['uid_list'])
        input_data['task_type'] = 'analysis'
        input_data['submit_user'] = submit_user
        input_data['detect_type'] = ''
        input_data['detect_process'] = ''
        # fix: removed the unused add_es_dict wrapper the original built but
        # never used -- input_data itself is what gets indexed and queued
        es_group_result.index(index=group_index_name,
                              doc_type=group_index_type, id=task_id,
                              body=input_data)
        r.lpush(group_analysis_queue_name, json.dumps(input_data))
    return status
def get_group_tag(group_name):
    """Fetch the user documents of a group task, as a first step toward
    tag statistics.

    NOTE(review): this function looks unfinished -- `result` and
    `order_result` are never populated, the planned tag statistics are only
    sketched in comments, and control falls off the end (implicitly
    returning None) after the final mget.
    """
    result = {}
    order_result = []
    #get group task uid list
    #get user tag
    #statistic tag
    # the group task document must exist
    try:
        group_task_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=group_name)
    except:
        return 'no group task'
    # the task must carry a uid_list
    try:
        uid_list = group_task_result['_source']['uid_list']
    except:
        return 'no user'
    # batch-fetch the user documents; any ES failure is re-raised as-is
    try:
        user_result = es.mget(index=user_index_name, doc_type=user_index_type, body={'ids': uid_list})['docs']
    except Exception, e:
        raise e
def show_detect_result(task_name, submit_user):
    """Return the detected users of a task with their evaluation scores.

    For each uid in the task's uid_list, yields
    [uid, uname, activeness, importance, influence]; users missing from the
    portrait index get u'\u672a\u77e5' ("unknown") placeholders. The result
    is sorted by influence, descending. Returns the string
    'task name is not exist' when the task id is unknown.
    """
    user_result = []
    # step1: the task id (submit_user-task_name) must exist
    task_id = submit_user + '-' + task_name
    try:
        task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id)['_source']
    except:
        task_exist_result = {}
    if task_exist_result == {}:
        return 'task name is not exist'
    # step2: the uid list is stored json-encoded
    uid_list = json.loads(task_exist_result['uid_list'])
    # step3: batch-read evaluation info (uid/uname/activeness/importance/influence)
    # PERF: get_evaluate_max() is loop-invariant; the original fetched it once
    # per found user inside the inner loop
    evaluate_max = get_evaluate_max()
    iter_count = 0
    uid_count = len(uid_list)
    while iter_count < uid_count:
        iter_user_list = uid_list[iter_count: iter_count+DETECT_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                    body={'ids':iter_user_list}, _source=True)['docs']
        except:
            portrait_result = []
        for item in portrait_result:
            uid = item['_id']
            if item['found']==True:
                source = item['_source']
                uname = source['uname']
                # normalize each score to 0-100 on a log scale relative to the
                # site-wide maximum
                activeness = math.log(source['activeness']/evaluate_max['activeness'] * 9 + 1 ,10)*100
                importance = math.log(source['importance']/evaluate_max['importance'] * 9 + 1 ,10)*100
                influence = math.log(source['influence']/evaluate_max['influence'] * 9 + 1 ,10)*100
            else:
                uname = u'未知'
                activeness = u'未知'
                importance = u'未知'
                influence = u'未知'
            user_result.append([uid, uname, activeness, importance, influence])
        iter_count += DETECT_ITER_COUNT
    # sort by influence (index 4), highest first
    sort_user_result = sorted(user_result, key=lambda x:x[4], reverse=True)
    return sort_user_result
def submit_task(input_data):
    """Queue a group-analysis task keyed by task name only (no user prefix).

    Returns 1 when the task was queued, 0 when a task with the same name
    already exists. On success the task document is indexed in ES with the
    submit_user masked as '******' and pushed onto the group-analysis
    redis queue.
    """
    status = 0   # 0 marks "cannot submit"
    task_name = input_data['task_name']
    # the task name must not already exist
    try:
        es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_name)['_source']
    except:
        status = 1   # name is free: the task may be submitted
    # save and enqueue (only for inline uid lists, not uploaded files)
    if status != 0 and 'uid_file' not in input_data:
        input_data['status'] = 0   # mark the task as not yet computed
        count = len(input_data['uid_list'])
        input_data['count'] = count
        input_data['task_type'] = 'analysis'
        input_data['submit_user'] = '******'
        input_data['detect_type'] = ''
        input_data['detect_process'] = ''
        # (removed dead local add_es_dict -- it was built but never used)
        es_group_result.index(index=group_index_name, doc_type=group_index_type, id=task_name, body=input_data)
        r.lpush(group_analysis_queue_name, json.dumps(input_data))
    return status
def save_detect_multi_task(input_dict, extend_mark): results = {} task_information_dict = input_dict['task_information'] input_uid_list = task_information_dict['uid_list'] #step1: identify user is in user_portrait and not in user_portrait in_user_list, out_user_list = identify_user_out(input_uid_list) input_dict['task_information']['uid_list'] = in_user_list print 'step1' #step2: identify task name is valid task_name = input_dict['task_information']['task_name'] try: task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_name)['_source'] except: task_exist_result = {} if task_exist_result != {}: return 'task name invalid' print 'step2' #step3: identify whether or not to extend----extend mark if extend_mark=='1': print 'step3 save' es_status = save_detect2es(input_dict) redis_status = save_detect2redis(input_dict) # detect redis queue elif extend_mark=='0': uid_list = input_dict['task_information']['uid_list'] input_dict['task_information']['uid_list'] = uid_list input_dict['task_information']['status'] = 0 print 'uid_list:', len(uid_list), uid_list, type(uid_list) input_dict['task_information']['count'] = len(uid_list) print 'step3 save' es_status = save_compute2es(input_dict) add_redis_dict = input_dict['task_information'] redis_status = save_compute2redis(add_redis_dict) # compute redis queue #identify the operation status if es_status==True and redis_status==True: status = True else: status = False return status, out_user_list
def get_group_member_name(task_name):
    """Map each uid of a group task to its portrait uname.

    Returns {uid: uname}; users without a portrait document map to
    'unknown'. Returns an empty dict when the task lookup or the portrait
    mget fails.
    """
    results = {}
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_name)['_source']
    except:
        return results
    uid_list = group_result['uid_list']
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type ,\
                body={'ids':uid_list})['docs']
    except:
        return results
    for item in user_portrait_result:
        uid = item['_id']
        if item['found'] == True:
            source = item['_source']
            uname = source['uname']
        else:
            # BUGFIX: was the typo 'unkown'; the two-argument variant of this
            # helper already returns 'unknown' for missing portraits
            uname = 'unknown'
        results[uid] = uname
    return results
def search_group_results(task_name, module, submit_user):
    """Return one display module of a finished group-analysis task.

    The task is looked up by id 'submit_user-task_name'. Returns the string
    'group task is not exist' when the document is missing,
    'group task is not completed' when status != 1, otherwise a dict of the
    requested module's display fields (most stored values are json-encoded
    strings that are decoded here).
    """
    result = {}
    task_id = submit_user + '-' + task_name
    #step1:identify the task_name exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type, \
                id=task_id)['_source']
    except:
        return 'group task is not exist'
    #step2: identify the task status=1(analysis completed)
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #step3:get module result
    if module == 'overview':
        result['task_name'] = source['task_name']
        result['submit_date'] = ts2datetime(source['submit_date'])
        result['state'] = source['state']
        result['submit_user'] = source['submit_user']
        result['density_star'] = source['density_star']
        result['activeness_star'] = source['activeness_star']
        result['influence_star'] = source['influence_star']
        result['importance_star'] = source['importance_star']
        #need to delete
        result['tag_vector'] = json.loads(source['tag_vector'])
    elif module == 'basic':
        result['gender'] = json.loads(source['gender'])
        result['verified'] = json.loads(source['verified'])
        result['user_tag'] = json.loads(source['user_tag'])
        result['count'] = source['count']
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'activity':
        result['activity_trend'] = json.loads(source['activity_trend'])
        result['activity_time'] = json.loads(source['activity_time'])
        result['activity_geo_disribution'] = json.loads( source['activity_geo_distribution'])
        result['activiy_geo_vary'] = json.loads(source['activity_geo_vary'])
        # note: result key 'activeness_trend' reads the stored 'activeness' field
        result['activeness_trend'] = json.loads(source['activeness'])
        result['activeness_his'] = json.loads(source['activeness_his'])
        result['activeness_description'] = source['activeness_description']
        result['online_pattern'] = json.loads(source['online_pattern'])
        # vary detail may be absent/undecodable; treat that as empty
        try:
            vary_detail_geo_dict = json.loads(source['vary_detail_geo'])
        except:
            vary_detail_geo_dict = {}
        uid_list = source['uid_list']
        if vary_detail_geo_dict != {}:
            result['vary_detail_geo'] = get_vary_detail_info( vary_detail_geo_dict, uid_list)
        else:
            result['vary_detail_geo'] = {}
        # main start/end geos sorted by count, highest first
        try:
            main_start_geo_dict = json.loads(source['main_start_geo'])
        except:
            main_start_geo_dict = {}
        result['main_start_geo'] = sorted(main_start_geo_dict.items(), key=lambda x: x[1], reverse=True)
        try:
            main_end_geo_dict = json.loads(source['main_end_geo'])
        except:
            main_end_geo_dict = {}
        result['main_end_geo'] = sorted(main_end_geo_dict.items(), key=lambda x: x[1], reverse=True)
        #all_geo_list = list(set(main_start_geo_dict.keys()) | set(main_end_geo_dict.keys()))
        #result['geo_lat_lng'] = get_lat_lng(all_geo_list)
    elif module == 'preference':
        #result['keywords'] = json.loads(source['keywords'])
        # filter the stored keyword list, then sort by weight descending
        keyword_list = json.loads(source['keywords'])
        keyword_dict = dict()
        for item in keyword_list:
            keyword_dict[item[0]] = item[1]
        filter_keyword_dict = keyword_filter(keyword_dict)
        sort_keyword = sorted(filter_keyword_dict.items(), key=lambda x: x[1], reverse=True)
        result['keywords'] = sort_keyword
        result['hashtag'] = json.loads(source['hashtag'])
        result['sentiment_word'] = json.loads(source['sentiment_word'])
        # topic_model may be missing on older task documents
        try:
            result['topic_model'] = json.loads(source['topic_model'])
        except:
            result['topic_model'] = []
        #need to delete
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'influence':
        result['influence_his'] = json.loads(source['influence_his'])
        # note: result key 'influence_trend' reads the stored 'influence' field
        result['influence_trend'] = json.loads(source['influence'])
        result['influence_in_user'] = json.loads(source['influence_in_user'])
        result['influence_out_user'] = json.loads(source['influence_out_user'])
    elif module == 'social':
        result['in_density'] = source['in_density']
        result['in_inter_user_ratio'] = source['in_inter_user_ratio']
        result['in_inter_weibo_ratio'] = source['in_inter_weibo_ratio']
        result['social_in_record'] = json.loads(source['social_in_record'])
        result['out_inter_user_ratio'] = source['out_inter_user_ratio']
        result['out_inter_weibo_ratio'] = source['out_inter_weibo_ratio']
        result['social_out_record'] = json.loads(source['social_out_record'])
        result['density_description'] = source['density_description']
        result['mention'] = source['mention']
    elif module == 'think':
        result['sentiment_trend'] = json.loads(source['sentiment_trend'])
        result['sentiment_pie'] = json.loads(source['sentiment_pie'])
        result['character'] = json.loads(source['character'])
    return result
def search_group_results(task_name, module, submit_user): result = {} if RUN_TYPE == 0: #jln #task_id = '媒体' #group_index_type='text' task_id = submit_user + '-' + task_name group_index_type = 'group' else: task_id = submit_user + '-' + task_name #print es_group_result,group_index_name,group_index_type,task_id #step1:identify the task_name exist try: source = es_group_result.get(index=group_index_name, doc_type=group_index_type, \ id=task_id)['_source'] print source except: return 'group task is not exist' #step2: identify the task status=1(analysis completed) status = source['status'] if status != 1: return 'group task is not completed' #step3:get module result if module == 'overview': result['task_name'] = source['task_name'] result['submit_date'] = ts2datetime(source['submit_date']) result['state'] = source['state'] result['submit_user'] = source['submit_user'] result['density_star'] = source['density_star'] result['activeness_star'] = source['activeness_star'] result['influence_star'] = source['influence_star'] result['importance_star'] = source['importance_star'] #need to delete result['tag_vector'] = json.loads(source['tag_vector']) elif module == 'basic': result['gender'] = json.loads(source['gender']) result['verified'] = json.loads(source['verified']) result['user_tag'] = json.loads(source['user_tag']) result['count'] = source['count'] result['domain'] = json.loads(source['domain']) result['topic'] = json.loads(source['topic']) elif module == 'activity': result['activity_trend'] = json.loads(source['activity_trend']) result['activity_time'] = json.loads(source['activity_time']) result['activity_geo_disribution'] = json.loads( source['activity_geo_distribution']) result['activiy_geo_vary'] = json.loads(source['activity_geo_vary']) result['activeness_trend'] = json.loads(source['activeness']) result['activeness_his'] = json.loads(source['activeness_his']) result['activeness_description'] = source['activeness_description'] result['online_pattern'] = 
json.loads(source['online_pattern']) #yuanhuiru uid_list = source['uid_list'] user_photo_result = es_user_portrait.mget(index='user_portrait_1222', doc_type='user', body={'ids': uid_list}, fields=['photo_url'])['docs'] influ_value_result = es_user_portrait.mget(index='user_portrait_1222', doc_type='user', body={'ids': uid_list}, fields=['influence' ])['docs'] result['photo_url'] = [] result['influence'] = [] for item in user_photo_result: #uid = item['_id'] if item['found'] == True: source = item['fields'] photo_url = source['photo_url'] else: photo_url = 'unknown' result['photo_url'].append(photo_url) #print 'user_photo', result['photo_url'] for item in influ_value_result: #uid = item['_id'] if item['found'] == True: source = item['fields'] influence = source['influence'] else: influence = 'unknown' result['influence'].append(influence) #print 'influence', result['influence'] new_geo = {} for uid, geos in result['activity_geo_disribution'].iteritems(): for geo, count in geos.iteritems(): geo = geo.split('\t') if geo[0] == u'中国': if len(geo) == 1: geo.append(u'未知', u'未知') elif len(geo) == 2: geo.append(u'未知') try: new_geo[geo[1]]['total'] += count except: new_geo[geo[1]] = {'total': count} try: new_geo[geo[1]][geo[2]] += count except: new_geo[geo[1]][geo[2]] = count result['new_geo'] = new_geo try: vary_detail_geo_dict = json.loads(source['vary_detail_geo']) except: vary_detail_geo_dict = {} #uid_list = source['uid_list'] if vary_detail_geo_dict != {}: result['vary_detail_geo'] = get_vary_detail_info( vary_detail_geo_dict, uid_list) else: result['vary_detail_geo'] = {} try: main_start_geo_dict = json.loads(source['main_start_geo']) except: main_start_geo_dict = {} result['main_start_geo'] = sorted(main_start_geo_dict.items(), key=lambda x: x[1], reverse=True) try: main_end_geo_dict = json.loads(source['main_end_geo']) except: main_end_geo_dict = {} result['main_end_geo'] = sorted(main_end_geo_dict.items(), key=lambda x: x[1], reverse=True) #all_geo_list = 
list(set(main_start_geo_dict.keys()) | set(main_end_geo_dict.keys())) #result['geo_lat_lng'] = get_lat_lng(all_geo_list) print 'result!!!!!!', result elif module == 'preference': try: result['keywords'] = json.loads(source['filter_keyword']) except: f_keyword = json.loads(source['keywords']) key_str = ','.join([key[0] for key in f_keyword]) filter_dict = get_weibo_single(key_str, n_count=100) result['keywords'] = sorted(filter_dict.iteritems(), key=lambda x: x[1], reverse=True) ''' keyword_list = json.loads(source['keywords']) keyword_dict = dict() for item in keyword_list: keyword_dict[item[0]] = item[1] filter_keyword_dict = keyword_filter(keyword_dict) sort_keyword = sorted(filter_keyword_dict.items(), key=lambda x:x[1], reverse=True) result['keywords'] = sort_keyword ''' result['hashtag'] = json.loads(source['hashtag']) result['sentiment_word'] = json.loads(source['sentiment_word']) try: result['topic_model'] = json.loads(source['topic_model']) except: result['topic_model'] = [] #need to delete result['domain'] = json.loads(source['domain']) result['topic'] = json.loads(source['topic']) elif module == 'influence': result['influence_his'] = json.loads(source['influence_his']) result['influence_trend'] = json.loads(source['influence']) result['influence_in_user'] = json.loads(source['influence_in_user']) result['influence_out_user'] = json.loads(source['influence_out_user']) elif module == 'social': result['in_density'] = source['in_density'] result['in_inter_user_ratio'] = source['in_inter_user_ratio'] result['in_inter_weibo_ratio'] = source['in_inter_weibo_ratio'] result['social_in_record'] = json.loads(source['social_in_record']) result['out_inter_user_ratio'] = source['out_inter_user_ratio'] result['out_inter_weibo_ratio'] = source['out_inter_weibo_ratio'] result['social_out_record'] = json.loads(source['social_out_record']) result['density_description'] = source['density_description'] result['mention'] = source['mention'] elif module == 'think': 
result['sentiment_trend'] = json.loads(source['sentiment_trend']) result['sentiment_pie'] = json.loads(source['sentiment_pie']) result['character'] = json.loads(source['character']) return result
def group_user_weibo(task_name, submit_user, sort_type):
    """Collect the last week's weibos of a group's members, sorted.

    sort_type is one of 'retweet'/'retweeted', 'comment', 'sensitive' or
    'timestamp'; each weibo row is
    [mid, uid, uname, text, ip, city, timestamp, date, retweet_count,
     comment_count, sensitive_score, weibo_url].
    Returns 'group no exist' when the task id is unknown.
    """
    weibo_list = []
    now_date = ts2datetime(time.time())
    # external 'retweet' maps to the stored field name 'retweeted'
    if sort_type == 'retweet':
        sort_type = 'retweeted'
    #run_type: in test mode pin the date and force timestamp ordering
    if RUN_TYPE == 0:
        now_date = RUN_TEST_TIME
        sort_type = 'timestamp'
    #step1: get group user
    task_id = submit_user + '-' + task_name
    try:
        group_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        group_exist_result = {}
    if not group_exist_result:
        return 'group no exist'
    #step2: get user weibo list over the last 7 daily flow-text indices
    uid_list = group_exist_result['uid_list']
    for i in range(6, -1, -1):
        iter_date = ts2datetime(datetime2ts(now_date) - i * DAY)
        index_name = flow_text_index_name_pre + iter_date
        # take the top 100 weibos per day, ordered by sort_type descending
        try:
            weibo_result = es_flow_text.search(index=index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'sort':[{sort_type: {'order': 'desc'}}], 'size':100})['hits']['hits']
        except:
            weibo_result = []
        if weibo_result:
            weibo_list.extend(weibo_result)
    #sort_weibo_list = sorted(weibo_list, key=lambda x:x['_source'][sort_type], reverse=True)[:100]
    sort_weibo_list = weibo_list
    #step3: get user name for each member (fallback 'unknown')
    try:
        portrait_exist_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids':uid_list})['docs']
    except:
        portrait_exist_result = []
    uid2uname_dict = {}
    for portrait_item in portrait_exist_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            source = portrait_item['_source']
            uname = source['uname']
        else:
            uname = 'unknown'
        uid2uname_dict[uid] = uname
    # flatten the ES hits into display rows
    weibo_list = []
    for weibo_item in sort_weibo_list:
        source = weibo_item['_source']
        mid = source['mid']
        uid = source['uid']
        uname = uid2uname_dict[uid]
        text = source['text']
        ip = source['geo']
        timestamp = source['timestamp']
        date = ts2date(timestamp)
        sentiment = source['sentiment']
        weibo_url = weiboinfo2url(uid, mid)
        #run_type: counts only exist on production data; default 0 otherwise
        if RUN_TYPE == 1:
            try:
                retweet_count = source['retweeted']
            except:
                retweet_count = 0
            try:
                comment_count = source['comment']
            except:
                comment_count = 0
            try:
                sensitive_score = source['sensitive']
            except:
                sensitive_score = 0
        else:
            retweet_count = 0
            comment_count = 0
            sensitive_score = 0
        city = ip2city(ip)
        weibo_list.append([ mid, uid, uname, text, ip, city, timestamp, date, retweet_count, comment_count, sensitive_score, weibo_url ])
    # final sort key is positional: 6=timestamp, 8=retweet, 9=comment, 10=sensitive
    if sort_type == 'timestamp':
        new_weibo_list = sorted(weibo_list, key=lambda x: x[6], reverse=True)
    elif sort_type == 'retweeted':
        new_weibo_list = sorted(weibo_list, key=lambda x: x[8], reverse=True)
    elif sort_type == 'comment':
        new_weibo_list = sorted(weibo_list, key=lambda x: x[9], reverse=True)
    elif sort_type == 'sensitive':
        new_weibo_list = sorted(weibo_list, key=lambda x: x[10], reverse=True)
    return new_weibo_list
def search_group_member(task_name, submit_user): task_id = submit_user + '-' + task_name results = es_group_result.get(index=group_index_name,doc_type=group_index_type,\ id=task_id,fields=['uid_list'])['fields']['uid_list'] print results return results
# NOTE(review): fragment -- the enclosing `def ...` header was lost when this
# file was mangled onto single lines; the statements below (with `return`s)
# clearly belong inside a detect-task submission function. Restore the header
# before use.
query.append({'bool':{'should': query_list}})
# look up a single seed user matching the accumulated query; ES failures
# are re-raised as-is
try:
    seed_user_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, \
            body={'query':{'bool':{'must':query}}, 'size':1})['hits']['hits']
except Exception, e:
    raise e
# no hit means the seed user cannot anchor the detect task
try:
    seed_user_source = seed_user_result[0]['_source']
except:
    return 'seed user invalid'
#step2: identify the detect task name is valid----is not in group es
task_information = input_dict['task_information']
task_id = task_information['task_id']
try:
    task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_id)
except:
    task_exist_result = {}
if task_exist_result != {}:
    return 'task name invalid'
#step3: save to es
es_status = save_detect2es(input_dict)
#step4: save to redis queue
redis_status = save_detect2redis(input_dict)
#identify the operation status: both stores must succeed
if es_status==True and redis_status==True:
    status = True
else:
    status = False
def search_group_results(task_name, module):
    """Return one display module of a finished group-analysis task,
    looked up directly by task name (no submit_user prefix).

    Returns 'group task is not exist' when the document is missing,
    'group task is not completed' when status != 1, otherwise a dict of the
    requested module's display fields (stored values are mostly
    json-encoded strings that are decoded here).
    """
    result = {}
    #step1:identify the task_name exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type, \
                id=task_name)['_source']
    except:
        return 'group task is not exist'
    #step2: identify the task status=1(analysis completed)
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #step3:get module result
    if module == 'overview':
        result['task_name'] = source['task_name']
        result['submit_date'] = ts2datetime(source['submit_date'])
        result['state'] = source['state']
        result['submit_user'] = source['submit_user']
        result['density_star'] = source['density_star']
        result['activeness_star'] = source['activeness_star']
        result['influence_star'] = source['influence_star']
        result['importance_star'] = source['importance_star']
        result['tag_vector'] = json.loads(source['tag_vector'])
    elif module == 'basic':
        result['gender'] = json.loads(source['gender'])
        result['verified'] = json.loads(source['verified'])
        result['user_tag'] = json.loads(source['user_tag'])
        result['count'] = source['count']
    elif module == 'activity':
        result['activity_trend'] = json.loads(source['activity_trend'])
        result['activity_time'] = json.loads(source['activity_time'])
        #result['activity_geo_disribution'] = json.loads(source['activity_geo_distribution'])
        # geo distribution is post-processed before display
        new_activity_geo_distribution = deal_geo_distribution(json.loads(source['activity_geo_distribution']))
        result['activity_geo_disribution'] = new_activity_geo_distribution
        result['activiy_geo_vary'] = json.loads(source['activity_geo_vary'])
        # note: result key 'activeness_trend' reads the stored 'activeness' field
        result['activeness_trend'] = json.loads(source['activeness'])
        result['activeness_his'] = json.loads(source['activeness_his'])
        result['activeness_description'] = source['activeness_description']
        result['online_pattern'] = json.loads(source['online_pattern'])
    elif module == 'preference':
        result['keywords'] = json.loads(source['keywords'])
        result['hashtag'] = json.loads(source['hashtag'])
        result['sentiment_word'] = json.loads(source['sentiment_word'])
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'influence':
        result['influence_his'] = json.loads(source['influence_his'])
        # note: result key 'influence_trend' reads the stored 'influence' field
        result['influence_trend'] = json.loads(source['influence'])
        result['influence_in_user'] = json.loads(source['influence_in_user'])
        result['influence_out_user'] = json.loads(source['influence_out_user'])
    elif module == 'social':
        result['in_density'] = source['in_density']
        result['in_inter_user_ratio'] = source['in_inter_user_ratio']
        result['in_inter_weibo_ratio'] = source['in_inter_weibo_ratio']
        result['social_in_record'] = json.loads(source['social_in_record'])
        result['out_inter_user_ratio'] = source['out_inter_user_ratio']
        result['out_inter_weibo_ratio'] = source['out_inter_weibo_ratio']
        result['social_out_record'] = json.loads(source['social_out_record'])
        result['density_description'] = source['density_description']
        result['mention'] = source['mention']
    elif module == 'think':
        result['sentiment_trend'] = json.loads(source['sentiment_trend'])
        result['sentiment_pie'] = json.loads(source['sentiment_pie'])
        result['character'] = json.loads(source['character'])
    return result
def search_group_results(task_name, module):
    """Load a finished group-analysis task by name and return the display
    fields of one module.

    Returns an error string when the task is missing or not yet completed;
    otherwise a dict keyed by display field name. Most stored values are
    json-encoded strings and are decoded here; a few are copied verbatim.
    """
    result = {}
    # the task document must exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type, \
                id=task_name)['_source']
    except:
        return 'group task is not exist'
    # the analysis must be completed (status == 1)
    if source['status'] != 1:
        return 'group task is not completed'
    # (result_key, source_key) pairs copied verbatim, per module
    direct_fields = {
        'overview': [('task_name', 'task_name'), ('state', 'state'),
                     ('submit_user', 'submit_user'),
                     ('density_star', 'density_star'),
                     ('activeness_star', 'activeness_star'),
                     ('influence_star', 'influence_star'),
                     ('importance_star', 'importance_star')],
        'basic': [('count', 'count')],
        'activity': [('activeness_description', 'activeness_description')],
        'social': [('in_density', 'in_density'),
                   ('in_inter_user_ratio', 'in_inter_user_ratio'),
                   ('in_inter_weibo_ratio', 'in_inter_weibo_ratio'),
                   ('out_inter_user_ratio', 'out_inter_user_ratio'),
                   ('out_inter_weibo_ratio', 'out_inter_weibo_ratio'),
                   ('density_description', 'density_description'),
                   ('mention', 'mention')],
    }
    # (result_key, source_key) pairs stored as json strings, decoded here
    json_fields = {
        'overview': [('tag_vector', 'tag_vector')],
        'basic': [('gender', 'gender'), ('verified', 'verified'),
                  ('user_tag', 'user_tag')],
        'activity': [('activity_trend', 'activity_trend'),
                     ('activity_time', 'activity_time'),
                     ('activiy_geo_vary', 'activity_geo_vary'),
                     ('activeness_trend', 'activeness'),
                     ('activeness_his', 'activeness_his'),
                     ('online_pattern', 'online_pattern')],
        'preference': [('keywords', 'keywords'), ('hashtag', 'hashtag'),
                       ('sentiment_word', 'sentiment_word'),
                       ('domain', 'domain'), ('topic', 'topic')],
        'influence': [('influence_his', 'influence_his'),
                      ('influence_trend', 'influence'),
                      ('influence_in_user', 'influence_in_user'),
                      ('influence_out_user', 'influence_out_user')],
        'social': [('social_in_record', 'social_in_record'),
                   ('social_out_record', 'social_out_record')],
        'think': [('sentiment_trend', 'sentiment_trend'),
                  ('sentiment_pie', 'sentiment_pie'),
                  ('character', 'character')],
    }
    for result_key, source_key in direct_fields.get(module, []):
        result[result_key] = source[source_key]
    for result_key, source_key in json_fields.get(module, []):
        result[result_key] = json.loads(source[source_key])
    # module-specific derived fields
    if module == 'overview':
        result['submit_date'] = ts2datetime(source['submit_date'])
    elif module == 'activity':
        # geo distribution is post-processed before display
        result['activity_geo_disribution'] = deal_geo_distribution(
                json.loads(source['activity_geo_distribution']))
    return result
def search_group_results(task_name, module, submit_user):
    """Return one display module of a finished group-analysis task
    (variant with an aggregated China geo breakdown in the activity module).

    Returns 'group task is not exist' / 'group task is not completed' error
    strings, otherwise a dict of the requested module's display fields.
    """
    result = {}
    if RUN_TYPE == 0:
        #jln
        task_id = submit_user + '-' + task_name
    else:
        task_id = submit_user + '-' + task_name
    #step1:identify the task_name exist
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type, \
                id=task_id)['_source']
    except:
        return 'group task is not exist'
    #step2: identify the task status=1(analysis completed)
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    #step3:get module result
    if module == 'overview':
        result['task_name'] = source['task_name']
        result['submit_date'] = ts2datetime(source['submit_date'])
        result['state'] = source['state']
        result['submit_user'] = source['submit_user']
        result['density_star'] = source['density_star']
        result['activeness_star'] = source['activeness_star']
        result['influence_star'] = source['influence_star']
        result['importance_star'] = source['importance_star']
        #need to delete
        result['tag_vector'] = json.loads(source['tag_vector'])
    elif module == 'basic':
        result['gender'] = json.loads(source['gender'])
        result['verified'] = json.loads(source['verified'])
        result['user_tag'] = json.loads(source['user_tag'])
        result['count'] = source['count']
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'activity':
        result['activity_trend'] = json.loads(source['activity_trend'])
        result['activity_time'] = json.loads(source['activity_time'])
        result['activity_geo_disribution'] = json.loads(source['activity_geo_distribution'])
        result['activiy_geo_vary'] = json.loads(source['activity_geo_vary'])
        result['activeness_trend'] = json.loads(source['activeness'])
        result['activeness_his'] = json.loads(source['activeness_his'])
        result['activeness_description'] = source['activeness_description']
        result['online_pattern'] = json.loads(source['online_pattern'])
        # aggregate China geos to {province: {'total': n, city: n}};
        # non-China geos are skipped (assumed intended -- padding below only
        # applies to China entries)
        new_geo = {}
        for uid, geos in result['activity_geo_disribution'].iteritems():
            for geo, count in geos.iteritems():
                geo = geo.split('\t')
                if geo[0] == u'中国':
                    if len(geo) == 1:
                        # BUGFIX: list.append takes one argument; the original
                        # geo.append(u'未知', u'未知') raised TypeError
                        geo.extend([u'未知', u'未知'])
                    elif len(geo) == 2:
                        geo.append(u'未知')
                    try:
                        new_geo[geo[1]]['total'] += count
                    except:
                        new_geo[geo[1]] = {'total': count}
                    try:
                        new_geo[geo[1]][geo[2]] += count
                    except:
                        new_geo[geo[1]][geo[2]] = count
        result['new_geo'] = new_geo
        # vary detail may be absent/undecodable; treat that as empty
        try:
            vary_detail_geo_dict = json.loads(source['vary_detail_geo'])
        except:
            vary_detail_geo_dict = {}
        uid_list = source['uid_list']
        if vary_detail_geo_dict != {}:
            result['vary_detail_geo'] = get_vary_detail_info(vary_detail_geo_dict, uid_list)
        else:
            result['vary_detail_geo'] = {}
        # main start/end geos sorted by count, highest first
        try:
            main_start_geo_dict = json.loads(source['main_start_geo'])
        except:
            main_start_geo_dict = {}
        result['main_start_geo'] = sorted(main_start_geo_dict.items(), key=lambda x: x[1], reverse=True)
        try:
            main_end_geo_dict = json.loads(source['main_end_geo'])
        except:
            main_end_geo_dict = {}
        result['main_end_geo'] = sorted(main_end_geo_dict.items(), key=lambda x: x[1], reverse=True)
    elif module == 'preference':
        result['keywords'] = json.loads(source['keywords'])
        result['hashtag'] = json.loads(source['hashtag'])
        result['sentiment_word'] = json.loads(source['sentiment_word'])
        # topic_model may be missing on older task documents
        try:
            result['topic_model'] = json.loads(source['topic_model'])
        except:
            result['topic_model'] = []
        #need to delete
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'influence':
        result['influence_his'] = json.loads(source['influence_his'])
        result['influence_trend'] = json.loads(source['influence'])
        result['influence_in_user'] = json.loads(source['influence_in_user'])
        result['influence_out_user'] = json.loads(source['influence_out_user'])
    elif module == 'social':
        result['in_density'] = source['in_density']
        result['in_inter_user_ratio'] = source['in_inter_user_ratio']
        result['in_inter_weibo_ratio'] = source['in_inter_weibo_ratio']
        result['social_in_record'] = json.loads(source['social_in_record'])
        result['out_inter_user_ratio'] = source['out_inter_user_ratio']
        result['out_inter_weibo_ratio'] = source['out_inter_weibo_ratio']
        result['social_out_record'] = json.loads(source['social_out_record'])
        result['density_description'] = source['density_description']
        result['mention'] = source['mention']
    elif module == 'think':
        result['sentiment_trend'] = json.loads(source['sentiment_trend'])
        result['sentiment_pie'] = json.loads(source['sentiment_pie'])
        result['character'] = json.loads(source['character'])
    return result
def group_user_weibo(task_name, submit_user, sort_type):
    '''Collect recent weibo posted by the members of a group task.

    Parameters:
        task_name   -- name of the group-analysis task
        submit_user -- user who submitted the task; the ES doc id is
                       "<submit_user>-<task_name>"
        sort_type   -- flow-text field used for ES sorting and final ranking
                       (forced to 'timestamp' when RUN_TYPE == 0)

    Returns:
        'group no exist' when the task id is not found in the group index,
        otherwise a list of rows
        [mid, uid, uname, text, ip, city, timestamp, date,
         retweet_count, comment_count, sensitive_score, weibo_url],
        at most 100, sorted by sort_type descending.
    '''
    weibo_list = []
    now_date = ts2datetime(time.time())
    # run_type: in test mode replay a fixed date and force a sort field
    # known to exist in the test data
    if RUN_TYPE == 0:
        now_date = RUN_TEST_TIME
        sort_type = 'timestamp'
    # step1: fetch the group task document to obtain its member uid list
    task_id = submit_user + '-' + task_name
    try:
        group_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,\
                id=task_id)['_source']
    except:
        group_exist_result = {}
    if not group_exist_result:
        return 'group no exist'
    # step2: gather member weibo from the last 7 daily flow-text indexes
    uid_list = group_exist_result['uid_list']
    now_ts = datetime2ts(now_date)  # hoisted out of the loop: invariant
    for day_offset in range(7, 0, -1):
        iter_date = ts2datetime(now_ts - day_offset * DAY)
        index_name = flow_text_index_name_pre + iter_date
        try:
            weibo_result = es_flow_text.search(index=index_name, doc_type=flow_text_index_type,\
                    body={'query':{'filtered':{'filter':{'terms':{'uid': uid_list}}}}, 'sort':sort_type, 'size':100})['hits']['hits']
        except:
            weibo_result = []
        if weibo_result:
            weibo_list.extend(weibo_result)
    # keep only the global top 100 hits across all days
    sort_weibo_list = sorted(weibo_list, key=lambda x:x['_source'][sort_type], reverse=True)[:100]
    # step3: build uid -> uname mapping from the user portrait index
    try:
        portrait_exist_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type, \
                body={'ids':uid_list})['docs']
    except:
        portrait_exist_result = []
    uid2uname_dict = {}
    for portrait_item in portrait_exist_result:
        uid = portrait_item['_id']
        if portrait_item['found'] == True:
            uid2uname_dict[uid] = portrait_item['_source']['uname']
        else:
            uid2uname_dict[uid] = 'unknown'
    # step4: assemble the result rows
    weibo_list = []
    for weibo_item in sort_weibo_list:
        source = weibo_item['_source']
        mid = source['mid']
        uid = source['uid']
        # fix: fall back to 'unknown' instead of raising KeyError when the
        # portrait mget failed (uid2uname_dict empty) or the author has no
        # portrait document
        uname = uid2uname_dict.get(uid, 'unknown')
        text = source['text']
        ip = source['geo']
        timestamp = source['timestamp']
        date = ts2date(timestamp)
        weibo_url = weiboinfo2url(uid, mid)
        # run_type: interaction counters are read only in production mode
        if RUN_TYPE == 1:
            retweet_count = source['retweet_count']
            comment_count = source['comment_count']
            sensitive_score = source['sensitive_score']
        else:
            retweet_count = 0
            comment_count = 0
            sensitive_score = 0
        city = ip2city(ip)
        weibo_list.append([mid, uid, uname, text, ip, city, timestamp, date, retweet_count, comment_count, sensitive_score, weibo_url])
    return weibo_list
try: seed_user_result = es_user_portrait.search(index=portrait_index_name, doc_type=portrait_index_type, \ body={'query':{'bool':{'must':query}}, 'size':1})['hits']['hits'] except Exception, e: raise e try: seed_user_source = seed_user_result[0]['_source'] except: return 'seed user invalid' #step2: identify the detect task name is valid----is not in group es task_information = input_dict['task_information'] task_name = task_information['task_name'] try: task_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type, id=task_name) except: task_exist_result = {} if task_exist_result != {}: return 'task name invalid' #step3: save to es es_status = save_detect2es(input_dict) #step4: save to redis queue redis_status = save_detect2redis(input_dict) #identify the operation status if es_status == True and redis_status == True: status = True else: status = False
def search_group_results(task_name, module, submit_user):
    '''Return the analysis result of one module of a completed group task.

    Parameters:
        task_name   -- name of the group-analysis task
        module      -- which result section to return: one of 'overview',
                       'basic', 'activity', 'preference', 'influence',
                       'social', 'think' (any other value yields {})
        submit_user -- user who submitted the task; the ES doc id is
                       "<submit_user>-<task_name>"

    Returns:
        'group task is not exist' when the doc is missing,
        'group task is not completed' when status != 1,
        otherwise a dict with the module's fields (JSON-encoded ES fields
        are decoded here).
    '''
    result = {}
    task_id = submit_user + '-' + task_name
    # step1: identify that the task exists in the group result index
    try:
        source = es_group_result.get(index=group_index_name, doc_type=group_index_type, \
                id=task_id)['_source']
    except:
        return 'group task is not exist'
    # step2: identify the task status == 1 (analysis completed)
    status = source['status']
    if status != 1:
        return 'group task is not completed'
    # step3: extract the requested module's fields
    if module == 'overview':
        result['task_name'] = source['task_name']
        result['submit_date'] = ts2datetime(source['submit_date'])
        result['state'] = source['state']
        result['submit_user'] = source['submit_user']
        result['density_star'] = source['density_star']
        result['activeness_star'] = source['activeness_star']
        result['influence_star'] = source['influence_star']
        result['importance_star'] = source['importance_star']
        # need to delete (legacy field kept for now)
        result['tag_vector'] = json.loads(source['tag_vector'])
    elif module == 'basic':
        result['gender'] = json.loads(source['gender'])
        result['verified'] = json.loads(source['verified'])
        result['user_tag'] = json.loads(source['user_tag'])
        result['count'] = source['count']
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'activity':
        result['activity_trend'] = json.loads(source['activity_trend'])
        result['activity_time'] = json.loads(source['activity_time'])
        # NOTE(review): output keys 'activity_geo_disribution' and
        # 'activiy_geo_vary' are misspelled relative to the ES source
        # fields, but downstream code reads these exact spellings
        # (see the new_geo aggregation elsewhere in this file) — do not
        # "fix" them without updating every consumer.
        result['activity_geo_disribution'] = json.loads(source['activity_geo_distribution'])
        result['activiy_geo_vary'] = json.loads(source['activity_geo_vary'])
        result['activeness_trend'] = json.loads(source['activeness'])
        result['activeness_his'] = json.loads(source['activeness_his'])
        result['activeness_description'] = source['activeness_description']
        result['online_pattern'] = json.loads(source['online_pattern'])
        # vary_detail_geo may be absent or malformed; treat as empty
        try:
            vary_detail_geo_dict = json.loads(source['vary_detail_geo'])
        except:
            vary_detail_geo_dict = {}
        uid_list = source['uid_list']
        if vary_detail_geo_dict != {}:
            result['vary_detail_geo'] = get_vary_detail_info(vary_detail_geo_dict, uid_list)
        else:
            result['vary_detail_geo'] = {}
        # main start/end geo: sort (place, count) pairs by count, descending
        try:
            main_start_geo_dict = json.loads(source['main_start_geo'])
        except:
            main_start_geo_dict = {}
        result['main_start_geo'] = sorted(main_start_geo_dict.items(), key=lambda x:x[1], reverse=True)
        try:
            main_end_geo_dict = json.loads(source['main_end_geo'])
        except:
            main_end_geo_dict = {}
        result['main_end_geo'] = sorted(main_end_geo_dict.items(), key=lambda x:x[1], reverse=True)
    elif module == 'preference':
        result['keywords'] = json.loads(source['keywords'])
        result['hashtag'] = json.loads(source['hashtag'])
        result['sentiment_word'] = json.loads(source['sentiment_word'])
        # topic_model may be absent on older docs; default to empty list
        try:
            result['topic_model'] = json.loads(source['topic_model'])
        except:
            result['topic_model'] = []
        # need to delete (duplicated from 'basic' module)
        result['domain'] = json.loads(source['domain'])
        result['topic'] = json.loads(source['topic'])
    elif module == 'influence':
        result['influence_his'] = json.loads(source['influence_his'])
        # note: trend is stored under the ES field 'influence'
        result['influence_trend'] = json.loads(source['influence'])
        result['influence_in_user'] = json.loads(source['influence_in_user'])
        result['influence_out_user'] = json.loads(source['influence_out_user'])
    elif module == 'social':
        result['in_density'] = source['in_density']
        result['in_inter_user_ratio'] = source['in_inter_user_ratio']
        result['in_inter_weibo_ratio'] = source['in_inter_weibo_ratio']
        result['social_in_record'] = json.loads(source['social_in_record'])
        result['out_inter_user_ratio'] = source['out_inter_user_ratio']
        result['out_inter_weibo_ratio'] = source['out_inter_weibo_ratio']
        result['social_out_record'] = json.loads(source['social_out_record'])
        result['density_description'] = source['density_description']
        result['mention'] = source['mention']
    elif module == 'think':
        result['sentiment_trend'] = json.loads(source['sentiment_trend'])
        result['sentiment_pie'] = json.loads(source['sentiment_pie'])
        result['character'] = json.loads(source['character'])
    return result