示例#1
0
def conclusion_on_influence(uid):
    """Compare a user's averaged scores with the rest of the copy-portrait index.

    For each of ``aver_activeness``, ``aver_influence`` and ``aver_importance``
    the document ``uid`` is compared with the whole index:

    * the user's value as an integer percentage of the largest value found
      in the index (0 when no usable maximum exists), and
    * the number of documents whose value is strictly greater than the
      user's value.

    Returns:
        A 7-item list ``[activeness_pct, activeness_count, influence_pct,
        influence_count, importance_pct, importance_count, total_number]``
        where ``total_number`` is the document count of the index.  When the
        user's document cannot be fetched, the first six entries are 0.
    """
    total_number = es.count(index=copy_portrait_index_name, doc_type=copy_portrait_index_type)["count"]

    try:
        influ_result = es.get(index=copy_portrait_index_name, doc_type=copy_portrait_index_type, id=uid)["_source"]
    except Exception:
        # Unknown user (or failed lookup): report zeros instead of failing.
        return [0, 0, 0, 0, 0, 0, total_number]

    result = []
    # The order of the fields fixes the layout of the returned list.
    for field in ("aver_activeness", "aver_influence", "aver_importance"):
        value = influ_result.get(field) or 0

        # Largest value of this field in the index (top hit of a descending sort).
        top_query_body = {"query": {"match_all": {}}, "sort": {field: {"order": "desc"}}, "size": 1}
        top_hits = es.search(
            index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=top_query_body
        )["hits"]["hits"]
        top_value = top_hits[0]["sort"][0] if top_hits else 0

        # Number of documents scoring strictly higher than this user.
        higher_query_body = {"query": {"filtered": {"filter": {"range": {field: {"gt": value}}}}}}
        higher_count = es.count(
            index=copy_portrait_index_name, doc_type=copy_portrait_index_type, body=higher_query_body
        )["count"]

        # Guard against an empty index or an all-zero field (division by zero).
        percent = int(value * 100.0 / top_value) if top_value else 0
        result.extend([percent, higher_count])

    result.append(total_number)
    return result
def ajax_create_task():
    """Create a sensing task from the query string of the current request.

    Query parameters read from ``request.args``:
        task_number: maximum number of unfinished tasks allowed (default 1).
        task_name: name of the task; required.
        create_by: creating user (default ``'admin'``).
        stop_time: stop timestamp (default ``"default"``).
        social_sensors: comma-separated uid list; required.
        remark: free-text remark.

    Returns:
        ``'["0"]'`` when a task with id ``<create_by>-<task_name>`` already
        exists, the plain string ``"more than limit"`` when the number of
        tasks with ``processing_status == "1"`` and ``finish == "0"`` has
        reached ``task_number``, ``'["-1"]'`` when ``task_name`` or
        ``social_sensors`` is missing, and ``'["1"]'`` once the task has been
        indexed.
    """
    # task_name forbid illegal enter
    task_number = request.args.get('task_number', 1)
    task_name = request.args.get('task_name', '')  # must
    create_by = request.args.get('create_by', 'admin')  # user
    stop_time = request.args.get('stop_time', "default")  # timestamp, 1234567890
    social_sensors = request.args.get("social_sensors", "")  # uid_list, split with ","
    remark = request.args.get("remark", "")

    _id = create_by + "-" + str(task_name)
    if es.exists(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id):
        return json.dumps(["0"])  # task names must not collide

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"processing_status": "1"}},
                            {"term": {"finish": "0"}}
                        ]
                    }
                }
            }
        }
    }
    unfinish_number = es.count(index=index_manage_sensing_task, doc_type=task_doc_type, body=query_body)['count']
    if unfinish_number > (int(task_number) - 1):
        return "more than limit"

    # Both values are mandatory.  Previously an empty task_name fell through
    # to es.index() with ``task_detail`` unbound and raised NameError.
    if not task_name or not social_sensors:
        return json.dumps(['-1'])

    task_detail = {
        "task_name": task_name,
        "create_by": create_by,  # user who created the task
        "stop_time": stop_time,
        "remark": remark,
        # de-duplicated uid list, stored as a JSON string
        "social_sensors": json.dumps(list(set(social_sensors.split(',')))),
        "create_at": int(time.time()),
        "warning_status": '0',
        "finish": "0",  # not end the task
        "history_status": json.dumps([]),  # ts, keywords, warning_status
        "burst_reason": '',
        "processing_status": "1",  # task is in progress
    }

    # store task detail into es
    es.index(index=index_manage_sensing_task, doc_type=task_doc_type, id=_id, body=task_detail)

    return json.dumps(["1"])
def new_get_user_evaluate(uid):
    """Collect the evaluation summaries of one user for four metrics.

    For each of influence, importance, activeness and sensitive the user's
    history document is fetched from its own index and handed to
    ``get_evaluate_max_min_now``; the total number of documents in the
    portrait index is appended to the list that helper returns.

    Returns:
        A dict with the keys ``'influence'``, ``'importance'``,
        ``'activeness'`` and ``'sensitive'``.  Each value is the helper's
        list with ``all_count`` appended, or ``['', '', '', '', all_count]``
        when no history document could be read.  ``all_count`` is ``''``
        when the count request reports no successful shard.
    """
    # get all count in user_portrait
    all_count_results = es_user_portrait.count(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'query': {'match_all': {}}})
    if all_count_results['_shards']['successful'] != 0:
        all_count = all_count_results['count']
    else:
        all_count = ''

    # (result key, history index, history doc type, field prefix for the helper)
    metric_sources = [
        ('influence', COPY_USER_PORTRAIT_INFLUENCE, COPY_USER_PORTRAIT_INFLUENCE_TYPE, 'bci'),
        ('importance', COPY_USER_PORTRAIT_IMPORTANCE, COPY_USER_PORTRAIT_IMPORTANCE_TYPE, 'importance'),
        ('activeness', COPY_USER_PORTRAIT_ACTIVENESS, COPY_USER_PORTRAIT_ACTIVENESS_TYPE, 'activeness'),
        ('sensitive', COPY_USER_PORTRAIT_SENSITIVE, COPY_USER_PORTRAIT_SENSITIVE_TYPE, 'sensitive'),
    ]

    # NOTE(review): the influence branch used to look up 'bci_week_ave' and
    # call get_influence_week_ave_rank(), but the resulting list was never
    # stored or returned; that dead computation is dropped here -- confirm it
    # was not relied on for a side effect.
    results = {}
    for result_key, history_index, history_type, field_prefix in metric_sources:
        try:
            history = ES_COPY_USER_PORTRAIT.get(index=history_index, doc_type=history_type, id=uid)['_source']
        except Exception:
            # Missing document or failed lookup: fall back to the blank row.
            history = None

        if history:
            # max value / min value / now value, as produced by the helper
            evaluation = get_evaluate_max_min_now(history, field_prefix)
            evaluation.append(all_count)
        else:
            evaluation = ['', '', '', '', all_count]
        results[result_key] = evaluation

    return results
def new_get_user_evaluate(uid):
    """Collect the evaluation summaries of one user for four metrics.

    For each of influence, importance, activeness and sensitive the user's
    history document is fetched from its own index and handed to
    ``get_evaluate_max_min_now``; the total number of documents in the
    portrait index is appended to the list that helper returns.

    Returns:
        A dict with the keys ``'influence'``, ``'importance'``,
        ``'activeness'`` and ``'sensitive'``.  Each value is the helper's
        list with ``all_count`` appended, or ``['', '', '', '', all_count]``
        when no history document could be read.  ``all_count`` is ``''``
        when the count request reports no successful shard.
    """
    # get all count in user_portrait
    all_count_results = es_user_portrait.count(
        index=portrait_index_name, doc_type=portrait_index_type,
        body={'query': {'match_all': {}}})
    if all_count_results['_shards']['successful'] != 0:
        all_count = all_count_results['count']
    else:
        all_count = ''

    # (result key, history index, history doc type, field prefix for the helper)
    metric_sources = [
        ('influence', COPY_USER_PORTRAIT_INFLUENCE, COPY_USER_PORTRAIT_INFLUENCE_TYPE, 'bci'),
        ('importance', COPY_USER_PORTRAIT_IMPORTANCE, COPY_USER_PORTRAIT_IMPORTANCE_TYPE, 'importance'),
        ('activeness', COPY_USER_PORTRAIT_ACTIVENESS, COPY_USER_PORTRAIT_ACTIVENESS_TYPE, 'activeness'),
        ('sensitive', COPY_USER_PORTRAIT_SENSITIVE, COPY_USER_PORTRAIT_SENSITIVE_TYPE, 'sensitive'),
    ]

    # NOTE(review): the influence branch used to look up 'bci_week_ave' and
    # call get_influence_week_ave_rank(), but the resulting list was never
    # stored or returned; that dead computation is dropped here -- confirm it
    # was not relied on for a side effect.
    results = {}
    for result_key, history_index, history_type, field_prefix in metric_sources:
        try:
            history = ES_COPY_USER_PORTRAIT.get(index=history_index, doc_type=history_type, id=uid)['_source']
        except Exception:
            # Missing document or failed lookup: fall back to the blank row.
            history = None

        if history:
            # max value / min value / now value, as produced by the helper
            evaluation = get_evaluate_max_min_now(history, field_prefix)
            evaluation.append(all_count)
        else:
            evaluation = ['', '', '', '', all_count]
        results[result_key] = evaluation

    return results
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Find users whose portrait fields resemble those of ``uid``.

    Args:
        uid: id of the reference user.  Its document is always read from
            ``portrait_index_name`` / ``portrait_index_type``.
        query_fields_dict: maps portrait field names to the boost applied to
            matches on that field, plus a mandatory ``'size'`` entry holding
            the maximum number of users to return.  The mapping is copied
            and left unmodified.
        index_name: index that is searched.
        doctype: document type that is searched.

    Returns:
        ``[]`` when the reference user has no value for any requested field.
        Otherwise ``[own_info, hit_info, ..., number]`` where ``own_info`` is
        ``[uid, uname, activeness, importance, influence]`` of the reference
        user, every ``hit_info`` holds the same five values of a matching
        user followed by its relevance as a percentage of the best hit's
        score, and ``number`` is the count returned for the query body
        before any field clause is added.  The three evaluation values are
        normalised with ``log10(value / max * 9 + 1) * 100``.

    Raises:
        KeyError: if ``query_fields_dict`` has no ``'size'`` entry.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)['_source']

    # Work on a copy so the caller's mapping is not stripped as a side effect.
    field_weights = dict(query_fields_dict)
    query_number = field_weights.pop('size')  # required number

    # Filter the search keys: keep only the fields the reference user
    # actually has a value for ('&'-separated terms).
    search_dict = {}
    for field in list(field_weights):
        own_value = personal_info.get(field)
        if own_value:
            search_dict[field] = own_value.split('&')
        else:
            del field_weights[field]

    if not search_dict:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }

    # Counted while the ``must`` list is still empty.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number

    # One ``should`` group per field: any of the user's terms may match.
    for field, boost in field_weights.items():
        should_clauses = [
            {'wildcard': {field: {'wildcard': '*' + term + '*', 'boost': boost}}}
            for term in search_dict[field]
        ]
        must_clauses.append({'bool': {'should': should_clauses}})

    # presumably the uids of users scheduled for deletion -- verify against
    # all_delete_uid(); a set keeps the per-hit membership test cheap.
    filter_uid = set(all_delete_uid())

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()

    def normalise(field, value):
        # float() avoids truncating integer division on Python 2.
        if field in evaluate_index_list:
            return math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        return value

    # Score of the best hit other than the reference user itself; stays None
    # when there is no such hit (previously unbound / IndexError).
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    return_list = []
    for item in result:
        hit_id = item['_id']
        # Skip the reference user and filtered users.  The old test
        # ``uid in filter_uid`` did not depend on the hit at all.
        if hit_id == uid or hit_id in filter_uid:
            continue
        info = [normalise(field, item['_source'][field]) for field in field_list]
        info.append((item['_score'] / float(top_score) * 100) if top_score else 0)
        return_list.append(info)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    own_info = [normalise(field, personal_info[field]) for field in field_list]

    results = [own_info]
    results.extend(return_list)
    return results
示例#6
0
def imagine(submit_user,
            uid,
            query_fields_dict,
            index_name=portrait_index_name,
            doctype=portrait_index_type):
    """Find users resembling ``uid`` by portrait fields and custom tags.

    Args:
        submit_user: user whose custom tags are considered; they are read
            from the ``"<submit_user>-tag"`` field of the reference document,
            stored as ``name-value`` pairs joined by ``'&'``.
        uid: id of the reference user.  Its document is always read from
            ``portrait_index_name`` / ``portrait_index_type``.
        query_fields_dict: maps field or tag names to a weight, plus a
            mandatory ``'size'`` entry holding the maximum number of users
            to return.  Entries with a falsy weight are not searched.  The
            mapping is copied and left unmodified.
        index_name: index that is searched.
        doctype: document type that is searched.

    Returns:
        ``[]`` when neither a portrait field nor a tag can be searched.
        Otherwise ``[own_info, hit_info, ..., number, settings]`` where
        ``own_info`` is ``[uid, uname, activeness, importance, influence]``
        of the reference user, every ``hit_info`` holds the same five values
        of a matching user followed by its relevance as a percentage of the
        best hit's score, ``number`` is the count returned for the query
        body before any clause is added, and ``settings`` is the weight
        mapping without ``'size'`` and without the entries that could not be
        searched as portrait fields.  The three evaluation values are
        normalised with ``log10(value / max * 9 + 1) * 100``.

    Raises:
        KeyError: if ``query_fields_dict`` has no ``'size'`` entry.
    """
    personal_info = es.get(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           id=uid,
                           _source=True)['_source']

    # Parse the custom tags.  partition() keeps values that themselves
    # contain '-' intact and lets malformed pairs (no '-') be skipped.
    user_tag = submit_user + "-tag"
    tag_dict = {}
    for pair in (personal_info.get(user_tag) or "").split('&'):
        tag_name, separator, tag_value = pair.partition('-')
        if separator:
            tag_dict[tag_name] = tag_value

    # Work on a copy so the caller's mapping is not stripped as a side effect.
    settings = dict(query_fields_dict)
    query_number = settings.pop('size')  # required number

    # Filter the search keys and drop the ones that cannot be used.
    search_dict = {}
    tag_attri_value = []
    for key in list(settings):
        if not settings[key]:
            # Falsy weight: never searched, but still reported in settings.
            continue
        if key in personal_info:
            if personal_info[key]:
                search_dict[key] = personal_info[key].split('&')
            else:
                del settings[key]
        else:
            # Not a portrait field: it may be one of the custom tags.
            del settings[key]
            if tag_dict.get(key):
                tag_attri_value.append(key + "-" + tag_dict[key])

    if not search_dict and not tag_attri_value:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }
    # Counted while the ``must`` list is still empty.
    number = es.count(index=index_name, doc_type=doctype,
                      body=query_body)['count']

    query_body['size'] = 150  # default number

    if tag_attri_value:
        must_clauses.append({"terms": {user_tag: tag_attri_value}})

    # One ``should`` group per field: any of the user's terms may match.
    # Iterating search_dict guarantees every key has its term list.
    for key, terms in search_dict.items():
        should_clauses = [{
            'wildcard': {
                key: {
                    'wildcard': '*' + term + '*',
                    'boost': settings[key]
                }
            }
        } for term in terms]
        must_clauses.append({'bool': {'should': should_clauses}})

    # presumably the uids of users scheduled for deletion -- verify against
    # all_delete_uid(); a set keeps the per-hit membership test cheap.
    filter_uid = set(all_delete_uid())
    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()

    def normalise(field, value):
        return math.log(
            value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100

    # Score of the best hit other than the reference user itself; stays None
    # when there is no such hit (previously unbound for a single hit).
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    return_list = []
    for item in result:
        hit_id = item['_id']
        # Skip the reference user and filtered users.  The old test
        # ``uid in filter_uid`` did not depend on the hit at all.
        if hit_id == uid or hit_id in filter_uid:
            continue
        info = []
        for field in field_list:
            value = item['_source'][field]
            if field in evaluate_index_list:
                value = normalise(field, value)
            elif not value:
                # Empty uid/uname: fall back to the document id.
                value = hit_id
            info.append(value)
        info.append((item['_score'] / float(top_score) * 100) if top_score else 0)
        return_list.append(info)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    own_info = []
    for field in field_list:
        value = personal_info[field]
        if field in evaluate_index_list:
            value = normalise(field, value)
        own_info.append(value)

    results = [own_info]
    results.extend(return_list)
    results.append(settings)

    return results
示例#7
0
def conclusion_on_influence(uid):
    """Compare a user's averaged scores with the rest of the copy-portrait index.

    For each of ``aver_activeness``, ``aver_influence`` and ``aver_importance``
    the document ``uid`` is compared with the whole index:

    * the user's value as an integer percentage of the largest value found
      in the index (0 when no usable maximum exists), and
    * the number of documents whose value is strictly greater than the
      user's value.

    Returns:
        A 7-item list ``[activeness_pct, activeness_count, influence_pct,
        influence_count, importance_pct, importance_count, total_number]``
        where ``total_number`` is the document count of the index.  When the
        user's document cannot be fetched, the first six entries are 0.
    """
    total_number = es.count(index=copy_portrait_index_name,
                            doc_type=copy_portrait_index_type)["count"]

    try:
        influ_result = es.get(index=copy_portrait_index_name,
                              doc_type=copy_portrait_index_type,
                              id=uid)['_source']
    except Exception:
        # Unknown user (or failed lookup): report zeros instead of failing.
        return [0, 0, 0, 0, 0, 0, total_number]

    result = []
    # The order of the fields fixes the layout of the returned list.
    for field in ("aver_activeness", "aver_influence", "aver_importance"):
        value = influ_result.get(field) or 0

        # Largest value of this field in the index (top hit of a descending sort).
        top_query_body = {
            "query": {
                "match_all": {}
            },
            "sort": {
                field: {
                    "order": "desc"
                }
            },
            "size": 1
        }
        top_hits = es.search(index=copy_portrait_index_name,
                             doc_type=copy_portrait_index_type,
                             body=top_query_body)['hits']['hits']
        top_value = top_hits[0]['sort'][0] if top_hits else 0

        # Number of documents scoring strictly higher than this user.
        higher_query_body = {
            "query": {
                "filtered": {
                    "filter": {
                        "range": {
                            field: {
                                "gt": value
                            }
                        }
                    }
                }
            }
        }
        higher_count = es.count(index=copy_portrait_index_name,
                                doc_type=copy_portrait_index_type,
                                body=higher_query_body)['count']

        # Guard against an empty index or an all-zero field (division by zero).
        percent = int(value * 100.0 / top_value) if top_value else 0
        result.extend([percent, higher_count])

    result.append(total_number)
    return result
示例#8
0
def imagine(uid, query_fields_dict, index_name="user_portrait", doctype='user'):
    """Find users whose portrait fields resemble those of ``uid``.

    Args:
        uid: search users related to this uid.  Its document is always read
            from index ``"user_portrait"``, doc type ``"user"``.
        query_fields_dict: defines the weight of each search field, e.g.
            ``"domain": 2``.  Fields: domain, topic, keywords,
            psycho_status, psycho_feature, activity_geo, hashtag.  It must
            also contain ``'field'`` (evaluation value blended into the
            score: ``"activeness"``, ``"importance"`` or ``"influence"``;
            any other value leaves the relevance score unchanged) and
            ``'size'`` (maximum number of users to return).  The mapping is
            copied and left unmodified.
        index_name: index that is searched.
        doctype: document type that is searched.

    Returns:
        ``[]`` when the reference user has no value for any requested field.
        Otherwise ``[own_info, hit_info, ..., number]`` where ``own_info`` is
        ``[uid, uname, activeness, importance, influence]`` of the reference
        user, every ``hit_info`` holds the same five values of a matching
        user followed by its ``_score``, and ``number`` is the count
        returned for the query body before any field clause is added.

    Raises:
        KeyError: if ``'field'`` or ``'size'`` is missing from
            ``query_fields_dict``.
    """
    personal_info = es.get(index="user_portrait", doc_type="user", id=uid, _source=True)['_source']

    # Work on a copy so the caller's mapping is not stripped as a side effect.
    field_weights = dict(query_fields_dict)
    rank_field = field_weights.pop('field')
    query_number = field_weights.pop('size')  # required number

    # Keep only the fields the reference user actually has a value for
    # ('&'-separated terms).
    search_dict = {}
    for field in list(field_weights):
        own_value = personal_info.get(field)
        if own_value:
            search_dict[field] = own_value.split('&')
        else:
            del field_weights[field]

    if not search_dict:
        return []

    must_clauses = []
    function_score = {
        'query': {
            'bool': {
                'must': must_clauses
            }
        }
    }

    # field_value_factor settings per evaluation value.
    factor_by_field = {"activeness": 100, "importance": 0.01, "influence": 0.1}
    score_standard = {"modifier": "log1p"}
    if rank_field in factor_by_field:
        score_standard['field'] = rank_field
        score_standard['factor'] = factor_by_field[rank_field]
    else:
        # Factor 0 combined with boost_mode "sum" adds log1p(0) == 0,
        # i.e. the plain relevance score is kept.
        score_standard['field'] = "influence"
        score_standard['factor'] = 0
        function_score['boost_mode'] = "sum"
    function_score['field_value_factor'] = score_standard

    query_body = {'query': {'function_score': function_score}}

    # Counted while the ``must`` list is still empty.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 100  # default number

    # One ``should`` group per field: any of the user's terms may match.
    for field, boost in field_weights.items():
        should_clauses = [
            {'wildcard': {field: {'wildcard': '*' + term + '*', 'boost': boost}}}
            for term in search_dict[field]
        ]
        must_clauses.append({'bool': {'should': should_clauses}})

    # presumably the uids of users scheduled for deletion -- verify against
    # all_delete_uid(); a set keeps the per-hit membership test cheap.
    filter_uid = set(all_delete_uid())

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']

    return_list = []
    for item in result:
        hit_id = item['_id']
        # Skip the reference user and filtered users.  The old test
        # ``uid in filter_uid`` did not depend on the hit at all.
        if hit_id == uid or hit_id in filter_uid:
            continue
        info = [item['_source'][field] for field in field_list]
        info.append(item['_score'])
        return_list.append(info)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    results = [[personal_info[field] for field in field_list]]
    results.extend(return_list)
    return results
示例#9
0
def _submit_offline_rank_task(username, task_type, scope_tag, default_time,
                              st, et, arg, sort_norm, sort_scope, isall,
                              task_number, number):
    """Register an offline ranking task unless the user is at the task limit.

    Returns the string ``"more than limit"`` when the number of tasks counted
    for ``username`` has reached ``task_number``; otherwise a dict
    ``{"flag": True, "search_id": ...}`` holding the value returned by
    ``add_task``.
    """
    during = (datetime2ts(et) - datetime2ts(st)) / DAY + 1

    # The larger threshold has to be tested first: with the previous order
    # (``> 3`` then ``elif > 16``) the 30 branch could never be reached.
    if during > 16:
        time_value = 30
    elif during > 3:
        time_value = 7
    else:
        time_value = default_time

    running_number = es_user_portrait.count(
        index='user_rank_keyword_task',
        doc_type='user_rank_task',
        body=query_task_number(username))['count']
    if running_number > task_number - 1:
        return "more than limit"

    search_id = add_task(username, task_type, scope_tag, 'flow_text_',
                         during, st, et, arg, sort_norm, sort_scope,
                         time_value, isall, number)
    return {"flag": True, "search_id": search_id}


def user_sort_interface(username,
                        time,
                        sort_scope,
                        sort_norm,
                        arg=None,
                        st=None,
                        et=None,
                        isall=False,
                        task_number=0,
                        number=100):
    """Rank users online, or register an offline ranking task.

    Args:
        username: user submitting the request.
        time: time value forwarded to the online filters; the offline scopes
            derive their own value from ``st`` / ``et`` instead.
        sort_scope: selects the branch.  ``'all_limit_keyword'`` (with
            ``isall``) and ``'in_limit_keyword'`` / ``'in_limit_hashtag'``
            (without ``isall``) register an offline task; ``'all_nolimit'``
            (with ``isall``) and every other scope without ``isall`` are
            answered online.
        sort_norm: ranking criterion, forwarded unchanged.
        arg: keyword / hashtag / scope argument, forwarded unchanged.
        st, et: start and end of the period, converted with ``datetime2ts``
            for the offline scopes.
        isall: whether the whole network is ranked.
        task_number: maximum number of tasks ``username`` may have running.
        number: number of users requested.

    Returns:
        For offline scopes, ``"more than limit"`` or
        ``{"flag": True, "search_id": ...}``; otherwise the result of
        ``make_up_user_info`` for the users found (an empty list when
        ``isall`` is set and the scope is not recognised).
    """
    task_number = int(task_number)

    if isall:
        # deal with the situation of all net user
        if sort_scope == 'all_limit_keyword':
            # offline job: add job to es index
            return _submit_offline_rank_task(
                username, "keyword", "all", 7, st, et, arg, sort_norm,
                sort_scope, isall, task_number, number)
        if sort_scope == 'all_nolimit':
            # online job
            user_list = all_sort_filter(None, sort_norm, time, False, number)
        else:
            user_list = []
    else:
        if sort_scope == 'in_limit_keyword':
            # offline job
            return _submit_offline_rank_task(
                username, "keyword", "in", 1, st, et, arg, sort_norm,
                sort_scope, isall, task_number, number)
        if sort_scope == 'in_limit_hashtag':
            # offline job
            return _submit_offline_rank_task(
                username, "hashtag", "in", 7, st, et, arg, sort_norm,
                sort_scope, isall, task_number, number)
        # find the scope, e.g. in_limit_topic
        user_list = in_sort_filter(time, sort_norm, sort_scope, arg, [],
                                   False, number)

    return make_up_user_info(user_list, isall, time, sort_norm)
示例#10
0
def _sorted_json_items(raw, limit=None):
    """Decode a JSON object string into (key, value) pairs sorted by value, descending.

    A falsy `raw` yields an empty list. When `limit` is given only the first
    `limit` pairs are returned.
    """
    if not raw:
        return []
    pairs = sorted(json.loads(raw).items(), key=lambda pair: pair[1], reverse=True)
    if limit is None:
        return pairs
    return pairs[:limit]


def _portrait_rank(field, value, index_name, index_type):
    """Count portrait documents whose `field` lies in the range [value, 1000000].

    Returns 0 when `value` is falsy or when no shard answered the count request.
    """
    if not value:
        return 0
    query_body = {
        'query': {
            'range': {
                field: {
                    'from': value,
                    'to': 1000000
                }
            }
        }
    }
    response = es_user_portrait.count(index=index_name, doc_type=index_type, body=query_body)
    if response['_shards']['successful'] != 0:
        return response['count']
    print('es_%s_rank error' % field)
    return 0


def search_attribute_portrait(uid):
    """Fetch the portrait document of `uid` and reshape its fields for display.

    The document is read from index 'user_portrait', type 'user'. JSON-encoded
    fields are decoded and sorted by value (descending); '_'-joined fields are
    split into lists; empty fields become empty lists.

    Keys written on the returned dict:
      keywords, activity_geo            -- all (key, value) pairs, sorted
      hashtag_dict, topic, emoticon,
      online_pattern, psycho_status     -- top 5 (key, value) pairs
      hashtag_description               -- hashtag_description(<decoded dict>) or ''
      emotion_words                     -- {emotion_mark_dict[type]: top 5 pairs}
      emotion_conclusion                -- get_emotion_conclusion() on types '126'/'127'
      domain, psycho_feature            -- lists split on '_'
      description                       -- 'description' of the weibo_user profile, or ''
      importance_rank, activeness_rank,
      influence_rank                    -- count of documents with a value >= this user's
      all_count                         -- total number of portrait documents
      link_conclusion                   -- get_link_conclusion(results['link'])

    Returns None when the portrait document cannot be fetched.
    Raises KeyError if the document lacks one of the fields read here.
    """
    index_name = 'user_portrait'
    index_type = 'user'
    try:
        results = es_user_portrait.get(index=index_name, doc_type=index_type, id=uid)['_source']
    except Exception:
        # any lookup failure is reported to the caller as "no portrait"
        return None

    results['keywords'] = _sorted_json_items(results['keywords'])
    # the raw 'activity_geo_dict' field is left untouched; the sorted view
    # is stored under a separate key
    results['activity_geo'] = _sorted_json_items(results['activity_geo_dict'])

    if results['hashtag_dict']:
        hashtag_dict = json.loads(results['hashtag_dict'])
        sort_hashtag_dict = sorted(hashtag_dict.items(), key=lambda pair: pair[1], reverse=True)
        results['hashtag_dict'] = sort_hashtag_dict[:5]
        # the description is built from the complete dict, not only the top 5
        results['hashtag_description'] = hashtag_description(hashtag_dict)
    else:
        results['hashtag_dict'] = []
        results['hashtag_description'] = ''

    emotion_result = {}
    emotion_conclusion_dict = {}
    if results['emotion_words']:
        emotion_words_dict = json.loads(results['emotion_words'])
        for word_type in emotion_mark_dict:
            try:
                word_dict = emotion_words_dict[word_type]
                if word_type in ('126', '127'):
                    emotion_conclusion_dict[word_type] = word_dict
                word_list = sorted(word_dict.items(), key=lambda pair: pair[1], reverse=True)[:5]
            except Exception:
                # best effort: a missing or malformed category yields no words
                word_list = []
            emotion_result[emotion_mark_dict[word_type]] = word_list
    results['emotion_words'] = emotion_result
    results['emotion_conclusion'] = get_emotion_conclusion(emotion_conclusion_dict)

    for field in ('topic', 'emoticon', 'online_pattern', 'psycho_status'):
        results[field] = _sorted_json_items(results[field], limit=5)
    for field in ('domain', 'psycho_feature'):
        results[field] = results[field].split('_') if results[field] else []

    # profile description (best effort: any failure leaves it empty)
    description = ''
    if results['uid']:
        try:
            profile_result = es_user_profile.get(index='weibo_user', doc_type='user', id=results['uid'])
            description = profile_result['_source']['description']
        except Exception:
            description = ''
    else:
        results['uid'] = ''
    results['description'] = description

    # every rank key is always present, defaulting to 0 when the value is empty
    for field in ('importance', 'activeness', 'influence'):
        results[field + '_rank'] = _portrait_rank(field, results[field], index_name, index_type)

    # total count in user_portrait
    all_count_results = es_user_portrait.count(index=index_name, doc_type=index_type,
                                               body={'query': {'match_all': {}}})
    if all_count_results['_shards']['successful'] != 0:
        results['all_count'] = all_count_results['count']
    else:
        print('es_user_portrait error')
        results['all_count'] = 0

    # link conclusion
    results['link_conclusion'] = get_link_conclusion(results['link'])
    return results
示例#11
0
def imagine(uid,
            query_fields_dict,
            index_name=portrait_index_name,
            doctype=portrait_index_type):
    """Search for portrait users whose attribute values resemble those of `uid`.

    `query_fields_dict` maps portrait field names to boost weights and must
    also contain 'size', the number of similar users wanted. For every field
    that is non-empty on the user's own document, the '&'-separated values are
    turned into wildcard clauses; a hit must match at least one value of every
    such field.

    The user's own document is always read from `portrait_index_name` /
    `portrait_index_type`; `index_name` / `doctype` are used for the count
    and search requests.

    Returns [] when none of the requested fields has a value for `uid`.
    Otherwise returns a list laid out as
        [own_info, hit_info, ..., number]
    where own_info is [uid, uname, activeness, importance, influence],
    each hit_info is those five values followed by the hit's score as a
    percentage of the best non-self score, and number is the document count
    obtained before any wildcard clause is added. activeness / importance /
    influence are rescaled as log10(value / max * 9 + 1) * 100 with the maxima
    from get_evaluate_max().

    Raises ValueError when 'size' is missing from `query_fields_dict`.
    """
    # work on a copy so the caller's dict keeps 'size' and its empty fields
    query_fields_dict = dict(query_fields_dict)

    personal_info = es.get(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           id=uid,
                           _source=True)['_source']

    # keys that take part in the similarity search
    keys_list = list(query_fields_dict)
    keys_list.remove('size')

    # drop fields that have no value on the user's own document
    search_dict = {}
    for iter_key in keys_list:
        own_value = personal_info.get(iter_key)
        if own_value:
            search_dict[iter_key] = own_value.split('&')
        else:
            query_fields_dict.pop(iter_key)

    if not search_dict:
        return []

    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': []
                    }
                }
            }
        }
    }

    # counted while the must list is still empty
    number = es.count(index=index_name, doc_type=doctype,
                      body=query_body)['count']
    query_body['size'] = 150  # default number
    query_number = query_fields_dict.pop('size')  # required number

    must_clauses = query_body['query']['function_score']['query']['bool']['must']
    for (k, v) in query_fields_dict.items():
        should_clauses = [{
            'wildcard': {
                k: {
                    'wildcard': '*' + term + '*',
                    'boost': v
                }
            }
        } for term in search_dict[k]]
        must_clauses.append({'bool': {'should': should_clauses}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # score of the best hit that is not the user itself; stays None when
    # there is no such hit, in which case the loop below emits nothing
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()

    def normalize(field, value):
        # float() prevents Python 2 integer division from truncating the ratio
        return math.log(float(value) / evaluate_max_dict[field] * 9 + 1, 10) * 100

    return_list = []
    count = 0
    for item in result:
        # NOTE(review): `uid in filter_uid` tests the queried uid, not the
        # hit -- confirm whether deleted hits were meant to be skipped instead
        if uid == item['_id'] or uid in filter_uid:
            continue
        info = []
        for field in field_list:
            value = item['_source'][field]
            if field in evaluate_index_list:
                value = normalize(field, value)
            info.append(value)
        # a zero top score would divide by zero; report 0% similarity instead
        info.append(item['_score'] / float(top_score) * 100 if top_score else 0)
        return_list.append(info)
        count += 1

        if count == query_number:
            break

    return_list.append(number)

    own_info = []
    for field in field_list:
        value = personal_info[field]
        if field in evaluate_index_list:
            value = normalize(field, value)
        own_info.append(value)

    results = [own_info]
    results.extend(return_list)

    return results
示例#12
0
文件: imagine.py 项目: SwoJa/ruman
def imagine(submit_user, uid, query_fields_dict,index_name=portrait_index_name, doctype=portrait_index_type):
    """Search for portrait users similar to `uid`, including `submit_user`'s custom tags.

    `query_fields_dict` maps field names to weights and must contain a truthy
    'size' (number of similar users wanted). Fields with a falsy weight are
    ignored. A requested field that exists on the user's document contributes
    wildcard clauses for each of its '&'-separated values; a requested field
    that does not exist there is looked up in the user's "<submit_user>-tag"
    attribute ("name-value" pairs joined by '&') and, when found, contributes
    to a terms clause on that attribute.

    The user's own document is always read from `portrait_index_name` /
    `portrait_index_type`; `index_name` / `doctype` are used for the search.

    Returns None when the user's document cannot be fetched, [] when no
    requested field or tag is usable, and otherwise
        [own_info, hit_dict, ...]
    where own_info is [uid, uname, activeness, importance, influence] and each
    hit_dict has keys 'uid', 'uname', 'activeness', 'importance', 'influence'
    and 'similiar' (hit score as a percentage of the best non-self score).
    activeness / importance / influence are rescaled as
    log10(value / max * 9 + 1) * 100 with the maxima from get_evaluate_max().

    Raises ValueError when 'size' is missing from `query_fields_dict` or falsy.
    """
    try:
        personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)['_source']
    except Exception:
        return None

    # custom tags stored on the document as "name-value&name-value"
    user_tag = submit_user + "-tag"
    tag_dict = dict()
    user_tag_string = personal_info.get(user_tag, "")
    if user_tag_string:
        for item in user_tag_string.split('&'):
            iter_pair = item.split('-')
            tag_dict[iter_pair[0]] = iter_pair[1]

    # keys that take part in the similarity search (non-zero weight only)
    keys_list = [k for k, v in query_fields_dict.items() if v]
    keys_list.remove('size')
    query_number = query_fields_dict['size']  # required number

    # split the requested keys into portrait fields and custom tags,
    # dropping the ones the user has no value for
    search_dict = {}
    tag_attri_vaule = []
    for iter_key in keys_list:
        if iter_key in personal_info:
            if personal_info[iter_key]:
                search_dict[iter_key] = personal_info[iter_key].split('&')
        elif tag_dict.get(iter_key, ''):
            tag_attri_vaule.append(iter_key + "-" + tag_dict[iter_key])

    if not search_dict and not tag_attri_vaule:
        return []

    must_clauses = []
    query_body = {
        'query':{
            'function_score':{
                'query':{
                    'bool':{
                        'must': must_clauses
                    }
                }
            }
        },
        'size': 150  # default number
    }

    if tag_attri_vaule:
        must_clauses.append({"terms":{user_tag:tag_attri_vaule}})

    for k, terms in search_dict.items():
        v = query_fields_dict[k]
        should_clauses = [{'wildcard':{k:{'wildcard':'*'+term+'*', 'boost': v}}} for term in terms]
        must_clauses.append({'bool':{'should':should_clauses}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid','uname', 'activeness','importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # score of the best hit that is not the user itself; stays None when
    # there is no such hit, in which case the loop below emits nothing
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()

    def normalize(field, value):
        return math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100

    result_list = []
    count = 0
    for item in result:
        # NOTE(review): `uid in filter_uid` tests the queried uid, not the
        # hit -- confirm whether deleted hits were meant to be skipped instead
        if uid == item['_id'] or uid in filter_uid:
            continue
        source = item['_source']
        # the hit's uid is kept in the dict only; the `uid` parameter must
        # stay intact for the self-exclusion test above
        return_dict = {
            'uid': source['uid'],
            'uname': source['uname'],
            # a zero top score would divide by zero; report 0% instead
            'similiar': item['_score']/float(top_score)*100 if top_score else 0,
        }
        for field in evaluate_index_list:
            return_dict[field] = normalize(field, source[field])
        result_list.append(return_dict)
        count += 1

        if count == query_number:
            break

    own_info = []
    for field in field_list:
        value = personal_info[field]
        if field in evaluate_index_list:
            value = normalize(field, value)
        own_info.append(value)

    results = [own_info]
    results.extend(result_list)
    return results
示例#13
0
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Search for portrait users related to `uid`.

    uid: search users related to this uid
    query_fields_dict: search field weights, e.g. "domain": 2, plus two
        control entries:
          "field" -- "activeness", "importance" or "influence" selects the
                     attribute fed into the function_score field_value_factor
                     (modifier log1p, factors 100 / 0.01 / 0.1); any other
                     value uses "influence" with factor 0 and boost_mode "sum"
          "size"  -- number of related users wanted

    For every weighted field that is non-empty on the user's own document,
    the '&'-separated values become wildcard clauses; a hit must match at
    least one value of every such field.

    The user's own document is always read from `portrait_index_name` /
    `portrait_index_type`; `index_name` / `doctype` are used for the count
    and search requests.

    Returns [] when none of the weighted fields has a value for `uid`.
    Otherwise returns
        [own_info, hit_info, ..., number]
    where own_info is the raw [uid, uname, activeness, importance, influence]
    of the user, each hit_info holds uid, uname and the three evaluate values
    rescaled as log10(value / max * 9 + 1) * 100, followed by the hit's score
    as a percentage of the best non-self score, and number is the document
    count obtained before any wildcard clause is added.

    Raises ValueError when "field" or "size" is missing from `query_fields_dict`.
    """
    # work on a copy so the caller's dict keeps its control entries and fields
    query_fields_dict = dict(query_fields_dict)

    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)["_source"]

    # keys that take part in the similarity search
    keys_list = list(query_fields_dict)
    keys_list.remove("field")
    keys_list.remove("size")

    # drop fields that have no value on the user's own document
    search_dict = {}
    for iter_key in keys_list:
        own_value = personal_info.get(iter_key)
        if own_value:
            search_dict[iter_key] = own_value.split("&")
        else:
            query_fields_dict.pop(iter_key)

    if not search_dict:
        return []

    must_clauses = []
    function_score = {"query": {"bool": {"must": must_clauses}}}
    query_body = {"query": {"function_score": function_score}}

    factor_by_field = {"activeness": 100, "importance": 0.01, "influence": 0.1}
    rank_field = query_fields_dict.pop("field")
    if rank_field in factor_by_field:
        score_standard = {"modifier": "log1p", "field": rank_field, "factor": factor_by_field[rank_field]}
    else:
        score_standard = {"modifier": "log1p", "field": "influence", "factor": 0}
        function_score["boost_mode"] = "sum"
    function_score["field_value_factor"] = score_standard

    # counted while the must list is still empty
    number = es.count(index=index_name, doc_type=doctype, body=query_body)["count"]
    query_body["size"] = 150  # default number
    query_number = query_fields_dict.pop("size")  # required number

    for (k, v) in query_fields_dict.items():
        should_clauses = [{"wildcard": {k: {"wildcard": "*" + term + "*", "boost": v}}} for term in search_dict[k]]
        must_clauses.append({"bool": {"should": should_clauses}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype, body=query_body)["hits"]["hits"]
    field_list = ["uid", "uname", "activeness", "importance", "influence"]
    evaluate_index_list = ["activeness", "importance", "influence"]

    # score of the best hit that is not the user itself; stays None when
    # there is no such hit, in which case the loop below emits nothing
    top_score = None
    for item in result:
        if item["_id"] != uid:
            top_score = item["_score"]
            break

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()
    return_list = []
    count = 0
    for item in result:
        # NOTE(review): `uid in filter_uid` tests the queried uid, not the
        # hit -- confirm whether deleted hits were meant to be skipped instead
        if uid == item["_id"] or uid in filter_uid:
            continue
        info = []
        for field in field_list:
            value = item["_source"][field]
            if field in evaluate_index_list:
                # float() prevents Python 2 integer division from truncating the ratio
                value = math.log(float(value) / evaluate_max_dict[field] * 9 + 1, 10) * 100
            info.append(value)
        # a zero top score would divide by zero; report 0% similarity instead
        info.append(item["_score"] / float(top_score) * 100 if top_score else 0)
        return_list.append(info)
        count += 1

        if count == query_number:
            break

    return_list.append(number)

    # the user's own values are reported unscaled
    own_info = [personal_info[field] for field in field_list]

    results = [own_info]
    results.extend(return_list)

    return results