示例#1
0
def decide_out_uid(date, data):
    """Record uids chosen for removal and prune them from the recommendations.

    The chosen uids are merged into the ``decide_delete_list`` hash under
    today's date (``%Y%m%d``, local time).  They are then removed, together
    with everything returned by ``all_delete_uid()``, from the
    ``recommend_delete_list`` entry stored under ``date``.

    Args:
        date: hash field of ``recommend_delete_list`` to prune.
        data: comma-separated uid string; a falsy value means "no uids".

    Returns:
        Always ``1``.
    """
    now_date = time.strftime("%Y%m%d", time.localtime(time.time()))
    # A falsy ``data`` (None / "") selects nobody; never call .split on it.
    decided_uids = data.split(",") if data else []

    if decided_uids:
        merged = list(decided_uids)
        exist_data = r_out.hget("decide_delete_list", now_date)
        if exist_data:
            # De-duplicate only when merging with a previously stored list.
            merged = list(set(merged).union(json.loads(exist_data)))
        r_out.hset("decide_delete_list", now_date, json.dumps(merged))

    # Fetched after the write above, as in the original call order.
    # NOTE(review): presumably the set of uids already scheduled for deletion.
    filter_uid = all_delete_uid()

    recommended = r_out.hget("recommend_delete_list", date)
    if recommended:
        # No stored list for ``date`` means there is nothing to prune.
        remaining = set(json.loads(recommended)).difference(decided_uids)
        remaining = remaining.difference(filter_uid)
        r_out.hset("recommend_delete_list", date, json.dumps(list(remaining)))

    return 1
示例#2
0
def show_out_uid(fields):
    """Build display rows for every user currently recommended for removal.

    Args:
        fields: names of the portrait fields to emit, in output order.
            ``"topic"`` is rendered from ``topic_string`` with ``&`` replaced
            by ``,``; ``"influence"``, ``"importance"`` and ``"activeness"``
            are scaled to a 0-100 value against ``get_top_influence``.

    Returns:
        A list with one list of field values per user.  Users present in
        ``all_delete_uid()`` and uids with no portrait document are skipped.
        An empty list is returned when nothing is recommended.
    """
    out_list = []
    # hgetall already returns every per-date list; no extra hget per key.
    for raw_list in r_out.hgetall("recommend_delete_list").values():
        out_list.extend(json.loads(raw_list))
    if not out_list:
        return out_list  # no one is recommended to out

    top_values = {}
    for name in ("influence", "activeness", "importance"):
        top_values[name] = float(get_top_influence(name))

    out_list = list(set(out_list))
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()

    return_list = []
    for doc in detail:
        source = doc.get('_source')
        # mget reports ids missing from the index as docs without '_source'.
        if source is None or source['uid'] in filter_uid:
            continue
        detail_info = []
        for item in fields:
            if item == "topic":
                detail_info.append(','.join(source['topic_string'].split("&")))
            elif item in top_values:
                detail_info.append(math.ceil(source[item] / top_values[item] * 100))
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)

    return return_list
示例#3
0
def show_out_uid(fields):
    """Build display rows for every user currently recommended for removal.

    Args:
        fields: names of the portrait fields to emit, in output order.
            ``"topic"`` is rendered from ``topic_string`` with ``&`` replaced
            by ``,``; every other field is copied from the document as-is.

    Returns:
        A list with one list of field values per user.  Each uid appears at
        most once, users present in ``all_delete_uid()`` are skipped, and so
        are uids with no document in the index.  An empty list is returned
        when nothing is recommended.
    """
    out_list = []
    seen = set()
    # hgetall already returns every per-date list; no extra hget per key.
    for raw_list in r_out.hgetall("recommend_delete_list").values():
        for uid in json.loads(raw_list):
            # A uid recommended on several dates must yield a single row.
            if uid not in seen:
                seen.add(uid)
                out_list.append(uid)
    if not out_list:
        return out_list  # no one is recommended to out

    detail = es.mget(index="user_portrait", doc_type="user",
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()

    return_list = []
    for doc in detail:
        source = doc.get('_source')
        # mget reports ids missing from the index as docs without '_source'.
        if source is None or source['uid'] in filter_uid:
            continue
        detail_info = []
        for item in fields:
            if item == "topic":
                detail_info.append(','.join(source['topic_string'].split("&")))
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)

    return return_list
示例#4
0
def decide_out_uid(date, data):
    """Record uids chosen for removal and prune them from the recommendations.

    The chosen uids are merged into the ``decide_delete_list`` hash under
    ``date``.  They are then removed, together with everything returned by
    ``all_delete_uid()``, from the ``recommend_delete_list`` entry stored
    under the same ``date``.

    Args:
        date: hash field to update, e.g. ``2016-02-26``.
        data: comma-separated uid string (``uid,uid``); a falsy value means
            "no uids".

    Returns:
        Always ``1``.
    """
    # A falsy ``data`` (None / "") selects nobody; never call .split on it.
    decided_uids = data.split(",") if data else []

    if decided_uids:
        merged = list(decided_uids)
        exist_data = r_out.hget("decide_delete_list", date)
        if exist_data:
            # De-duplicate only when merging with a previously stored list.
            merged = list(set(merged).union(json.loads(exist_data)))
        r_out.hset("decide_delete_list", date, json.dumps(merged))

    # Drop the users already decided for removal from the recommend list.
    # Fetched after the write above, as in the original call order.
    filter_uid = all_delete_uid()

    recommended = r_out.hget("recommend_delete_list", date)
    if recommended:
        # No stored list for ``date`` means there is nothing to prune.
        remaining = set(json.loads(recommended)).difference(decided_uids)
        remaining = remaining.difference(filter_uid)
        r_out.hset("recommend_delete_list", date, json.dumps(list(remaining)))

    return 1
示例#5
0
def show_out_uid():
    """Build display rows for every user currently recommended for removal.

    Each row holds, in order: uid, uname, location, statusnum, fansnum,
    domain, topic_string (``&`` replaced by ``,``), importance, influence,
    activeness and sensitive.  The last four are mapped to a 0-100 value with
    ``log10(value / top * 9 + 1) * 100`` where ``top`` comes from
    ``get_top_influence``.

    Returns:
        A list of rows.  Users whose ``_id`` is in ``all_delete_uid()`` and
        uids with no portrait document are skipped.  An empty list is
        returned when nothing is recommended.
    """
    fields = ["uid", "uname", "location", "statusnum", "fansnum", "domain",
              "topic_string", "importance", "influence", "activeness", "sensitive"]
    out_list = []
    # hgetall already returns every per-date list; no extra hget per key.
    for raw_list in r_out.hgetall("recommend_delete_list").values():
        out_list.extend(json.loads(raw_list))
    if not out_list:
        return out_list  # no one is recommended to out

    top_values = {}
    for name in ("influence", "activeness", "importance", "sensitive"):
        top_values[name] = float(get_top_influence(name))

    out_list = list(set(out_list))
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()

    return_list = []
    for doc in detail:
        # mget reports ids missing from the index as docs without '_source'.
        if doc['_id'] in filter_uid or '_source' not in doc:
            continue
        source = doc['_source']
        detail_info = []
        for item in fields:
            if item == "topic_string":
                detail_info.append(','.join(source['topic_string'].split("&")))
            elif item in top_values:
                detail_info.append(
                    math.log(source[item] / top_values[item] * 9 + 1, 10) * 100)
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)

    return return_list
示例#6
0
def show_out_uid():
    """Build display rows for every user currently recommended for removal.

    Each row holds, in order: uid, uname, location, statusnum, fansnum,
    domain, topic_string (``&`` replaced by ``,``), importance, influence,
    activeness and sensitive.  The last four are mapped to a 0-100 value with
    ``log10(value / top * 9 + 1) * 100`` where ``top`` comes from
    ``get_top_influence``.

    Returns:
        A list of rows.  Users whose ``_id`` is in ``all_delete_uid()`` and
        uids with no portrait document are skipped.  An empty list is
        returned when nothing is recommended.
    """
    fields = ["uid", "uname", "location", "statusnum", "fansnum", "domain",
              "topic_string", "importance", "influence", "activeness", "sensitive"]
    out_list = []
    # hgetall already returns every per-date list; no extra hget per key.
    for raw_list in r_out.hgetall("recommend_delete_list").values():
        out_list.extend(json.loads(raw_list))
    if not out_list:
        return out_list  # no one is recommended to out

    top_values = {}
    for name in ("influence", "activeness", "importance", "sensitive"):
        top_values[name] = float(get_top_influence(name))

    out_list = list(set(out_list))
    detail = es.mget(index=portrait_index_name, doc_type=portrait_index_type,
                     body={"ids": out_list}, _source=True)['docs']
    filter_uid = all_delete_uid()

    return_list = []
    for doc in detail:
        # mget reports ids missing from the index as docs without '_source'.
        if doc['_id'] in filter_uid or '_source' not in doc:
            continue
        source = doc['_source']
        detail_info = []
        for item in fields:
            if item == "topic_string":
                detail_info.append(','.join(source['topic_string'].split("&")))
            elif item in top_values:
                detail_info.append(
                    math.log(source[item] / top_values[item] * 9 + 1, 10) * 100)
            else:
                detail_info.append(source[item])
        return_list.append(detail_info)

    return return_list
示例#7
0
def decide_out_uid(date, data):
    """Record uids chosen for removal and prune them from the recommendations.

    The chosen uids are merged into the ``decide_delete_list`` hash under
    ``date``.  They are then removed, together with everything returned by
    ``all_delete_uid()``, from the ``recommend_delete_list`` entry stored
    under the same ``date``.

    Args:
        date: hash field to update, e.g. ``2016-02-26``.
        data: comma-separated uid string (``uid,uid``); a falsy value means
            "no uids".

    Returns:
        Always ``1``.
    """
    # A falsy ``data`` (None / "") selects nobody; never call .split on it.
    decided_uids = data.split(",") if data else []

    if decided_uids:
        merged = list(decided_uids)
        exist_data = r_out.hget("decide_delete_list", date)
        if exist_data:
            # De-duplicate only when merging with a previously stored list.
            merged = list(set(merged).union(json.loads(exist_data)))
        r_out.hset("decide_delete_list", date, json.dumps(merged))

    # Drop the users already decided for removal from the recommend list.
    # Fetched after the write above, as in the original call order.
    filter_uid = all_delete_uid()

    recommended = r_out.hget("recommend_delete_list", date)
    if recommended:
        # No stored list for ``date`` means there is nothing to prune.
        remaining = set(json.loads(recommended)).difference(decided_uids)
        remaining = remaining.difference(filter_uid)
        r_out.hset("recommend_delete_list", date, json.dumps(list(remaining)))

    return 1
def imagine(uid, query_fields_dict,index_name=portrait_index_name, doctype=portrait_index_type):
    """Find users whose portrait fields resemble those of ``uid``.

    Args:
        uid: id of the reference user; its portrait is fetched with ``es.get``.
        query_fields_dict: maps portrait field names to a boost weight and
            must contain ``'size'`` (maximum number of rows wanted).
            The dict is modified in place: ``'size'`` and every field that is
            empty in the reference portrait are popped.
        index_name: index used for the count and the search.
        doctype: document type used for the count and the search.

    Returns:
        ``[]`` when no requested field has a value in the reference portrait.
        Otherwise a list made of: the reference user's row, one row per
        related user, and finally the value returned by ``es.count``.
        A row is ``[uid, uname, activeness, importance, influence]`` with the
        three scores mapped to 0-100; related rows carry one extra element,
        the hit score as a percentage of the best listed hit.
    """
    # NOTE(review): the profile is read with the module-level index constants,
    # not with index_name/doctype -- confirm this is intended.
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)['_source']

    # Copy the keys first: entries are popped from the dict while walking them.
    keys_list = list(query_fields_dict.keys())
    keys_list.remove('size')

    # Keep only the fields that actually have a value for this user.
    search_dict = {}
    for iter_key in keys_list:
        if not personal_info[iter_key]:
            query_fields_dict.pop(iter_key)
        else:
            search_dict[iter_key] = personal_info[iter_key].split('&')

    if not search_dict:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }

    # NOTE(review): the count is taken while 'must' is still empty, so it is
    # not restricted by the wildcard clauses added below -- confirm intent.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number
    query_number = query_fields_dict.pop('size')  # required number

    # One clause per field: any of the user's values may match.
    for k, v in query_fields_dict.items():
        must_clauses.append({'bool': {'should': [
            {'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
            for term in search_dict[k]
        ]}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Drop the reference user and every hit listed in all_delete_uid().
    candidates = [hit for hit in result
                  if hit['_id'] != uid and hit['_id'] not in filter_uid]
    # Hits arrive best-first, so the first candidate carries the top score.
    top_score = float(candidates[0]['_score']) if candidates else None

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()

    return_list = []
    for item in candidates:
        info = []
        for field in field_list:
            normal_value = item['_source'][field]
            if field in evaluate_index_list:
                # float() keeps Python 2 from truncating int / int.
                normal_value = math.log(
                    normal_value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
            info.append(normal_value)
        info.append(item['_score'] / top_score * 100)
        return_list.append(info)
        if len(return_list) == query_number:
            break

    return_list.append(number)

    profile_row = []
    for field in field_list:
        normal_value = personal_info[field]
        if field in evaluate_index_list:
            normal_value = math.log(
                normal_value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        profile_row.append(normal_value)

    results = [profile_row]
    results.extend(return_list)
    return results
示例#9
0
        #try:
        result = es_user_portrait.search(index=index_name, doc_type=index_type, \
                    body={'query':{'bool':{'must':query}}, 'sort':[{sort:{'order':'desc'}}], 'size':size})['hits']['hits']
        #except Exception,e:
        #    raise e
        #print 'result:', result
    else:
        try:
            result = es_user_portrait.search(index=index_name, doc_type=index_type, \
                    body={'query':{'match_all':{}}, 'sort':[{sort:{"order":"desc"}}], 'size':size})['hits']['hits']
        except Exception, e:
            raise e
    if result:
        search_result_max = get_evaluate_max()
        
        filter_set = all_delete_uid() # filter_uids_set
        for item in result:
            user_dict = item['_source']
            score = item['_score']

            if not user_dict['uid'] in filter_set:
                result_normal_activeness = math.log(user_dict['activeness'] / search_result_max['activeness'] * 9 + 1, 10)
                result_normal_importance = math.log(user_dict['importance'] / search_result_max['importance'] * 9 + 1, 10)
                result_normal_influence = math.log(user_dict['influence'] / search_result_max['influence'] * 9 + 1, 10)
                result_normal_sensitive = math.log(user_dict['sensitive'] / search_result_max['sensitive'] * 9 + 1, 10)
                user_dict['activeness'] = result_normal_activeness*100
                user_dict['importance'] = result_normal_importance*100
                user_dict['influence'] = result_normal_influence*100
                user_dict['sensitive'] = result_normal_sensitive*100
                uname = user_dict['uname']
                if user_dict['uid']=='1935084477':
示例#10
0
def imagine(submit_user,
            uid,
            query_fields_dict,
            index_name=portrait_index_name,
            doctype=portrait_index_type):
    """Find users whose portrait fields or custom tags resemble those of ``uid``.

    Args:
        submit_user: name used to build the tag field ``<submit_user>-tag``.
        uid: id of the reference user; its portrait is fetched with ``es.get``.
        query_fields_dict: maps field or tag names to a boost weight and must
            contain a truthy ``'size'`` (maximum number of rows wanted).
            The dict is modified in place (``'size'`` and unusable keys are
            popped) and the same object is returned as the last element.
        index_name: index used for the count and the search.
        doctype: document type used for the count and the search.

    Returns:
        ``[]`` when neither a portrait field nor a tag can be searched on.
        Otherwise a list made of: the reference user's row, one row per
        related user, the value returned by ``es.count`` and finally
        ``query_fields_dict``.  A row is ``[uid, uname, activeness,
        importance, influence]`` with the three scores mapped to 0-100;
        related rows carry one extra element, the hit score as a percentage
        of the best listed hit.
    """
    default_setting_dict = query_fields_dict

    # NOTE(review): the profile is read with the module-level index constants,
    # not with index_name/doctype -- confirm this is intended.
    personal_info = es.get(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           id=uid,
                           _source=True)['_source']

    # The tag field holds "name-value" pairs joined by '&'.
    user_tag = submit_user + "-tag"
    user_tag_string = personal_info.get(user_tag, "")
    tag_dict = dict()
    if user_tag_string:
        for item in user_tag_string.split('&'):
            iter_pair = item.split('-')
            tag_dict[iter_pair[0]] = iter_pair[1]

    # Only keys with a truthy weight take part in the search.
    keys_list = [k for k, v in query_fields_dict.items() if v]
    keys_list.remove('size')

    search_dict = {}
    tag_attri_vaule = []

    # Sort the keys into portrait fields (search_dict) and tags; drop the rest.
    for iter_key in keys_list:
        if iter_key in personal_info:
            if not personal_info[iter_key]:
                query_fields_dict.pop(iter_key)
            else:
                search_dict[iter_key] = personal_info[iter_key].split('&')
        else:
            query_fields_dict.pop(iter_key)
            if tag_dict.get(iter_key, ''):
                tag_attri_vaule.append(iter_key + "-" + tag_dict[iter_key])

    if not search_dict and not tag_attri_vaule:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }
    # NOTE(review): the count is taken while 'must' is still empty, so it is
    # not restricted by the clauses added below -- confirm intent.
    number = es.count(index=index_name, doc_type=doctype,
                      body=query_body)['count']

    query_body['size'] = 150  # default number
    query_number = query_fields_dict.pop('size')  # required number

    if tag_attri_vaule:
        must_clauses.append({"terms": {user_tag: tag_attri_vaule}})

    for k, v in query_fields_dict.items():
        # Keys left in the dict with a falsy weight were never added to
        # search_dict; testing membership avoids a KeyError for them.
        if k in search_dict:
            must_clauses.append({'bool': {'should': [
                {'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
                for term in search_dict[k]
            ]}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Drop the reference user and every hit listed in all_delete_uid().
    candidates = [hit for hit in result
                  if hit['_id'] != uid and hit['_id'] not in filter_uid]
    # Hits arrive best-first, so the first candidate carries the top score.
    top_score = float(candidates[0]['_score']) if candidates else None

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()

    return_list = []
    for item in candidates:
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                normal_value = math.log(
                    item['_source'][field] / float(evaluate_max_dict[field]) * 9 + 1,
                    10) * 100
            else:
                # Fall back to the document id when uid/uname is empty.
                normal_value = item['_source'][field] or item['_id']
            info.append(normal_value)
        info.append(item['_score'] / top_score * 100)
        return_list.append(info)
        if len(return_list) == query_number:
            break

    return_list.append(number)

    profile_row = []
    for field in field_list:
        normal_value = personal_info[field]
        if field in evaluate_index_list:
            normal_value = math.log(
                normal_value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        profile_row.append(normal_value)

    results = [profile_row]
    results.extend(return_list)
    results.append(default_setting_dict)

    return results
示例#11
0
def imagine(uid, query_fields_dict,index_name="user_portrait", doctype='user'):
    """Find users whose portrait fields resemble those of ``uid``.

    Args:
        uid: search users related to this uid.
        query_fields_dict: maps portrait field names to a boost weight, for
            example ``"domain": 2``.  It must also contain ``'field'`` (the
            score used by ``field_value_factor``: ``activeness``,
            ``importance`` or ``influence``; anything else adds a zero-factor
            influence term with ``boost_mode`` ``sum``) and ``'size'``
            (maximum number of rows wanted).
            The dict is modified in place: ``'field'``, ``'size'`` and every
            field that is empty in the reference portrait are popped.
        index_name: index used for the count and the search.
        doctype: document type used for the count and the search.

    Returns:
        ``[]`` when no requested field has a value in the reference portrait.
        Otherwise a list made of: the reference user's row, one row per
        related user, and finally the value returned by ``es.count``.
        A row is ``[uid, uname, activeness, importance, influence]``; related
        rows carry one extra element, the raw hit score.
    """
    # NOTE(review): the profile is read from the hard-coded index/type, not
    # from index_name/doctype -- confirm this is intended.
    personal_info = es.get(index="user_portrait", doc_type="user", id=uid, _source=True)['_source']

    # Copy the keys first: entries are popped from the dict while walking them.
    keys_list = list(query_fields_dict.keys())
    keys_list.remove('field')
    keys_list.remove('size')

    # Keep only the fields that actually have a value for this user.
    search_dict = {}
    for iter_key in keys_list:
        if not personal_info[iter_key]:
            query_fields_dict.pop(iter_key)
        else:
            search_dict[iter_key] = personal_info[iter_key].split('&')

    if not search_dict:
        return []

    must_clauses = []
    function_score = {
        'query': {
            'bool': {
                'must': must_clauses
            }
        }
    }
    query_body = {'query': {'function_score': function_score}}

    rank_field = query_fields_dict.pop('field')
    score_standard = {"modifier": "log1p"}
    if rank_field == "activeness":
        score_standard['field'] = "activeness"
        score_standard['factor'] = 100
    elif rank_field == "importance":
        score_standard['field'] = "importance"
        score_standard['factor'] = 0.01
    elif rank_field == 'influence':
        score_standard['field'] = "influence"
        score_standard['factor'] = 0.1
    else:
        score_standard['field'] = "influence"
        score_standard['factor'] = 0
        function_score['boost_mode'] = "sum"
    function_score['field_value_factor'] = score_standard

    # NOTE(review): the count is taken while 'must' is still empty, so it is
    # not restricted by the wildcard clauses added below -- confirm intent.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 100  # default number
    query_number = query_fields_dict.pop('size')  # required number

    # One clause per field: any of the user's values may match.
    for k, v in query_fields_dict.items():
        must_clauses.append({'bool': {'should': [
            {'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
            for term in search_dict[k]
        ]}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']

    return_list = []
    for item in result:
        # Skip the reference user and every hit listed in all_delete_uid().
        if item['_id'] == uid or item['_id'] in filter_uid:
            continue
        info = [item['_source'][field] for field in field_list]
        info.append(item['_score'])
        return_list.append(info)
        if len(return_list) == query_number:
            break

    return_list.append(number)

    results = [[personal_info[field] for field in field_list]]
    results.extend(return_list)
    return results
示例#12
0
        try:
            #print query
            result = es_user_portrait.search(index=index_name, doc_type=index_type, \
                    body={'query':{'bool':{'must':query}}, 'sort':sort, 'size':size})['hits']['hits']
            #print 'result:', result
        except Exception,e:
            raise e
    else:
        try:
            result = es_user_portrait.search(index=index_name, doc_type=index_type, \
                    body={'query':{'match_all':{}}, 'sort':[{sort:{"order":"desc"}}], 'size':size})['hits']['hits']
        except Exception, e:
            raise e
    if result:
        #print 'result:', result
        filter_set = all_delete_uid() # filter_uids_set
        for item in result:
            user_dict = item['_source']
            score = item['_score']

            if not user_dict['uid'] in filter_set:
                user_result.append([user_dict['uid'], user_dict['uname'], user_dict['location'], user_dict['activeness'], user_dict['importance'], user_dict['influence'], score])

    return user_result


def delete_action(uid_list):
    index_name = 'user_portrait'
    index_type = 'user'
    bulk_action = []
    for uid in uid_list:
示例#13
0
def imagine(uid,
            query_fields_dict,
            index_name=portrait_index_name,
            doctype=portrait_index_type):
    """Find users whose portrait fields resemble those of ``uid``.

    Args:
        uid: id of the reference user; its portrait is fetched with ``es.get``.
        query_fields_dict: maps portrait field names to a boost weight and
            must contain ``'size'`` (maximum number of rows wanted).
            The dict is modified in place: ``'size'`` and every field that is
            empty in the reference portrait are popped.
        index_name: index used for the count and the search.
        doctype: document type used for the count and the search.

    Returns:
        ``[]`` when no requested field has a value in the reference portrait.
        Otherwise a list made of: the reference user's row, one row per
        related user, and finally the value returned by ``es.count``.
        A row is ``[uid, uname, activeness, importance, influence]`` with the
        three scores mapped to 0-100; related rows carry one extra element,
        the hit score as a percentage of the best listed hit.
    """
    # NOTE(review): the profile is read with the module-level index constants,
    # not with index_name/doctype -- confirm this is intended.
    personal_info = es.get(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           id=uid,
                           _source=True)['_source']

    # Copy the keys first: entries are popped from the dict while walking them.
    keys_list = list(query_fields_dict.keys())
    keys_list.remove('size')

    # Keep only the fields that actually have a value for this user.
    search_dict = {}
    for iter_key in keys_list:
        if not personal_info[iter_key]:
            query_fields_dict.pop(iter_key)
        else:
            search_dict[iter_key] = personal_info[iter_key].split('&')

    if not search_dict:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }

    # NOTE(review): the count is taken while 'must' is still empty, so it is
    # not restricted by the wildcard clauses added below -- confirm intent.
    number = es.count(index=index_name, doc_type=doctype,
                      body=query_body)['count']
    query_body['size'] = 150  # default number
    query_number = query_fields_dict.pop('size')  # required number

    # One clause per field: any of the user's values may match.
    for k, v in query_fields_dict.items():
        must_clauses.append({'bool': {'should': [
            {'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
            for term in search_dict[k]
        ]}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Drop the reference user and every hit listed in all_delete_uid().
    candidates = [hit for hit in result
                  if hit['_id'] != uid and hit['_id'] not in filter_uid]
    # Hits arrive best-first, so the first candidate carries the top score.
    top_score = float(candidates[0]['_score']) if candidates else None

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()

    return_list = []
    for item in candidates:
        info = []
        for field in field_list:
            normal_value = item['_source'][field]
            if field in evaluate_index_list:
                # float() keeps Python 2 from truncating int / int.
                normal_value = math.log(
                    normal_value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
            info.append(normal_value)
        info.append(item['_score'] / top_score * 100)
        return_list.append(info)
        if len(return_list) == query_number:
            break

    return_list.append(number)

    profile_row = []
    for field in field_list:
        normal_value = personal_info[field]
        if field in evaluate_index_list:
            normal_value = math.log(
                normal_value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        profile_row.append(normal_value)

    results = [profile_row]
    results.extend(return_list)

    return results
示例#14
0
文件: imagine.py 项目: SwoJa/ruman
def imagine(submit_user, uid, query_fields_dict,index_name=portrait_index_name, doctype=portrait_index_type):
    """Find users whose portrait fields or custom tags resemble those of ``uid``.

    Args:
        submit_user: name used to build the tag field ``<submit_user>-tag``.
        uid: id of the reference user; its portrait is fetched with ``es.get``.
        query_fields_dict: maps field or tag names to a boost weight and must
            contain a truthy ``'size'`` (maximum number of rows wanted).
            The dict is modified in place: ``'size'`` and unusable keys are
            popped.
        index_name: index used for the count and the search.
        doctype: document type used for the count and the search.

    Returns:
        ``None`` when the reference portrait cannot be fetched, ``[]`` when
        neither a portrait field nor a tag can be searched on.  Otherwise a
        list whose first element is the reference user's row ``[uid, uname,
        activeness, importance, influence]`` (scores mapped to 0-100),
        followed by one dict per related user with the keys ``uid``,
        ``uname``, ``similiar`` (hit score as a percentage of the best listed
        hit), ``activeness``, ``importance`` and ``influence``.
    """
    # NOTE(review): the profile is read with the module-level index constants,
    # not with index_name/doctype -- confirm this is intended.
    try:
        personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)['_source']
    except Exception:
        # Best effort: any lookup failure is reported to the caller as None.
        return None

    # The tag field holds "name-value" pairs joined by '&'.
    user_tag = submit_user + "-tag"
    user_tag_string = personal_info.get(user_tag, "")
    tag_dict = dict()
    if user_tag_string:
        for item in user_tag_string.split('&'):
            iter_pair = item.split('-')
            tag_dict[iter_pair[0]] = iter_pair[1]

    # Only keys with a truthy weight take part in the search.
    keys_list = [k for k, v in query_fields_dict.items() if v]
    keys_list.remove('size')

    search_dict = {}
    tag_attri_vaule = []

    # Sort the keys into portrait fields (search_dict) and tags; drop the rest.
    for iter_key in keys_list:
        if iter_key in personal_info:
            if not personal_info[iter_key]:
                query_fields_dict.pop(iter_key)
            else:
                search_dict[iter_key] = personal_info[iter_key].split('&')
        else:
            query_fields_dict.pop(iter_key)
            if tag_dict.get(iter_key, ''):
                tag_attri_vaule.append(iter_key + "-" + tag_dict[iter_key])

    if not search_dict and not tag_attri_vaule:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }
    # The count result is not part of the return value in this revision; the
    # call is kept so the request sequence stays the same.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']

    query_body['size'] = 150  # default number
    query_number = query_fields_dict.pop('size')  # required number

    if tag_attri_vaule:
        must_clauses.append({"terms": {user_tag: tag_attri_vaule}})

    for k, v in query_fields_dict.items():
        # Keys left in the dict with a falsy weight were never added to
        # search_dict; testing membership avoids a KeyError for them.
        if k in search_dict:
            must_clauses.append({'bool': {'should': [
                {'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
                for term in search_dict[k]
            ]}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Drop the reference user and every hit listed in all_delete_uid().
    candidates = [hit for hit in result
                  if hit['_id'] != uid and hit['_id'] not in filter_uid]
    # Hits arrive best-first, so the first candidate carries the top score.
    top_score = float(candidates[0]['_score']) if candidates else None

    # get evaluate max to normal
    evaluate_max_dict = get_evaluate_max()

    result_list = []
    for item in candidates:
        source = item['_source']
        # The hit's uid goes into the row only; the ``uid`` argument must not
        # be overwritten.
        return_dict = {
            'uid': source['uid'],
            'uname': source['uname'],
            'similiar': item['_score'] / top_score * 100,
        }
        for field in evaluate_index_list:
            return_dict[field] = math.log(
                source[field] / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        result_list.append(return_dict)
        if len(result_list) == query_number:
            break

    profile_row = []
    for field in field_list:
        normal_value = personal_info[field]
        if field in evaluate_index_list:
            normal_value = math.log(
                normal_value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
        profile_row.append(normal_value)

    results = [profile_row]
    results.extend(result_list)
    return results
示例#15
0
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Search for users whose portrait fields resemble those of ``uid``.

    Args:
        uid: id of the reference user; this user is excluded from the hits.
        query_fields_dict: maps each portrait field to match on (e.g.
            "domain", "topic_string", "keywords", "activity_geo", "hashtag")
            to its boost weight, for example ``{"domain": 2}``.  It must also
            carry two control entries: ``"field"`` (the index used to rank
            hits: "activeness", "importance" or "influence"; any other value
            disables the ranking factor) and ``"size"`` (maximum number of
            similar users to return).  The dict is not modified.
        index_name: index to count and search in.
        doctype: document type to count and search in.

    Returns:
        ``[]`` when the reference user has no value for any requested field.
        Otherwise a list laid out as::

            [reference_row, hit_row, ..., hit_row, number]

        where ``reference_row`` is ``[uid, uname, activeness, importance,
        influence]`` taken as stored for the reference user, each ``hit_row``
        holds the same five fields (the three indexes log-normalised against
        ``get_evaluate_max()``) followed by the similarity percentage
        relative to the best-scoring other user, and ``number`` is the
        document count returned by ``es.count``.

    Raises:
        KeyError: if ``"field"`` or ``"size"`` is missing from
            ``query_fields_dict``, or a requested field is absent from the
            reference user's document.
    """
    # Work on a copy so the caller's dict is not mutated by the pops below.
    field_weights = dict(query_fields_dict)
    ranking_field = field_weights.pop("field")
    query_number = field_weights.pop("size")  # required number of hits

    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)["_source"]

    # Keep only the fields for which the reference user actually has a value;
    # every value is split on "&" into the individual terms to match.
    search_terms = {}
    for key in list(field_weights):
        value = personal_info[key]
        if value:
            search_terms[key] = value.split("&")
        else:
            field_weights.pop(key)

    if not search_terms:
        return []

    # Ranking: boost hits by log1p(factor * <ranking field>).
    factors = {"activeness": 100, "importance": 0.01, "influence": 0.1}
    function_score = {"query": {"bool": {"must": []}}}
    if ranking_field in factors:
        score_standard = {"modifier": "log1p", "field": ranking_field, "factor": factors[ranking_field]}
    else:
        # Unknown ranking field: zero factor, added to the text score.
        score_standard = {"modifier": "log1p", "field": "influence", "factor": 0}
        function_score["boost_mode"] = "sum"
    function_score["field_value_factor"] = score_standard
    query_body = {"query": {"function_score": function_score}}

    # The count is issued before the per-field clauses are appended, so it
    # does not reflect the similarity constraints added below.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)["count"]
    query_body["size"] = 150  # default number of hits fetched

    # One "must" clause per field; inside it, any of the user's terms may match.
    must_clauses = function_score["query"]["bool"]["must"]
    for key, boost in field_weights.items():
        should = [
            {"wildcard": {key: {"wildcard": "*" + term + "*", "boost": boost}}}
            for term in search_terms[key]
        ]
        must_clauses.append({"bool": {"should": should}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype, body=query_body)["hits"]["hits"]
    field_list = ["uid", "uname", "activeness", "importance", "influence"]
    evaluate_index_list = ["activeness", "importance", "influence"]

    # Reference score for the similarity percentage: the first hit that is not
    # the reference user.  Derived from the hits themselves so it is always
    # bound whenever a row is emitted.
    top_score = next((hit["_score"] for hit in result if hit["_id"] != uid), None)

    # get evaluate max to normalise
    evaluate_max_dict = get_evaluate_max()
    return_list = []
    for hit in result:
        # Skip the reference user and every user scheduled for deletion
        # (the check is on the hit's id, not on the queried uid).
        if hit["_id"] == uid or hit["_id"] in filter_uid:
            continue
        source = hit["_source"]
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                # float() avoids integer truncation under Python 2.
                ratio = source[field] / float(evaluate_max_dict[field])
                info.append(math.log(ratio * 9 + 1, 10) * 100)
            else:
                info.append(source[field])
        info.append(hit["_score"] / float(top_score) * 100 if top_score else 0)
        return_list.append(info)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    results = [[personal_info[field] for field in field_list]]
    results.extend(return_list)

    return results