def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    """Return the top documents of ``index_name`` ordered by ``sort_order``.

    A ``match_all`` search sorted descending on ``sort_order`` and limited to
    ``top_k`` hits is sent through the module-level ``es`` client.

    Args:
        index_name: index to search.
        top_k: number of hits requested (the query ``size``).
        index_type: document type passed to the search.
        top: if true, return only the ``sort_order`` value of the first hit.
        sort_order: field to sort on; it also selects the row layout below.

    Returns:
        When ``top`` is true: the ``sort_order`` value from the first hit's
        ``_source``.

        Otherwise a list with one row per hit, in hit order. Every row starts
        with ``[rank, photo_url, uid, nick_name]`` (rank counts from 1;
        ``photo_url`` / ``nick_name`` stay ``''`` when the profile document
        is not found). Depending on ``sort_order`` the row is extended with:

        * ``user_index`` / ``origin_weibo_retweeted_brust_average`` /
          ``origin_weibo_comment_brust_average``: the sort value and a
          portrait flag (``"1"`` if the uid is found in the portrait index,
          else ``"0"``);
        * ``origin_weibo_retweeted_top_number`` /
          ``origin_weibo_comment_top_number``: the sort value, the result of
          ``weiboinfo2url(uid, mid)`` for the matching top weibo id, and the
          portrait flag;
        * any other value: nothing is appended.

        An empty list is returned when the search has no hits.

    Raises:
        IndexError: if ``top`` is true and the search returns no hits.
    """
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}]
    }

    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']

    if top:
        return hits[0]['_source'][sort_order]

    if not hits:
        # An mget request without any ids fails Elasticsearch request
        # validation, so there is nothing to look up and nothing to rank.
        return []

    uid_list = [hit['_id'] for hit in hits]
    profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list}, _source=True)['docs']
    portrait_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids": uid_list}, _source=True)['docs']

    # Sort fields whose row carries only the value plus the portrait flag.
    plain_fields = (
        "user_index",
        "origin_weibo_retweeted_brust_average",
        "origin_weibo_comment_brust_average",
    )
    # Sort fields that additionally carry a weibo url; maps the sort field to
    # the _source field holding the id of the corresponding top weibo.
    top_weibo_id_field = {
        "origin_weibo_retweeted_top_number": "origin_weibo_top_retweeted_id",
        "origin_weibo_comment_top_number": "origin_weibo_top_comment_id",
    }

    result = []
    # mget answers with one entry per requested id, in request order, so the
    # three sequences line up position by position.
    rows = zip(hits, profile_result, portrait_result)
    for rank, (hit, profile, portrait) in enumerate(rows, 1):
        uid = hit.get('_id', '')
        photo_url = ''
        nick_name = ''
        if profile['found']:
            photo_url = profile['_source'].get('photo_url', '')
            nick_name = profile['_source'].get('nick_name', '')
        info = [rank, photo_url, uid, nick_name]

        if sort_order in plain_fields or sort_order in top_weibo_id_field:
            source = hit['_source']
            info.append(source[sort_order])
            if sort_order in top_weibo_id_field:
                mid = source[top_weibo_id_field[sort_order]]
                info.append(weiboinfo2url(uid, mid))
            info.append("1" if portrait['found'] else "0")

        result.append(info)

    return result
def query_brust(index_name,field_name, range_1=0, range_2=50000, count=0):
    """Count or list "bci" documents whose ``field_name`` is in a range.

    The range is half-open: ``range_1 <= value < range_2``.

    Args:
        index_name: index queried through the module-level ``es`` client.
        field_name: field the range filter is applied to.
        range_1: inclusive lower bound.
        range_2: exclusive upper bound.
        count: when equal to 1, return the number of matching documents
            instead of their ids.

    Returns:
        The match count when ``count == 1``; otherwise a list with the
        ``_id`` of each returned hit (the search asks for at most 1000).
    """
    range_filter = {"range": {field_name: {"gte": range_1, "lt": range_2}}}
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": range_filter,
            }
        }
    }

    if count == 1:
        return es.count(index=index_name, doc_type="bci", body=query_body)['count']

    # Listing mode: cap the page at 1000 hits and keep only the ids.
    query_body['size'] = 1000
    response = es.search(index=index_name, doc_type="bci", body=query_body)
    return [hit['_id'] for hit in response['hits']['hits']]
def search_portrait_history_active_info(uid, date, index_name="copy_user_portrait", doctype="user"):
    """Look up ``uid`` and collect the index-wide maximum of each date field.

    ``time_series(date)`` supplies the list of field names to inspect. The
    user's document is fetched first; then, for every field name, the single
    document with the highest value of that field is searched and its value
    is stored in ``date_max``.

    Args:
        uid: document id of the user.
        date: date string such as ``20130901``, forwarded to ``time_series``.
        index_name: index to read from.
        doctype: document type to read.

    Returns:
        ``"NotFound"`` if the user document does not exist, ``None`` if
        fetching it fails for any other reason.

    Raises:
        IndexError: if a per-field search returns no hits.
        Any exception raised by ``es.search`` propagates unchanged.

    NOTE(review): the visible body ends after ``date_max`` is filled, so the
    function returns ``None`` implicitly and neither ``result`` nor
    ``date_max`` is used afterwards - this looks truncated; confirm against
    the full implementation.
    """
    # date format: 20130901
    date_list = time_series(date)

    try:
        result = es.get(index=index_name, doc_type=doctype, id=uid, _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except Exception:
        # Best-effort lookup: any other failure is reported as None.
        return None

    date_max = {}
    for date_str in date_list:
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': 1,
            'sort': [{date_str: {'order': 'desc'}}]
        }
        # Search errors are deliberately not caught: they propagate with
        # their original traceback.
        max_item = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
        date_max[date_str] = max_item[0]['_source'][date_str]
def query_brust(index_name, field_name, range_1=0, range_2=50000, count=0):
    """Query "bci" documents with ``range_1 <= field_name < range_2``.

    Args:
        index_name: index queried through the module-level ``es`` client.
        field_name: name of the field the range filter applies to.
        range_1: lower bound (inclusive, ``gte``).
        range_2: upper bound (exclusive, ``lt``).
        count: pass 1 to get the number of matches rather than the ids.

    Returns:
        With ``count == 1`` the ``count`` value reported by ``es.count``;
        otherwise the list of ``_id`` values of the hits returned by a
        search limited to 1000 results.
    """
    bounds = {"gte": range_1, "lt": range_2}
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": {"range": {field_name: bounds}},
            }
        }
    }

    wants_count = count == 1
    if not wants_count:
        # Id listing: request up to 1000 hits and project them to their ids.
        query_body['size'] = 1000
        found = es.search(index=index_name, doc_type="bci",
                          body=query_body)['hits']['hits']
        return [doc['_id'] for doc in found]

    counted = es.count(index=index_name, doc_type="bci", body=query_body)
    return counted['count']
def search_k(es, index_name, index_type, start, field="user_index", size=100):
    """Return one page of documents sorted descending by ``field``.

    Args:
        es: client object exposing ``search(index=..., doc_type=..., body=...)``.
        index_name: index to search.
        index_type: document type to search.
        start: offset of the first hit (the query ``from``).
        field: field used for the descending sort.
        size: maximum number of hits in the page.

    Returns:
        A list with the ``_source`` of every returned hit, in hit order.
    """
    descending = {"order": "desc"}
    query_body = {
        "query": {"match_all": {}},
        "size": size,
        "from": start,
        "sort": [{field: descending}],
    }

    response = es.search(index=index_name, doc_type=index_type, body=query_body)
    return [hit['_source'] for hit in response['hits']['hits']]
Пример #6
0
def get_evaluate_max(index_name):
    """Return the maximum value of each evaluation field in ``index_name``.

    For every field in ``evaluate_index`` (currently only ``user_index``) the
    "bci" document with the highest value of that field is searched through
    the module-level ``es_cluster`` client.

    Args:
        index_name: index to search.

    Returns:
        A dict mapping each evaluation field name to its maximum value.

    Raises:
        IndexError: if a search returns no hits.
        Any exception raised by ``es_cluster.search`` propagates unchanged.
    """
    max_result = {}
    index_type = 'bci'
    evaluate_index = ['user_index']
    for evaluate in evaluate_index:
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}]
        }
        # Search errors are deliberately not caught: they propagate with
        # their original traceback.
        result = es_cluster.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        max_result[evaluate] = result[0]['_source'][evaluate]
    # Hand the collected maxima back; without this the work above is lost.
    return max_result
Пример #7
0
def get_evaluate_max(index_name):
    """Collect the highest value of every evaluation field in an index.

    One search per field in ``evaluate_index`` (currently only
    ``user_index``) asks the module-level ``es_cluster`` client for the
    single "bci" document that sorts first in descending order of that field.

    Args:
        index_name: index to search.

    Returns:
        A dict ``{field_name: max_value}`` with one entry per evaluation
        field.

    Raises:
        IndexError: if a search returns no hits.
        Any exception raised by ``es_cluster.search`` propagates unchanged.
    """
    index_type = 'bci'
    evaluate_index = ['user_index']
    max_result = {}
    for evaluate in evaluate_index:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        # No try/except here: a failing search should surface unchanged.
        response = es_cluster.search(index=index_name, doc_type=index_type, body=query_body)
        top_hit = response['hits']['hits'][0]
        max_result[evaluate] = top_hit['_source'][evaluate]
    # Return the maxima so callers can actually use them.
    return max_result
def search_k(es, index_name, index_type, start, field="user_index", size=100):
    """Fetch ``size`` documents starting at offset ``start``, best first.

    Documents are matched with ``match_all`` and ordered descending on
    ``field``.

    Args:
        es: client object exposing ``search(index=..., doc_type=..., body=...)``.
        index_name: index to search.
        index_type: document type to search.
        start: number of leading hits to skip (the query ``from``).
        field: sort field.
        size: page size.

    Returns:
        The ``_source`` dicts of the returned hits, in the order received.
    """
    sort_clause = [{field: {"order": "desc"}}]
    query_body = {
        "query": {"match_all": {}},
        "size": size,
        "from": start,
        "sort": sort_clause,
    }

    page = es.search(index=index_name, doc_type=index_type,
                     body=query_body)['hits']['hits']
    return [entry['_source'] for entry in page]
def search_portrait_history_active_info(uid,
                                        date,
                                        index_name=copy_portrait_index_name,
                                        doctype=copy_portrait_index_name):
    """Look up ``uid`` and collect the index-wide maximum of each date field.

    ``time_series(date)`` supplies the list of field names to inspect. The
    user's document is fetched first; then, for every field name, the single
    document with the highest value of that field is searched and its value
    is stored in ``date_max``.

    Args:
        uid: document id of the user.
        date: date string such as ``20130901``, forwarded to ``time_series``.
        index_name: index to read from.
        doctype: document type to read.

    Returns:
        ``"NotFound"`` if the user document does not exist, ``None`` if
        fetching it fails for any other reason.

    Raises:
        IndexError: if a per-field search returns no hits.
        Any exception raised by ``es.search`` propagates unchanged.

    NOTE(review): ``doctype`` defaults to the same constant as
    ``index_name``; confirm this is intended rather than a separate
    document-type constant.

    NOTE(review): the visible body ends after ``date_max`` is filled, so the
    function returns ``None`` implicitly and neither ``result`` nor
    ``date_max`` is used afterwards - this looks truncated; confirm against
    the full implementation.
    """
    # date format: 20130901
    date_list = time_series(date)

    try:
        result = es.get(index=index_name,
                        doc_type=doctype,
                        id=uid,
                        _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except Exception:
        # Best-effort lookup: any other failure is reported as None.
        return None

    date_max = {}
    for date_str in date_list:
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': 1,
            'sort': [{
                date_str: {
                    'order': 'desc'
                }
            }]
        }
        # Search errors are deliberately not caught: they propagate with
        # their original traceback.
        max_item = es.search(index=index_name,
                             doc_type=doctype,
                             body=query_body)['hits']['hits']
        date_max[date_str] = max_item[0]['_source'][date_str]
def search_top_index(index_name,
                     top_k=1,
                     index_type="bci",
                     top=False,
                     sort_order="user_index"):
    """Return the top documents of ``index_name`` ordered by ``sort_order``.

    A ``match_all`` search sorted descending on ``sort_order`` and limited to
    ``top_k`` hits is sent through the module-level ``es`` client.

    Args:
        index_name: index to search.
        top_k: number of hits requested (the query ``size``).
        index_type: document type passed to the search.
        top: if true, return only the ``sort_order`` value of the first hit.
        sort_order: field to sort on; it also selects the row layout below.

    Returns:
        When ``top`` is true: the ``sort_order`` value from the first hit's
        ``_source``.

        Otherwise a list with one row per hit, in hit order. Every row starts
        with ``[rank, photo_url, uid, nick_name]`` (rank counts from 1;
        ``photo_url`` / ``nick_name`` stay ``''`` when the profile document
        is not found). Depending on ``sort_order`` the row is extended with:

        * ``user_index`` / ``origin_weibo_retweeted_brust_average`` /
          ``origin_weibo_comment_brust_average``: the sort value and a
          portrait flag (``"1"`` if the uid is found in the portrait index,
          else ``"0"``);
        * ``origin_weibo_retweeted_top_number`` /
          ``origin_weibo_comment_top_number``: the sort value, the result of
          ``weiboinfo2url(uid, mid)`` for the matching top weibo id, and the
          portrait flag;
        * any other value: nothing is appended.

        An empty list is returned when the search has no hits.

    Raises:
        IndexError: if ``top`` is true and the search returns no hits.
    """
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{
            sort_order: {
                "order": "desc"
            }
        }]
    }

    search_result = es.search(index=index_name,
                              doc_type=index_type,
                              body=query_body)['hits']['hits']

    if top:
        return search_result[0]['_source'][sort_order]

    if not search_result:
        # An mget request without any ids fails Elasticsearch request
        # validation, so there is nothing to look up and nothing to rank.
        return []

    uid_list = [item['_id'] for item in search_result]
    profile_result = es_profile.mget(index=profile_index_name,
                                     doc_type=profile_index_type,
                                     body={"ids": uid_list},
                                     _source=True)['docs']
    portrait_result = es_portrait.mget(index=portrait_index_name,
                                       doc_type=portrait_index_type,
                                       body={"ids": uid_list},
                                       _source=True)['docs']

    # Sort fields whose row carries only the value plus the portrait flag.
    value_only_fields = (
        "user_index",
        "origin_weibo_retweeted_brust_average",
        "origin_weibo_comment_brust_average",
    )
    # Sort fields that additionally carry a weibo url; maps the sort field to
    # the _source field holding the id of the corresponding top weibo.
    weibo_id_fields = {
        "origin_weibo_retweeted_top_number": "origin_weibo_top_retweeted_id",
        "origin_weibo_comment_top_number": "origin_weibo_top_comment_id",
    }

    result = []
    # mget answers with one entry per requested id, in request order, so the
    # three sequences line up position by position.
    aligned = zip(search_result, profile_result, portrait_result)
    for rank, (item, profile, portrait) in enumerate(aligned, 1):
        uid = item.get('_id', '')
        photo_url = ''
        nick_name = ''
        if profile['found']:
            photo_url = profile['_source'].get('photo_url', '')
            nick_name = profile['_source'].get('nick_name', '')
        info = [rank, photo_url, uid, nick_name]

        has_weibo_url = sort_order in weibo_id_fields
        if has_weibo_url or sort_order in value_only_fields:
            source = item['_source']
            info.append(source[sort_order])
            if has_weibo_url:
                mid = source[weibo_id_fields[sort_order]]
                info.append(weiboinfo2url(uid, mid))
            info.append("1" if portrait['found'] else "0")

        result.append(info)

    return result