Пример #1
0
def search_task(task_name, submit_date, state, status):
    """Search the ``group_result`` index for group tasks matching the filters.

    Every non-empty argument contributes one or more clauses that must all
    match. When every argument is empty a ``match_all`` query is sent instead.

    Args:
        task_name: Space-separated keywords; each becomes a ``*keyword*``
            wildcard clause on ``task_name``.
        submit_date: Value matched against ``submit_date``.
        state: Space-separated keywords; each becomes a ``*keyword*``
            wildcard clause on ``state``.
        status: Value matched against ``status``.

    Returns:
        A list of ``[task_name, submit_date, count, state, status]`` rows
        sorted by ``count`` descending (at most 10000 rows), or ``None`` when
        the response has no ``hits`` section.
    """
    clauses = []
    if task_name:
        clauses.extend(
            {'wildcard': {'task_name': '*' + word + '*'}}
            for word in task_name.split(' ')
        )
    if submit_date:
        clauses.append({'match': {'submit_date': submit_date}})
    if state:
        clauses.extend(
            {'wildcard': {'state': '*' + word + '*'}}
            for word in state.split(' ')
        )
    if status:
        clauses.append({'match': {'status': status}})

    # Only the query part differs between the filtered and unfiltered case,
    # so the search call itself is issued once.
    if clauses:
        query = {'bool': {'must': clauses}}
    else:
        query = {'match_all': {}}

    source = es.search(
        index='group_result',
        doc_type='group',
        body={
            'query': query,
            'sort': [{'count': {'order': 'desc'}}],
            'size': 10000,
        }
    )

    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        # Malformed response: keep the original "no result" signal.
        return None

    print('len task_dict_list: %d' % len(task_dict_list))
    columns = ('task_name', 'submit_date', 'count', 'state', 'status')
    return [
        [task_dict['_source'][column] for column in columns]
        for task_dict in task_dict_list
    ]
Пример #2
0
def conclusion_on_influence(uid):
    """Summarise where a user stands on activeness, influence and importance.

    For each of ``aver_activeness``, ``aver_influence`` and
    ``aver_importance`` the function computes the user's value as an integer
    percentage of the largest value in the index, and the number of documents
    whose value is strictly greater than the user's.

    Args:
        uid: Document id looked up in ``copy_portrait_index_name``.

    Returns:
        ``[activeness_pct, activeness_greater, influence_pct,
        influence_greater, importance_pct, importance_greater, total_number]``.
        The first six entries are 0 when the user document cannot be fetched.
        A percentage is 0 when the index yields no usable top value.
    """
    index_name = copy_portrait_index_name
    index_type = copy_portrait_index_type
    total_number = es.count(index=index_name, doc_type=index_type)["count"]

    try:
        influ_result = es.get(index=index_name, doc_type=index_type, id=uid)["_source"]
    except Exception:
        # Unknown user (or failed lookup): report zeros alongside the total.
        return [0, 0, 0, 0, 0, 0, total_number]

    result = []
    for field in ("aver_activeness", "aver_influence", "aver_importance"):
        value = influ_result.get(field, 0)

        top_query_body = {
            "query": {"match_all": {}},
            "sort": {field: {"order": "desc"}},
            "size": 1,
        }
        top_hits = es.search(
            index=index_name, doc_type=index_type, body=top_query_body
        )["hits"]["hits"]
        # An empty index has no top hit; the original indexed [0] blindly.
        top_value = top_hits[0]["sort"][0] if top_hits else 0

        greater_query_body = {
            "query": {"filtered": {"filter": {"range": {field: {"gt": value}}}}}
        }
        greater_count = es.count(
            index=index_name, doc_type=index_type, body=greater_query_body
        )["count"]

        # Guard against a zero or missing top value instead of dividing by it.
        percent = int(value * 100.0 / top_value) if top_value else 0
        result.extend([percent, greater_count])

    result.append(total_number)
    return result
Пример #3
0
def show_social_sensing_task():
    """List the ``task_name`` of every task whose ``finish`` field is "1".

    Returns:
        Task names ordered by ``create_at`` descending (at most 10000).
    """
    finished_only = {"term": {"finish": "1"}}
    query_body = {
        "query": {"filtered": {"filter": finished_only}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }
    response = es.search(index=index_manage_social_task,
                         doc_type=task_doc_type,
                         body=query_body)
    hits = response['hits']['hits']
    return [hit['_source']['task_name'] for hit in hits or []]
Пример #4
0
def get_top_all_influence(key, ts):
    """Return the largest ``key`` value found in the daily ``bci_*`` index.

    The index for the day of ``ts`` is tried first; if it does not exist the
    index for ``ts - DAY`` is used instead.

    Args:
        key: Field to sort on and to read from the top hit.
        ts: Timestamp passed to ``ts2datetime`` to derive the index name.

    Returns:
        The value of ``key`` in the top hit, or 2000 when no index exists or
        the search returns no hits.
    """
    def _index_for(timestamp):
        # Index names are "bci_" plus the date with its dashes removed.
        return "bci_" + ts2datetime(timestamp).replace('-', '')

    index_name = _index_for(ts)
    if not es.indices.exists(index=index_name):
        index_name = _index_for(ts - DAY)

    if not es.indices.exists(index=index_name):
        return 2000

    top_query = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1,
    }
    hits = es.search(index=index_name, doc_type="bci", body=top_query)['hits']['hits']
    if hits:
        return hits[0]['_source'][key]
    return 2000
Пример #5
0
def get_attribute_name():
    """Run a ``match_all`` search over the attribute index.

    Any exception raised by the search is re-raised. The lines visible here
    end after the search, so no value is returned.
    """
    attribute_name_list = []
    match_everything = {'query': {'match_all': {}}}
    try:
        response = es.search(index=attribute_index_name,
                             doc_type=attribute_index_type,
                             body=match_everything)
        attribute_result = response['hits']['hits']
    except Exception as e:
        raise e
Пример #6
0
def get_evaluate_max():
    """Return the largest value of each evaluation field in the portrait index.

    Returns:
        Dict keyed by ``influence``, ``activeness``, ``importance`` and
        ``sensitive``. A field falls back to ``MAX_VALUE`` when its search
        fails or the top hit does not carry the field.
    """
    max_result = {}
    for evaluate in ('influence', 'activeness', 'importance', 'sensitive'):
        query_body = {
            'query': {'match_all': {}},
            'sort': [{evaluate: {'order': 'desc'}}],
            'size': 1,
        }
        try:
            hits = es_user_portrait.search(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body=query_body)['hits']['hits']
            max_result[evaluate] = hits[0]['_source'][evaluate]
        except Exception:
            # Best-effort lookup; `Exception` (not a bare except) so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            max_result[evaluate] = MAX_VALUE

    return max_result
Пример #7
0
def get_evaluate_max():
    """Collect the top ``activeness``, ``importance`` and ``influence`` values.

    For each field one document is fetched, sorted by that field descending,
    and its value is stored in ``max_result``. Search errors are re-raised.
    The lines visible here end before any return statement.
    """
    max_result = {}
    index_name = portrait_index_name
    index_type = portrait_index_type
    for evaluate in ['activeness', 'importance', 'influence']:
        descending = [{evaluate: {'order': 'desc'}}]
        query_body = {'query': {'match_all': {}}, 'size': 1, 'sort': descending}
        try:
            response = es_user_portrait.search(index=index_name,
                                               doc_type=index_type,
                                               body=query_body)
            result = response['hits']['hits']
        except Exception as e:
            raise e
        max_result[evaluate] = result[0]['_source'][evaluate]
Пример #8
0
def delete_attribute(attribute_name):
    """Delete an attribute document and strip the attribute from user docs.

    Args:
        attribute_name: Id of the attribute document; also the field name
            removed from the matching user documents.

    Returns:
        ``False`` when the attribute document cannot be fetched, ``True``
        otherwise.
    """
    try:
        result = es.get(index=attribute_index_name, doc_type=attribute_index_type,
                        id=attribute_name)['_source']
    except Exception:
        return False
    attribute_value = json.loads(result['value'])
    es.delete(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)

    # Find the users that carry this attribute.
    # NOTE(review): no 'size' is given, so only the server's default page of
    # hits is processed -- confirm whether all users should be rewritten.
    query = [{'match': {attribute_name: value}} for value in attribute_value]
    try:
        attribute_user_result = es.search(index=user_index_name, doc_type=user_index_type,
                                          body={'query': {'bool': {'must': query}}})['hits']['hits']
    except Exception:
        attribute_user_result = []
    if not attribute_user_result:
        return True

    bulk_action = []
    for user_dict in attribute_user_result:
        try:
            user_item = user_dict['_source']
        except KeyError:
            # The original wrote a bare `next` here, which is a no-op
            # expression; skipping the hit is what was intended.
            continue
        # The original popped the undefined name `attribute`; the field to
        # drop is the attribute being deleted.
        user_item.pop(attribute_name, None)
        action = {'index': {'_id': str(user_item['uid'])}}
        bulk_action.extend([action, user_item])
    if bulk_action:
        # The original passed the undefined name `index_type`; the documents
        # were read from `user_index_type`, so they are written back there.
        es.bulk(bulk_action, index=user_index_name, doc_type=user_index_type)
    return True
def get_social_domain(uid_set):
    """Count portrait documents per ``domain`` for the given uids.

    Args:
        uid_set: Iterable of uids; converted to a list for the ``terms`` filter.

    Returns:
        Dict mapping each bucket key of the ``all_domain`` terms aggregation
        to its ``doc_count``.
    """
    uid_filter = {'terms': {'uid': list(uid_set)}}
    query_body = {
        'query': {'filtered': {'filter': uid_filter}},
        'aggs': {'all_domain': {'terms': {'field': 'domain'}}},
    }
    response = es_user_portrait.search(index=portrait_index_name,
                                       doc_type=portrait_index_type,
                                       body=query_body)
    search_result = response['aggregations']['all_domain']['buckets']
    print('search_result: %s' % (search_result,))
    return {bucket['key']: bucket['doc_count'] for bucket in search_result}
Пример #10
0
def get_uid(uname):
    """Look up the document id whose ``nick_name`` equals ``uname``.

    Args:
        uname: Value used in the ``term`` query on ``nick_name``.

    Returns:
        The ``_id`` of the first hit, or ``None`` when the search fails or
        returns no hit.
    """
    query_body = {"query": {"bool": {"must": {"term": {"nick_name": uname}}}}}
    try:
        hits = es_user_portrait.search(index=profile_index_name,
                                       doc_type=profile_index_type,
                                       body=query_body)['hits']['hits']
        return hits[0]['_id']
    except Exception:
        # Covers search errors and the IndexError from an empty hit list,
        # without swallowing KeyboardInterrupt / SystemExit as the bare
        # except did.
        return None
def show_social_sensing_task(user):
    """List the ``task_name`` of finished tasks created by ``user``.

    Args:
        user: Value matched against the ``create_by`` field.

    Returns:
        Names of tasks with ``finish`` equal to "1" and ``create_by`` equal
        to ``user``, ordered by ``create_at`` descending (at most 10000).
    """
    must_clauses = [
        {"term": {"finish": "1"}},
        {"term": {"create_by": user}},
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_clauses}}}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }
    response = es.search(index=index_manage_social_task, doc_type=task_doc_type, body=query_body)
    hits = response['hits']['hits']
    return [hit['_source']['task_name'] for hit in hits or []]
Пример #12
0
def get_social_topic(uid_set):
    """Count portrait documents per ``topic_string`` for the given uids.

    Args:
        uid_set: Iterable of uids; converted to a list for the ``terms`` filter.

    Returns:
        Dict mapping each bucket key of the ``all_topic`` terms aggregation
        to its ``doc_count``.
    """
    uid_filter = {'terms': {'uid': list(uid_set)}}
    topic_aggregation = {'terms': {'field': 'topic_string'}}
    query_body = {
        'query': {'filtered': {'filter': uid_filter}},
        'aggs': {'all_topic': topic_aggregation},
    }
    response = es_user_portrait.search(index=portrait_index_name,
                                       doc_type=portrait_index_type,
                                       body=query_body)
    buckets = response['aggregations']['all_topic']['buckets']
    return {bucket['key']: bucket['doc_count'] for bucket in buckets}
Пример #13
0
def search_portrait(condition_num, query, sort, size):
    """Search the portrait index, sorted by ``sort`` descending.

    Args:
        condition_num: When greater than 0, ``query`` is applied as the
            ``must`` clauses of a bool query; otherwise ``match_all`` is used.
        query: List of clauses for the bool query.
        sort: Field name to sort on (descending).
        size: Maximum number of hits requested.

    The lines visible here end after the search, so no value is returned.
    """
    user_result = []
    index_name = portrait_index_name
    index_type = portrait_index_type
    descending = [{sort: {'order': 'desc'}}]
    if condition_num > 0:
        filtered_body = {'query': {'bool': {'must': query}}, 'sort': descending, 'size': size}
        result = es_user_portrait.search(index=index_name, doc_type=index_type,
                                         body=filtered_body)['hits']['hits']
    else:
        unfiltered_body = {'query': {'match_all': {}}, 'sort': descending, 'size': size}
        try:
            result = es_user_portrait.search(index=index_name, doc_type=index_type,
                                             body=unfiltered_body)['hits']['hits']
        except Exception as e:
            raise e
Пример #14
0
def search_attribute(query_body, condition_num):
    """Search the attribute index; only the unfiltered case is visible here.

    Args:
        query_body: Not used in the lines visible here.
        condition_num: When 0, a ``match_all`` search is issued.

    Search errors are re-raised. The visible lines end before any return.
    """
    result = []
    if condition_num==0:
        match_everything = {'query': {'match_all': {}}}
        try:
            response = es.search(index=attribute_index_name,
                                 doc_type=attribute_index_type,
                                 body=match_everything)
            result = response['hits']['hits']
        except Exception as e:
            raise e
Пример #15
0
def get_top_influence(key):
    """Return the largest value of ``key`` among the portrait documents.

    Args:
        key: Field to sort on (descending) and to read from the top hit.

    Returns:
        The value of ``key`` in the top hit, or 2000 when the search returns
        no hits.
    """
    query_body = {"query": {"match_all": {}}, "sort": {key: {"order": "desc"}}, "size": 1}

    search_result = es.search(index=portrait_index_name, doc_type=portrait_index_type, body=query_body)["hits"]["hits"]
    if not search_result:
        # The original left `result` unassigned on this path and crashed with
        # UnboundLocalError. 2000 is the fallback the other get_top_* helpers
        # in this file use for an empty result.
        return 2000
    return search_result[0]["_source"][key]
Пример #16
0
def ajax_show_task():
    """Return the sensing tasks created by the requesting user as JSON.

    Request args:
        finish: "0" selects unfinished tasks, "1" finished tasks. Any value
            that is not exactly one character long (including the default
            "01") applies no ``finish`` filter.
        user: Value matched against ``create_by`` (default "").

    Returns:
        JSON list of task documents ordered by ``create_at`` descending. Each
        ``history_status`` is decoded and sorted in descending order, or
        replaced by ``[]`` when empty. An empty list is returned when the
        search fails.
    """
    status = request.args.get("finish", "01")
    user = request.args.get('user', '')

    must_clauses = [{"term": {"create_by": user}}]
    if len(status) == 1:
        must_clauses.append({"term": {"finish": status}})
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_clauses}}}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }

    try:
        search_results = es.search(index=index_manage_sensing_task,
                                   doc_type=task_doc_type,
                                   body=query_body)['hits']['hits']
    except Exception:
        # Best-effort: a failed search yields an empty listing. `Exception`
        # keeps KeyboardInterrupt / SystemExit propagating.
        search_results = []

    results = []
    for hit in search_results or []:
        item = hit['_source']
        history_status = json.loads(item['history_status'])
        # The identity key function of the original added nothing.
        item['history_status'] = sorted(history_status, reverse=True) if history_status else []
        results.append(item)
    return json.dumps(results)
Пример #17
0
def search_attribute(query_body, condition_num):
    """Search the attribute index; only the unfiltered case is visible here.

    Args:
        query_body: Not used in the lines visible here.
        condition_num: When 0, a ``match_all`` search requesting up to
            100000 hits is issued.

    Search errors are re-raised. The visible lines end before any return.
    """
    item_list = []
    default_size = 100000
    if condition_num==0:
        everything = {'query': {'match_all': {}}, 'size': default_size}
        try:
            response = es.search(index=attribute_index_name,
                                 doc_type=attribute_index_type,
                                 body=everything)
            result = response['hits']['hits']
        except Exception as e:
            raise e
Пример #18
0
def ajax_show_task():
    """Return sensing tasks as JSON, filtered by the ``finish`` request arg.

    Request args:
        finish: "0" selects unfinished tasks, "1" finished tasks, a
            two-character value such as the default "01" selects both. Any
            other length prints "error" and leaves the filter empty.

    Returns:
        JSON list of task documents ordered by ``create_at`` descending, with
        ``keywords`` and ``history_status`` decoded. A non-empty history keeps
        its last entry plus every earlier entry whose last element is
        non-zero, sorted by first element descending.
    """
    status = request.args.get("finish", "01")
    finish_filter = {}
    query_body = {
        "query": {"filtered": {"filter": finish_filter}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }
    if len(status) == 2:
        finish_filter['terms'] = {"finish": [status[0], status[1]]}
    elif len(status) == 1:
        finish_filter['term'] = {"finish": status}
    else:
        print("error")

    hits = es.search(index=index_manage_sensing_task,
                     doc_type=task_doc_type,
                     body=query_body)['hits']['hits']

    results = []
    for hit in hits or []:
        item = hit['_source']
        history_status = json.loads(item['history_status'])
        item['keywords'] = json.loads(item['keywords'])
        if history_status:
            # The latest entry is always kept; older ones only when their
            # last element is non-zero.
            kept = [history_status[-1]]
            kept.extend(entry for entry in history_status[:-1] if int(entry[-1]) != 0)
            item['history_status'] = sorted(kept, key=lambda entry: entry[0], reverse=True)
        else:
            item['history_status'] = history_status
        results.append(item)

    return json.dumps(results)
Пример #19
0
def get_evaluate_max():
    """Collect the top ``influence``, ``activeness`` and ``importance`` values.

    For each field one document is fetched, sorted by that field descending,
    and its value is stored in ``max_result``. Search errors are re-raised.
    The lines visible here end before any return statement.
    """
    max_result = {}
    for evaluate in ["influence", "activeness", "importance"]:
        descending = [{evaluate: {"order": "desc"}}]
        query_body = {"query": {"match_all": {}}, "size": 1, "sort": descending}
        try:
            response = es.search(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body=query_body)
            result = response["hits"]["hits"]
        except Exception as e:
            raise e
        max_result[evaluate] = result[0]["_source"][evaluate]
Пример #20
0
def search_portrait(condition_num, query, sort, size):
    """Search ``user_portrait``/``user`` with ``query`` as bool ``must`` clauses.

    Args:
        condition_num: The search is only issued when this is greater than 0.
        query: List of clauses for the bool query.
        sort: Sort specification passed through unchanged.
        size: Maximum number of hits requested.

    Search errors are re-raised. The visible lines end before any return.
    """
    user_result = []
    index_name = 'user_portrait'
    index_type = 'user'
    if condition_num > 0:
        search_body = {'query': {'bool': {'must': query}}, 'sort': sort, 'size': size}
        try:
            response = es_user_portrait.search(index=index_name,
                                               doc_type=index_type,
                                               body=search_body)
            result = response['hits']['hits']
        except Exception as e:
            raise e
Пример #21
0
def ajax_show_task():
    """Return the sensing tasks created by the requesting user as JSON.

    Request args:
        finish: "0" selects unfinished tasks, "1" finished tasks. Any value
            that is not exactly one character long (including the default
            "01") applies no ``finish`` filter.
        user: Value matched against ``create_by`` (default "admin").

    Returns:
        JSON list of task documents ordered by ``create_at`` descending, with
        ``history_status`` decoded. A non-empty history keeps its last entry
        plus every earlier entry whose last element is non-zero, sorted by
        first element descending. An empty list is returned when the search
        fails.
    """
    status = request.args.get("finish", "01")
    user = request.args.get('user', 'admin')

    must_clauses = [{"term": {"create_by": user}}]
    if len(status) == 1:
        must_clauses.append({"term": {"finish": status}})
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_clauses}}}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }

    try:
        search_results = es.search(index=index_manage_sensing_task, doc_type=task_doc_type,
                                   body=query_body)['hits']['hits']
    except Exception:
        # Best-effort: a failed search yields an empty listing. `Exception`
        # keeps KeyboardInterrupt / SystemExit propagating.
        search_results = []

    results = []
    for hit in search_results or []:
        item = hit['_source']
        history_status = json.loads(item['history_status'])
        if history_status:
            # The latest entry is always kept; older ones only when their
            # last element is non-zero.
            kept = [history_status[-1]]
            kept.extend(entry for entry in history_status[:-1] if int(entry[-1]) != 0)
            item['history_status'] = sorted(kept, key=lambda entry: entry[0], reverse=True)
        else:
            item['history_status'] = history_status
        results.append(item)
    print(results)
    return json.dumps(results)
Пример #22
0
def ajax_get_group_list():
    """Return the group-analysis tasks with ``status`` 1 as JSON.

    Returns:
        JSON list of ``[task_name, submit_user, submit_date, count, state,
        uid_list]`` rows ordered by ``submit_date`` descending (at most
        10000). ``state`` defaults to "" when missing. ``uid_list`` is
        JSON-decoded when possible and passed through unchanged otherwise.
    """
    must_clauses = [
        {"term": {"task_type": "analysis"}},
        {"term": {"status": 1}},
    ]
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_clauses}}}},
        "sort": {"submit_date": {"order": "desc"}},
        "size": 10000,
    }

    search_results = es.search(index=index_group_manage,
                               doc_type=doc_type_group,
                               body=query_body,
                               timeout=600)['hits']['hits']
    results = []
    for hit in search_results or []:
        item = hit['_source']
        row = [
            item['task_name'],
            item['submit_user'],
            item['submit_date'],
            item['count'],
            item.get('state', ""),
        ]
        raw_uid_list = item['uid_list']
        try:
            row.append(json.loads(raw_uid_list))
        except (TypeError, ValueError):
            # Not a JSON string (already decoded or malformed): keep as is.
            row.append(raw_uid_list)
        results.append(row)

    return json.dumps(results)
Пример #23
0
def save_detect_single_task(input_dict):
    """Look up the seed user of a detect task in the portrait index.

    Args:
        input_dict: Must provide ``['query_condition']['seed_user']``, whose
            items become ``term`` clauses combined with ``should``.

    Search errors are re-raised. The lines visible here end after the search,
    so no value is returned.
    """
    results = {}
    # step1: identify whether the seed user is in user_portrait
    seed_user = input_dict['query_condition']['seed_user']
    query_list = [{'term': {user_item: seed_user[user_item]}} for user_item in seed_user]
    query = [{'bool': {'should': query_list}}]
    search_body = {'query': {'bool': {'must': query}}, 'size': 1}
    try:
        response = es_user_portrait.search(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body=search_body)
        seed_user_result = response['hits']['hits']
    except Exception as e:
        raise e
Пример #24
0
def get_top_influence(key):
    """Return the largest value of ``key`` among the portrait documents.

    Args:
        key: Field to sort on (descending) and to read from the top hit.

    Returns:
        The value of ``key`` in the top hit, or 2000 when the search returns
        no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1,
    }

    search_result = es.search(index=portrait_index_name, doc_type=portrait_index_type, body=query_body)["hits"]["hits"]
    if search_result:
        return search_result[0]['_source'][key]
    # The original fell through to `return result` with `result` unassigned
    # (UnboundLocalError). 2000 is the fallback the other get_top_* helpers
    # in this file use for an empty result.
    return 2000
Пример #25
0
def save_detect_single_task(input_dict):
    """Search the portrait index for the seed user described by ``input_dict``.

    Args:
        input_dict: Must provide ``['query_condition']['seed_user']``; each of
            its entries becomes a ``term`` clause, and the clauses are
            combined with ``should`` inside a single ``must``.

    Search errors are re-raised. The lines visible here end after the search,
    so no value is returned.
    """
    results = {}
    # step1: identify whether the seed user is in user_portrait
    seed_user = input_dict['query_condition']['seed_user']
    should_clauses = []
    for field_name in seed_user:
        should_clauses.append({'term': {field_name: seed_user[field_name]}})
    query = [{'bool': {'should': should_clauses}}]
    try:
        seed_user_result = es_user_portrait.search(
            index=portrait_index_name,
            doc_type=portrait_index_type,
            body={'query': {'bool': {'must': query}}, 'size': 1}
        )['hits']['hits']
    except Exception as e:
        raise e
def get_top_influence():
    """Return the largest ``influence`` value among the portrait documents.

    Returns:
        The ``influence`` of the top hit, or 2000 when there are no hits.
    """
    top_query = {
        "query": {"match_all": {}},
        "sort": {"influence": {"order": "desc"}},
        "size": 1,
    }
    response = es.search(index=portrait_index_name, doc_type=portrait_index_type, body=top_query)
    hits = response['hits']['hits']
    if not hits:
        return 2000
    return hits[0]['_source']['influence']
def get_top_influence(key="influence"):
    """Return the largest value of ``key`` among the portrait documents.

    Args:
        key: Field to sort on (descending) and to read from the top hit.
            Defaults to ``"influence"``.

    Returns:
        The value of ``key`` in the top hit, or 2000 when there are no hits.
    """
    query_body = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1,
    }

    search_result = es.search(index=portrait_index_name, doc_type=portrait_index_type, body=query_body)['hits']['hits']
    if search_result:
        # Read the field that was sorted on. The original always read
        # 'influence', which is only correct for the default key.
        return search_result[0]['_source'][key]
    return 2000
Пример #28
0
def get_sort(uid):
    """Count the documents whose ``bci_week_ave`` is at least the user's.

    Args:
        uid: Document id looked up in ``BCI_INDEX_NAME``.

    Returns:
        The ``hits.total`` of the range search as a string, or ``None`` when
        the user's ``bci_week_ave`` cannot be read.
    """
    try:
        u_bci = es.get(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, id=uid,
                       fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
    except Exception:
        # Missing document or missing field. `Exception` rather than a bare
        # except keeps KeyboardInterrupt / SystemExit propagating.
        return None

    at_least_user = {'range': {'bci_week_ave': {'gte': u_bci}}}
    query_body = {'query': {'filtered': {'filter': at_least_user}}}
    result = es.search(index=BCI_INDEX_NAME, doc_type=BCI_INDEX_TYPE, body=query_body)
    return str(result['hits']['total'])
Пример #29
0
def ajax_get_group_list():
    """Return the requesting user's group tasks as JSON.

    A task is selected when ``submit_user`` equals the ``user`` request arg;
    the ``should`` clauses cover ``task_type`` "analysis" and ``task_type``
    "detect" with ``detect_process`` 100.

    Returns:
        JSON list of ``[task_name, submit_user, submit_date, count, state,
        uid_list]`` rows ordered by ``submit_date`` descending (at most
        10000). ``count`` is the length of ``uid_list``. ``uid_list`` is
        JSON-decoded when possible and passed through unchanged otherwise.
    """
    user = request.args.get('user', '')
    finished_detect = {"bool": {"must": [
        {"term": {"task_type": "detect"}},
        {"term": {"detect_process": 100}},
    ]}}
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "should": [
                            {"term": {"task_type": "analysis"}},
                            finished_detect,
                        ],
                        "must": {"term": {"submit_user": user}},
                    }
                }
            }
        },
        "sort": {"submit_date": {"order": "desc"}},
        "size": 10000,
    }

    search_results = es.search(index=index_group_manage, doc_type=doc_type_group,
                               body=query_body, timeout=600)['hits']['hits']
    results = []
    for hit in search_results or []:
        item = hit['_source']
        row = [
            item['task_name'],
            item['submit_user'],
            item['submit_date'],
            0,
            item.get('state', ""),
        ]
        raw_uid_list = item['uid_list']
        try:
            # Decode once, and append only after both the decode and len()
            # succeeded. The original appended first, so a len() failure
            # left two uid_list entries in the row.
            uid_list = json.loads(raw_uid_list)
            count = len(uid_list)
        except (TypeError, ValueError):
            uid_list = raw_uid_list
            count = len(raw_uid_list)
        row[3] = count
        row.append(uid_list)
        results.append(row)

    return json.dumps(results)
Пример #30
0
def ajax_get_group_list():
    """Return the requesting user's group tasks as JSON.

    A task is selected when ``submit_user`` equals the ``user`` request arg;
    the ``should`` clauses cover ``task_type`` "analysis" and ``task_type``
    "detect" with ``detect_process`` 100.

    Returns:
        JSON list of ``[task_name, submit_user, submit_date, count, state,
        uid_list]`` rows ordered by ``submit_date`` descending (at most
        10000). ``count`` is the length of ``uid_list``. ``uid_list`` is
        JSON-decoded when possible and passed through unchanged otherwise.
    """
    user = request.args.get('user', '')
    task_type_clauses = [
        {"term": {"task_type": "analysis"}},
        {"bool": {"must": [
            {"term": {"task_type": "detect"}},
            {"term": {"detect_process": 100}},
        ]}},
    ]
    owner_clause = {"term": {"submit_user": user}}
    query_body = {
        "query": {"filtered": {"filter": {"bool": {
            "should": task_type_clauses,
            "must": owner_clause,
        }}}},
        "sort": {"submit_date": {"order": "desc"}},
        "size": 10000,
    }

    search_results = es.search(index=index_group_manage, doc_type=doc_type_group,
                               body=query_body, timeout=600)['hits']['hits']
    results = []
    for hit in search_results or []:
        item = hit['_source']
        temp = [item['task_name'], item['submit_user'], item['submit_date']]
        state = item.get('state', "")
        raw_uid_list = item['uid_list']
        try:
            # Decode once, and build the row only after both the decode and
            # len() succeeded. The original appended before measuring, so a
            # len() failure left two uid_list entries in the row.
            uid_list = json.loads(raw_uid_list)
            count = len(uid_list)
        except (TypeError, ValueError):
            uid_list = raw_uid_list
            count = len(raw_uid_list)
        temp.extend([count, state, uid_list])
        results.append(temp)

    return json.dumps(results)
Пример #31
0
def filter_in_uname(input_dict):
    """Return the entries of ``input_dict`` whose uname exists in the portrait index.

    The unames (keys of ``input_dict``) are looked up in batches of
    ``FILTER_ITER_COUNT`` with a ``terms`` query on ``uname``.

    Args:
        input_dict: Mapping keyed by uname.

    Returns:
        A list of ``[_id, uname, photo_url, input_dict value]`` rows, one per
        hit. Batches whose search fails contribute nothing.
    """
    input_uname = list(input_dict)
    all_count = len(input_uname)
    iter_count = 0
    in_portrait_result = []
    while iter_count < all_count:
        iter_user_list = input_uname[iter_count: iter_count + FILTER_ITER_COUNT]
        try:
            portrait_result = es_user_portrait.search(
                index=portrait_index_name, doc_type=portrait_index_type,
                body={'query': {'terms': {'uname': iter_user_list}}},
                _source=False, fields=['photo_url', 'uname'])['hits']['hits']
        except Exception:
            portrait_result = []
        # Rows are built per hit inside the loop. The original extended the
        # result with `iter_in_portrait` even when the batch had no hits,
        # which re-added the previous batch (or raised NameError on the
        # first one).
        for item in portrait_result:
            # NOTE(review): with `fields=` the values are expected under the
            # hit's 'fields' key, as get_sort in this file reads them; fall
            # back to the hit itself, which is where the original looked.
            hit_fields = item.get('fields', item)
            uname = hit_fields['uname'][0]
            photo_url = hit_fields['photo_url'][0]
            # The original indexed the key list with the uname field; the
            # per-uname payload lives in input_dict.
            in_portrait_result.append([item['_id'], uname, photo_url, input_dict.get(uname)])
        iter_count += FILTER_ITER_COUNT
    return in_portrait_result
Пример #32
0
def get_evaluate_max():
    """Collect the top ``influence``, ``activeness`` and ``importance`` values.

    For each field one document is fetched, sorted by that field descending,
    and its value is stored in ``max_result``. Search errors are re-raised.
    The lines visible here end before any return statement.
    """
    max_result = {}
    for evaluate in ['influence', 'activeness', 'importance']:
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}],
        }
        try:
            response = es.search(index=portrait_index_name,
                                 doc_type=portrait_index_type,
                                 body=query_body)
            result = response['hits']['hits']
        except Exception as e:
            raise e
        top_hit = result[0]
        max_result[evaluate] = top_hit['_source'][evaluate]
Пример #33
0
def filter_in_uname(input_dict):
    """Return the entries of ``input_dict`` whose uname exists in the portrait index.

    The unames (keys of ``input_dict``) are looked up in batches of
    ``FILTER_ITER_COUNT`` with a ``terms`` query on ``uname``.

    Args:
        input_dict: Mapping keyed by uname.

    Returns:
        A list of ``[_id, uname, photo_url, input_dict value]`` rows, one per
        hit. Batches whose search fails contribute nothing.
    """
    input_uname = list(input_dict)
    all_count = len(input_uname)
    in_portrait_result = []
    iter_count = 0
    while iter_count < all_count:
        iter_user_list = input_uname[iter_count: iter_count + FILTER_ITER_COUNT]
        iter_count += FILTER_ITER_COUNT
        search_body = {'query': {'terms': {'uname': iter_user_list}}}
        try:
            portrait_result = es_user_portrait.search(
                index=portrait_index_name, doc_type=portrait_index_type,
                body=search_body, _source=False,
                fields=['photo_url', 'uname'])['hits']['hits']
        except Exception:
            # Best-effort: a failed batch is skipped.
            continue
        # Only the hits of this batch are added. The original extended the
        # result with a variable that was stale (previous batch) or unbound
        # (first batch) whenever a batch had no hits.
        for item in portrait_result:
            # NOTE(review): with `fields=` the values are expected under the
            # hit's 'fields' key, as get_sort in this file reads them; fall
            # back to the hit itself, which is where the original looked.
            hit_fields = item.get('fields', item)
            uname = hit_fields['uname'][0]
            # The original indexed the key list with the uname field; the
            # per-uname payload lives in input_dict.
            in_portrait_result.append(
                [item['_id'], uname, hit_fields['photo_url'][0], input_dict.get(uname)]
            )
    return in_portrait_result
Пример #34
0
def ajax_show_task():
    """Return sensing tasks as JSON, filtered by the ``finish`` request arg.

    Request args:
        finish: "0" selects unfinished tasks, "1" finished tasks, a
            two-character value such as the default "01" selects both. Any
            other length prints "error" and leaves the filter empty.

    Returns:
        JSON list of task documents ordered by ``create_at`` descending, with
        ``keywords`` and ``history_status`` decoded. A non-empty history keeps
        its last entry plus every earlier entry whose last element is
        non-zero, sorted by first element descending.
    """
    status = request.args.get("finish", "01")
    length = len(status)
    task_filter = {}
    if length == 2:
        task_filter['terms'] = {"finish": [status[0], status[1]]}
    elif length == 1:
        task_filter['term'] = {"finish": status}
    else:
        print("error")
    query_body = {
        "query": {"filtered": {"filter": task_filter}},
        "sort": {"create_at": {"order": "desc"}},
        "size": 10000,
    }

    response = es.search(index=index_manage_sensing_task, doc_type=task_doc_type, body=query_body)
    search_results = response['hits']['hits']

    results = []
    for hit in search_results or []:
        task = hit['_source']
        history_status = json.loads(task['history_status'])
        task['keywords'] = json.loads(task['keywords'])
        if history_status:
            # Always keep the latest entry; keep older ones only when their
            # last element is non-zero.
            selected = [history_status[-1]]
            for earlier in history_status[:-1]:
                if int(earlier[-1]) != 0:
                    selected.append(earlier)
            selected.sort(key=lambda entry: entry[0], reverse=True)
            task['history_status'] = selected
        else:
            task['history_status'] = history_status
        results.append(task)

    return json.dumps(results)
def get_top_all_influence(key, ts):
    """Return the largest value of field `key` in the "bci_<date>" index for `ts - DAY`.

    The index name is "bci_" followed by ts2datetime(ts - DAY) with the dashes
    removed (presumably a YYYYMMDD date -- confirm against ts2datetime).
    Falls back to 2000 when that index does not exist or holds no documents.
    """
    index_name = "bci_" + ts2datetime(ts-DAY).replace('-','')
    if not es.indices.exists(index=index_name):
        return 2000

    # One document, sorted descending on `key`, is the maximum.
    top_query = {
        "query": {"match_all": {}},
        "sort": {key: {"order": "desc"}},
        "size": 1
    }
    hits = es.search(index=index_name, doc_type="bci", body=top_query)['hits']['hits']
    return hits[0]['_source'][key] if hits else 2000
Пример #36
0
def get_evaluate_max():
    """Return the maximum of each evaluation index found in the portrait index.

    Returns:
        dict mapping 'influence', 'activeness', 'importance' and 'sensitive'
        to the largest value stored for that field. MAX_VALUE is used for a
        field when the search fails, returns no hit, or the top hit lacks
        the field.
    """
    max_result = {}
    for evaluate in ('influence', 'activeness', 'importance', 'sensitive'):
        # One document, sorted descending on the field, is the maximum.
        query_body = {
            'query': {'match_all': {}},
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}]
        }
        try:
            hits = es_user_portrait.search(index=portrait_index_name,
                                           doc_type=portrait_index_type,
                                           body=query_body)['hits']['hits']
            max_result[evaluate] = hits[0]['_source'][evaluate]
        except Exception:
            # Best-effort lookup: any search/shape error falls back to the
            # default, but KeyboardInterrupt/SystemExit are no longer swallowed.
            max_result[evaluate] = MAX_VALUE

    return max_result
Пример #37
0
def get_max_value(es, index_name, _type):
    """Return the largest 'bci_week_ave' stored in the given index.

    Args:
        es: Elasticsearch client (anything exposing a compatible ``search``).
        index_name: index to query.
        _type: document type to query.

    Returns:
        The 'bci_week_ave' of the top-ranked document, or 1 when the search
        fails or returns no hits.
    """
    query_body = {
        "query": {
            "match_all": {}
        },
        "sort": {
            'bci_week_ave': {
                "order": "desc"
            }
        },
        "size": 1
    }

    try:
        hits = es.search(index=index_name, doc_type=_type,
                         body=query_body)['hits']['hits']
    except Exception:
        # Best-effort: a failed search yields the default. Unlike a bare
        # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        hits = []

    if hits:
        return hits[0]['_source']['bci_week_ave']
    return 1
Пример #38
0
def ajax_get_group_list():
    """Return, as JSON, the group-management tasks of type "analysis" with status 1.

    Each row is [task_name, submit_user, submit_date, count, state, uid_list],
    newest submit_date first. 'state' defaults to "" when absent, and
    'uid_list' is JSON-decoded when possible, otherwise passed through as stored.
    """
    analysis_filter = {
        "bool": {
            "must": [
                {"term": {"task_type": "analysis"}},
                {"term": {"status": 1}}  # attention: only status 1 is listed
            ]
        }
    }
    query_body = {
        "query": {"filtered": {"filter": analysis_filter}},
        "sort": {"submit_date": {"order": "desc"}},
        "size": 10000
    }

    hits = es.search(index=index_group_manage, doc_type=doc_type_group, body=query_body, timeout=600)['hits']['hits']

    rows = []
    for hit in hits:
        group = hit['_source']
        row = [
            group['task_name'],
            group['submit_user'],
            group['submit_date'],
            group['count'],
            group.get('state', ""),
        ]
        try:
            row.append(json.loads(group['uid_list']))
        except:
            # Deliberately broad: fall back to the raw stored value.
            row.append(group['uid_list'])
        rows.append(row)

    return json.dumps(rows)
Пример #39
0
def get_person_value(uid):
    """Compute a composite "value" score for the user `uid`.

    Reads the user's documents from the BCI-history, portrait and profile
    indices and combines four components: verification weight, account age,
    follower count and influence.

    Returns:
        The string 'no' if any of the three document lookups raises, or if
        the profile document is not marked as found; otherwise the numeric
        score ``(ver*0.1 + times*0.1 + fans_value*0.3 + influence_value*0.5) * 50``.
    """
    # verification type
    #print es_user_profile,profile_index_name,profile_index_type,uid
    try:
        value_static = es_bci_history.get(index=bci_history_index_name,
                                          doc_type=bci_history_index_type,
                                          id=uid)
        value_inf = es_user_portrait.get(index=portrait_index_name,
                                         doc_type=portrait_index_type,
                                         id=uid)
        static = es_user_profile.get(index=profile_index_name,
                                     doc_type=profile_index_type,
                                     id=uid)
    except:
        return 'no'
    # Largest follower count in the BCI-history index (top document when
    # sorted descending on 'user_fansnum').
    # NOTE(review): this lookup is outside any try block; an empty index
    # would raise on the [0] access -- confirm that cannot happen.
    fans_max = es_bci_history.search(
        index=bci_history_index_name,
        doc_type=bci_history_index_type,
        body={
            'query': {
                'match_all': {}
            },
            'sort': {
                'user_fansnum': {
                    'order': 'desc'
                }
            },
            'size': 1
        })['hits']['hits'][0]['_source']['user_fansnum']
    print 'max:', fans_max
    #print static['found']
    if static['found'] == False:
        return 'no'
    else:
        static = static['_source']
    #print "static",static
    # Map the profile's verified_type to a weight; unknown or missing types
    # count as 0.
    try:
        ver_calue = verified_value[static['verified_type']]
    except:
        ver_calue = 0
    # account creation time: age in whole years, rounded up
    # (31536000 = 365 * 24 * 3600 seconds)
    times = math.ceil((time.time() - int(static['create_at'])) / 31536000)
    # follower count, log10-scaled: 0 followers -> 0, fans_max followers -> 1
    #person = es_user_profile.get(index = profile_index_name,doc_type = profile_index_type,id=uid)['_source']
    fans_value = math.log(
        float(value_static['_source']['user_fansnum']) / float(fans_max) * 9 +
        1, 10)
    #fans_value = (math.log(static['fansnum']+1000000,100000000)-0.75)*4
    #if fans_value>1:
    #    fans_value=1.0
    # Largest influence in the portrait index, used to scale the user's own.
    influence_max = es_user_portrait.search(
        index=portrait_index_name,
        doc_type=portrait_index_type,
        body={
            'query': {
                'match_all': {}
            },
            'sort': {
                'influence': {
                    'order': 'desc'
                }
            },
            'size': 1
        })['hits']['hits'][0]['_source']['influence']
    # Same log10 scaling as fans_value.
    influence_value = math.log(
        float(value_inf['_source']['influence']) / float(influence_max) * 9 +
        1, 10)
    # Weighted sum of the four components, multiplied by 50.
    final = (ver_calue * 0.1 + times * 0.1 + fans_value * 0.3 +
             influence_value * 0.5) * 50
    print ver_calue, times, fans_value, influence_value
    return final
Пример #40
0
def get_sort(uid, fe):
    """Return the user's BCI score and rank, within topic `fe` and overall, as JSON.

    Args:
        uid: id of the user document.
        fe: value matched against the 'topic_string' field for the
            topic-restricted rank.

    Returns:
        A JSON object with four keys:
          in_score  -- the user's 'bci_week_ave' read from BCI_INDEX_NAME
          in_top    -- number of documents in BCI_INDEX_NAME with
                       topic_string == fe and bci_week_ave >= in_score
          all_score -- log10-scaled score (0-100) of the user's
                       'bci_week_ave' in the 'bci_history' index, relative
                       to the maximum reported by get_max_value
          all_top   -- number of 'bci_history' documents with
                       bci_week_ave >= the user's value
        A value that cannot be computed is the empty string.
    """
    def fetch_bci(index, doc_type):
        # Return the user's bci_week_ave from the given index, or None.
        try:
            return es.get(index=index,
                          doc_type=doc_type,
                          id=uid,
                          fields=['bci_week_ave'])['fields']['bci_week_ave'][0]
        except Exception:
            return None

    def count_at_least(index, doc_type, score, extra_filters=()):
        # Count documents whose bci_week_ave is >= score (plus extra filters).
        must = [{'range': {'bci_week_ave': {'gte': score}}}]
        must.extend(extra_filters)
        query_body = {'query': {'filtered': {'filter': {'bool': {'must': must}}}}}
        return es.search(index=index, doc_type=doc_type,
                         body=query_body)['hits']['total']

    result = {'in_score': "", 'in_top': "", 'all_score': "", 'all_top': ""}

    # Rank inside the topic. Skipped when the score is unavailable: the rank
    # query needs it, and referencing it unconditionally raised NameError.
    topic_bci = fetch_bci(BCI_INDEX_NAME, BCI_INDEX_TYPE)
    if topic_bci is not None:
        result['in_score'] = topic_bci
        result['in_top'] = count_at_least(BCI_INDEX_NAME, BCI_INDEX_TYPE, topic_bci,
                                          [{'term': {'topic_string': fe}}])

    # Overall rank. The count runs only with a freshly fetched score, so a
    # failed lookup no longer ranks against the stale topic score.
    overall_bci = fetch_bci('bci_history', 'bci')
    if overall_bci is not None:
        try:
            # NOTE(review): the maximum is read through es_user_profile while
            # the other queries use es -- confirm both reach the same cluster.
            bci_max = get_max_value(es_user_profile, "bci_history", "bci")
            result['all_score'] = math.log(overall_bci / float(bci_max) * 9 + 1,
                                           10) * 100
        except Exception:
            result['all_score'] = ""
        result['all_top'] = count_at_least('bci_history', 'bci', overall_bci)

    return json.dumps(result)
Пример #41
0
def imagine(submit_user, uid, query_fields_dict,index_name=portrait_index_name, doctype=portrait_index_type):
    """Find portrait users similar to `uid` on the weighted fields of `query_fields_dict`.

    Args:
        submit_user: name of the requesting user; custom tags are read from
            the "<submit_user>-tag" field of the portrait document.
        uid: id of the portrait document to compare against.
        query_fields_dict: maps field names to boost weights, plus a 'size'
            entry giving the number of similar users wanted. NOTE: this dict
            is mutated in place -- weighted fields that cannot be used and
            the 'size' entry are popped.
        index_name, doctype: index / doc type searched for similar users.

    Returns:
        None if the portrait document of `uid` cannot be fetched; [] if no
        usable field or tag remains; otherwise a list whose first element is
        [uid, uname, activeness, importance, influence] for the user itself
        (scores log-normalised to 0-100) followed by one dict per similar
        user with keys 'uid', 'uname', 'activeness', 'importance',
        'influence' and 'similiar' (score relative to the best match, 0-100).
    """
    try:
        personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)['_source']
    except Exception:
        return None

    # Custom tags are stored as "name-value" pairs joined with '&'.
    user_tag = submit_user + "-tag"
    tag_dict = {}
    user_tag_string = personal_info.get(user_tag, "")
    if user_tag_string:
        for pair in user_tag_string.split('&'):
            parts = pair.split('-')
            if len(parts) > 1:
                tag_dict[parts[0]] = parts[1]

    # Fields to correlate on: every entry with a truthy weight except 'size'.
    weighted_keys = [k for k, v in list(query_fields_dict.items()) if v and k != 'size']

    # Drop fields the user has no value for; fields absent from the portrait
    # document may instead refer to a custom tag.
    search_dict = {}
    tag_attri_value = []
    for key in weighted_keys:
        if key in personal_info:
            if personal_info[key]:
                search_dict[key] = personal_info[key].split('&')
            else:
                query_fields_dict.pop(key)
        else:
            query_fields_dict.pop(key)
            if tag_dict.get(key):
                tag_attri_value.append(key + "-" + tag_dict[key])

    if not search_dict and not tag_attri_value:
        return []

    must_clauses = []
    if tag_attri_value:
        must_clauses.append({"terms": {user_tag: tag_attri_value}})
    for key, terms in search_dict.items():
        weight = query_fields_dict[key]
        should = [{'wildcard': {key: {'wildcard': '*' + term + '*', 'boost': weight}}}
                  for term in terms]
        must_clauses.append({'bool': {'should': should}})

    query_body = {
        'query': {'function_score': {'query': {'bool': {'must': must_clauses}}}},
        'size': 150  # default number of candidates fetched
    }
    query_number = query_fields_dict.pop('size', None)  # required number

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Reference score: the best hit that is not the user itself.
    top_score = next((hit['_score'] for hit in result if hit['_id'] != uid), None)

    # Maxima used to normalise the evaluation indexes.
    evaluate_max_dict = get_evaluate_max()

    def normalize(field, value):
        return math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100

    result_list = []
    for item in result:
        # Skip the user itself and deleted users. The check is made on the
        # hit's own id; `uid` is never rebound inside the loop.
        if item['_id'] == uid or item['_id'] in filter_uid:
            continue
        source = item['_source']
        return_dict = {
            'uid': source['uid'],
            'uname': source['uname'],
            'similiar': item['_score'] / float(top_score) * 100 if top_score else 0.0,
        }
        for field in evaluate_index_list:
            return_dict[field] = normalize(field, source[field])
        result_list.append(return_dict)

        if len(result_list) == query_number:
            break

    personal_row = [personal_info['uid'], personal_info['uname']]
    for field in evaluate_index_list:
        personal_row.append(normalize(field, personal_info[field]))

    return [personal_row] + result_list
Пример #42
0
 try:
     result = es_tag.get(index=attribute_index_name, doc_type=attribute_index_type, id=id_attribute)['_source']
 except Exception, e:
     #raise e
     return status
 es_tag.delete(index=attribute_index_name, doc_type=attribute_index_type, id=id_attribute)
 # delete attribute in user_portrait
 # user_portrait中,以attribute_name-attribute_value给用户赋值
 query = []
 portrait_attribute_field = []
 attribute_value = result['attribute_value'].split('&')
 for value in attribute_value:
     portrait_attribute_field.append(attribute_name +"-"+value)
 query.append({'match':{submit_user_tag: portrait_attribute_field}})
 try:
     attribute_user_result = es.search(index=user_index_name, doc_type=user_index_type, \
             body={'query':{'bool':{'should':query}}, "size": 100000000})['hits']['hits']
 except:
     attribute_user_result = []
 if attribute_user_result==[]:
     status = True
     return status
 bulk_action = []
 count = 0
 for user_dict in attribute_user_result:
     try:
         user_item = user_dict['_source']
     except:
         next
     tmp = user_item[submit_user_tag]
     delete_set = set(tmp.split('&')) - set(portrait_attribute_field)
     user_item[submit_user_tag] = "&".join(list(delete_set))
Пример #43
0
#use to search user_portrait by lots of condition 
def search_portrait(condition_num, query, sort, size):
    """Search the 'user_portrait' index and return the matching, non-deleted users.

    Args:
        condition_num: number of search conditions; when 0, every document
            is matched.
        query: used as the ``must`` clause of a bool query when
            condition_num > 0.
        sort: when condition_num > 0, passed through unchanged as the
            search's sort specification; otherwise treated as a field name
            to sort on in descending order.
        size: maximum number of hits requested.

    Returns:
        A list of [uid, uname, location, activeness, importance, influence,
        score] rows, excluding uids returned by all_delete_uid().

    Raises:
        Whatever the Elasticsearch client raises; errors propagate with
        their original traceback.
    """
    if condition_num > 0:
        body = {'query': {'bool': {'must': query}}, 'sort': sort, 'size': size}
    else:
        body = {'query': {'match_all': {}}, 'sort': [{sort: {"order": "desc"}}], 'size': size}

    result = es_user_portrait.search(index='user_portrait', doc_type='user', body=body)['hits']['hits']

    user_result = []
    if result:
        filter_set = all_delete_uid()  # uids that must not be shown
        for item in result:
            user_dict = item['_source']
            if user_dict['uid'] not in filter_set:
                user_result.append([user_dict['uid'], user_dict['uname'], user_dict['location'],
                                    user_dict['activeness'], user_dict['importance'],
                                    user_dict['influence'], item['_score']])

    return user_result

Пример #44
0
        status = True
    return status

# use to search attribute table
def search_attribute(query_body, condition_num):
    """Return the '_source' of every attribute document matching the conditions.

    Args:
        query_body: used as the ``must`` clause of a bool query when
            condition_num is non-zero.
        condition_num: number of search conditions; 0 matches every
            attribute document.

    Returns:
        A list of attribute '_source' dicts (at most 100000).

    Raises:
        Whatever the Elasticsearch client raises; errors propagate with
        their original traceback.
    """
    default_size = 100000
    if condition_num == 0:
        query = {'match_all': {}}
    else:
        query = {'bool': {'must': query_body}}

    result = es.search(index=attribute_index_name, doc_type=attribute_index_type,
                       body={'query': query, 'size': default_size})['hits']['hits']
    return [item['_source'] for item in result]

# use to change attribute
def change_attribute(attribute_name, value, user, state):
    status = False
    # identify the attribute_name is in ES - custom attribute
    try:
        result =  es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
Пример #45
0
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Find portrait users related to `uid` on the weighted fields of `query_fields_dict`.

    Args:
        uid: id of the portrait document to compare against.
        query_fields_dict: maps search field names (e.g. domain,
            topic_string, keywords, activity_geo, hashtag) to boost weights,
            for example ``"domain": 2``. Two extra entries are expected:
            'field', the evaluation index ("activeness", "importance" or
            "influence") folded into the score, and 'size', the number of
            users wanted. NOTE: this dict is mutated in place -- fields with
            no value in the portrait document, 'field' and 'size' are popped.
        index_name, doctype: index / doc type searched for related users.

    Returns:
        [] when the user has no value for any requested field; otherwise a
        list whose first element is the user's own
        [uid, uname, activeness, importance, influence] (raw values),
        followed by one such list per related user (scores log-normalised to
        0-100, with the relative similarity appended) and finally the
        document count returned by es.count.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)["_source"]

    # Fields to correlate on: everything except the two control entries.
    keys_list = [key for key in query_fields_dict if key not in ("field", "size")]

    # Drop fields the user has no value for.
    search_dict = {}
    for iter_key in keys_list:
        value = personal_info.get(iter_key)
        if value:
            search_dict[iter_key] = value.split("&")
        else:
            query_fields_dict.pop(iter_key)

    if not search_dict:
        return []

    # Fold the chosen evaluation index into the relevance score.
    factors = {"activeness": 100, "importance": 0.01, "influence": 0.1}
    function_score = {"query": {"bool": {"must": []}}}
    sort_field = query_fields_dict.pop("field", None)
    if sort_field in factors:
        score_standard = {"modifier": "log1p", "field": sort_field, "factor": factors[sort_field]}
    else:
        score_standard = {"modifier": "log1p", "field": "influence", "factor": 0}
        function_score["boost_mode"] = "sum"
    function_score["field_value_factor"] = score_standard
    query_body = {"query": {"function_score": function_score}}

    # Counted before any must clause is added.
    number = es.count(index=index_name, doc_type=doctype, body=query_body)["count"]
    query_body["size"] = 150  # default number of candidates fetched
    query_number = query_fields_dict.pop("size", None)  # required number

    for key, terms in search_dict.items():
        weight = query_fields_dict[key]
        should = [{"wildcard": {key: {"wildcard": "*" + term + "*", "boost": weight}}}
                  for term in terms]
        function_score["query"]["bool"]["must"].append({"bool": {"should": should}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype, body=query_body)["hits"]["hits"]
    field_list = ["uid", "uname", "activeness", "importance", "influence"]
    evaluate_index_list = ["activeness", "importance", "influence"]

    # Reference score: the best hit that is not the user itself.
    top_score = next((hit["_score"] for hit in result if hit["_id"] != uid), None)

    # Maxima used to normalise the evaluation indexes.
    evaluate_max_dict = get_evaluate_max()

    return_list = []
    for item in result:
        # Skip the user itself and deleted users (checked on the hit's id).
        if item["_id"] == uid or item["_id"] in filter_uid:
            continue
        info = []
        for field in field_list:
            value = item["_source"][field]
            if field in evaluate_index_list:
                # float() keeps Python 2 from truncating int / int to 0.
                value = math.log(value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
            info.append(value)
        info.append(item["_score"] / float(top_score) * 100 if top_score else 0.0)
        return_list.append(info)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    personal_row = [personal_info[field] for field in field_list]
    return [personal_row] + return_list
Пример #46
0
def imagine(uid,
            query_fields_dict,
            index_name=portrait_index_name,
            doctype=portrait_index_type):
    """Find portrait users related to `uid` on the weighted fields of `query_fields_dict`.

    Args:
        uid: id of the portrait document to compare against.
        query_fields_dict: maps search field names to boost weights, plus a
            'size' entry giving the number of users wanted. NOTE: this dict
            is mutated in place -- fields with no value in the portrait
            document and the 'size' entry are popped.
        index_name, doctype: index / doc type searched for related users.

    Returns:
        [] when the user has no value for any requested field; otherwise a
        list whose first element is the user's own
        [uid, uname, activeness, importance, influence], followed by one
        such list per related user with the relative similarity appended,
        and finally the document count returned by es.count. Evaluation
        scores are log-normalised to 0-100 in every row.
    """
    personal_info = es.get(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           id=uid,
                           _source=True)['_source']

    # Fields to correlate on: everything except the 'size' control entry.
    keys_list = [key for key in query_fields_dict if key != 'size']

    # Drop fields the user has no value for.
    search_dict = {}
    for iter_key in keys_list:
        value = personal_info.get(iter_key)
        if value:
            search_dict[iter_key] = value.split('&')
        else:
            query_fields_dict.pop(iter_key)

    if not search_dict:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }

    # Counted before any must clause is added.
    number = es.count(index=index_name, doc_type=doctype,
                      body=query_body)['count']
    query_body['size'] = 150  # default number of candidates fetched
    query_number = query_fields_dict.pop('size', None)  # required number

    for key, terms in search_dict.items():
        weight = query_fields_dict[key]
        should = [{
            'wildcard': {
                key: {
                    'wildcard': '*' + term + '*',
                    'boost': weight
                }
            }
        } for term in terms]
        must_clauses.append({'bool': {'should': should}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Reference score: the best hit that is not the user itself.
    top_score = next(
        (hit['_score'] for hit in result if hit['_id'] != uid), None)

    # Maxima used to normalise the evaluation indexes.
    evaluate_max_dict = get_evaluate_max()

    def build_row(source):
        # [uid, uname, activeness, importance, influence], scores normalised.
        row = []
        for field in field_list:
            value = source[field]
            if field in evaluate_index_list:
                # float() keeps Python 2 from truncating int / int to 0.
                value = math.log(
                    value / float(evaluate_max_dict[field]) * 9 + 1, 10) * 100
            row.append(value)
        return row

    return_list = []
    for item in result:
        # Skip the user itself and deleted users (checked on the hit's id).
        if item['_id'] == uid or item['_id'] in filter_uid:
            continue
        info = build_row(item['_source'])
        info.append(
            item['_score'] / float(top_score) * 100 if top_score else 0.0)
        return_list.append(info)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    return [build_row(personal_info)] + return_list
Пример #47
0
def conclusion_on_influence(uid):
    """Summarise how user `uid` compares with everyone in the copy-portrait index.

    Returns:
        A seven-element list:
        [activeness %, users with higher aver_activeness,
         influence %, users with higher aver_influence,
         importance %, users with higher aver_importance,
         total number of documents in the index].
        Each percentage is the user's average expressed as an integer
        percentage of the largest value in the index (0 when that largest
        value is 0 or missing). When the user's document cannot be fetched
        the first six entries are 0.
    """
    def top_sort_value(field):
        # Largest value of `field`: sort value of the top document.
        body = {
            "query": {"match_all": {}},
            "sort": {field: {"order": "desc"}},
            "size": 1
        }
        return es.search(index=copy_portrait_index_name,
                         doc_type=copy_portrait_index_type,
                         body=body)['hits']['hits'][0]['sort'][0]

    def count_above(field, value):
        # Number of documents whose `field` is strictly greater than value.
        body = {
            "query": {
                "filtered": {
                    "filter": {"range": {field: {"gt": value}}}
                }
            }
        }
        return es.count(index=copy_portrait_index_name,
                        doc_type=copy_portrait_index_type,
                        body=body)['count']

    total_number = es.count(index=copy_portrait_index_name,
                            doc_type=copy_portrait_index_type)["count"]

    try:
        influ_result = es.get(index=copy_portrait_index_name,
                              doc_type=copy_portrait_index_type,
                              id=uid)['_source']
    except Exception:
        return [0, 0, 0, 0, 0, 0, total_number]

    result = []
    for field in ("aver_activeness", "aver_influence", "aver_importance"):
        value = influ_result.get(field, 0)
        top = top_sort_value(field)
        # Guard the division: an index whose maximum is 0 yields 0 percent.
        result.append(int(value * 100.0 / top) if top else 0)
        result.append(count_above(field, value))
    result.append(total_number)
    return result
Пример #48
0
        es.index(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name, body=input_data)
        status = True
    return status

# use to search attribute table
def search_attribute(query_body, condition_num):
    """Return the '_source' of every attribute document matching the conditions.

    Args:
        query_body: used as the ``must`` clause of a bool query when
            condition_num is non-zero.
        condition_num: number of search conditions; 0 matches every
            attribute document.

    Returns:
        A list of attribute '_source' dicts. No 'size' is sent, so the
        search backend's default page size applies.

    Raises:
        Whatever the Elasticsearch client raises; errors propagate with
        their original traceback.
    """
    if condition_num == 0:
        query = {'match_all': {}}
    else:
        query = {'bool': {'must': query_body}}

    hits = es.search(index=attribute_index_name, doc_type=attribute_index_type,
                     body={'query': query})['hits']['hits']
    # Collect into a separate list: appending to the hit list while
    # iterating over it makes the loop walk its own appended items.
    return [item['_source'] for item in hits]

# use to change attribute
def change_attribute(attribute_name, value, user, state):
    status = False
    # identify the attribute_name is in ES - custom attribute
    try:
        result =  es.get(index=attribute_index_name, doc_type=attribute_index_type, id=attribute_name)['_source']
    except:
Пример #49
0
def imagine(uid, query_fields_dict, index_name=portrait_index_name, doctype=portrait_index_type):
    """Find portrait users whose fields resemble those of ``uid``.

    uid: id of the reference user; the portrait document is fetched with
        ``es.get`` from ``portrait_index_name`` / ``portrait_index_type``.
    query_fields_dict: maps a portrait field name to the boost used for that
        field, plus the entry ``'size'`` (maximum number of users to return).
        NOTE: this dict is modified in place -- fields that are empty for the
        reference user are popped, and ``'size'`` is popped once the query is
        built.
    index_name, doctype: index / doc type that is counted and searched.

    Returns ``[]`` when the reference user has no usable value for any of the
    requested fields.  Otherwise returns a list made of:
      * the reference user's row ``[uid, uname, activeness, importance,
        influence]``,
      * one row per matched user with the same five columns plus the hit score
        expressed as a percentage of the best score,
      * the document count ``number`` as the last element.
    The three evaluation columns are rescaled with
    ``log10(value / max * 9 + 1) * 100``.
    """
    personal_info = es.get(index=portrait_index_name, doc_type=portrait_index_type, id=uid, _source=True)['_source']

    # keys that take part in the correlation: everything except 'size'
    keys_list = list(query_fields_dict.keys())
    keys_list.remove('size')

    # drop the keys for which the reference user has no value; for the others
    # remember the '&'-separated terms of the reference user
    search_dict = {}
    for iter_key in keys_list:
        field_value = personal_info[iter_key]
        if field_value:
            search_dict[iter_key] = field_value.split('&')
        else:
            query_fields_dict.pop(iter_key)

    if not search_dict:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }

    # counted while the bool query still has no clause
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 150  # default number of hits fetched
    query_number = query_fields_dict.pop('size')  # number of users requested

    # one must-clause per field: any of the reference user's terms may match
    for k, v in query_fields_dict.items():
        should_clauses = [
            {'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
            for term in search_dict[k]
        ]
        must_clauses.append({'bool': {'should': should_clauses}})

    filter_uid = all_delete_uid()

    result = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Best score among the hits that are not the reference user.  Taking the
    # first such hit also covers the cases "only one hit" and "count <= 1",
    # which used to end in an IndexError / unbound variable.
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    # maxima used to normalise the evaluation indexes
    evaluate_max_dict = get_evaluate_max()

    def normalize(field, value):
        # float() avoids Python 2 integer division truncating the ratio to 0
        ratio = value / float(evaluate_max_dict[field])
        return math.log(ratio * 9 + 1, 10) * 100

    def build_row(source):
        row = []
        for field in field_list:
            if field in evaluate_index_list:
                row.append(normalize(field, source[field]))
            else:
                row.append(source[field])
        return row

    return_list = []
    for item in result:
        # NOTE(review): the second test checks the *reference* uid against the
        # deleted-uid collection, so it is the same for every hit; it may have
        # been meant to test item['_id'] -- behaviour kept as it was.
        if uid == item['_id'] or uid in filter_uid:
            continue
        info = build_row(item['_source'])
        if top_score:
            info.append(item['_score'] / float(top_score) * 100)
        else:
            # best score is 0: no meaningful ratio, avoid dividing by zero
            info.append(0.0)
        return_list.append(info)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    results = [build_row(personal_info)]
    results.extend(return_list)

    return results
Пример #50
0
     return status
 es_tag.delete(index=attribute_index_name,
               doc_type=attribute_index_type,
               id=id_attribute)
 # delete attribute in user_portrait
 # user_portrait中,以attribute_name-attribute_value给用户赋值
 query = []
 portrait_attribute_field = []
 print "result", result
 attribute_value = result['attribute_value'].split('&')
 print "attribute_value", attribute_value
 for value in attribute_value:
     portrait_attribute_field.append(attribute_name + "-" + value)
 query.append({'terms': {submit_user_tag: portrait_attribute_field}})
 try:
     attribute_user_result = es.search(index=user_index_name, doc_type=user_index_type, \
             body={'query':{'bool':{'must':query}}, "size": 100000000})['hits']['hits']
 except:
     attribute_user_result = []
 if attribute_user_result == []:
     status = True
     return status
 bulk_action = []
 count = 0
 for user_dict in attribute_user_result:
     try:
         user_item = user_dict['_source']
     except:
         next
     tmp = user_item[submit_user_tag]
     delete_set = set(tmp.split('&')) - set(portrait_attribute_field)
     print "delete_set", delete_set
Пример #51
0
def search_task(task_name, submit_date, state, status, submit_user):
    """Search analysis tasks in the group index.

    Every argument is optional (a falsy value means "do not filter on it"):
      task_name: space-separated words, each must occur in ``task_name``
          (wildcard match).
      submit_date: date passed to ``datetime2ts``; tasks whose ``submit_date``
          falls in ``[ts, ts + DAY)`` are kept.
      state: space-separated words, each must occur in ``state``.
      status: matched against ``status``.
      submit_user: exact term match on ``submit_user``.
    The search is always restricted to ``task_type == 'analysis'``, sorted by
    ``count`` descending and limited to ``MAX_VALUE`` hits.

    Returns a list of ``[task_name, submit_date, count, state, status]`` rows
    (``state`` defaults to ``''`` and ``status`` to ``0`` when a document lacks
    them), or ``None`` when the response carries no ``hits`` list.  Exceptions
    raised by the Elasticsearch client propagate unchanged.
    """
    query = []
    if task_name:
        for item in task_name.split(' '):
            query.append({'wildcard': {'task_name': '*' + item + '*'}})
    if submit_date:
        submit_date_start = datetime2ts(submit_date)
        submit_date_end = submit_date_start + DAY
        query.append({'range': {'submit_date': {'gte': submit_date_start, 'lt': submit_date_end}}})
    if state:
        for item in state.split(' '):
            query.append({'wildcard': {'state': '*' + item + '*'}})
    if status:
        query.append({'match': {'status': status}})
    if submit_user:
        query.append({'term': {'submit_user': submit_user}})

    # NOTE(review): filtered searches went through es_group_result while the
    # unfiltered one went through es; that split is kept as it was -- confirm
    # whether both are meant to be the same client.
    client = es_group_result if query else es

    query.append({'term': {'task_type': 'analysis'}})
    source = client.search(
        index=group_index_name,
        doc_type=group_index_type,
        body={
            'query': {'bool': {'must': query}},
            'sort': [{'count': {'order': 'desc'}}],
            'size': MAX_VALUE,
        }
    )

    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        return None

    result = []
    for task_dict in task_dict_list:
        task = task_dict['_source']
        result.append([
            task['task_name'],
            task['submit_date'],
            task['count'],
            task.get('state', ''),
            task.get('status', 0),
        ])

    return result
Пример #52
0
     result = es_tag.get(index=attribute_index_name,
                         doc_type=attribute_index_type,
                         id=attribute_name)['_source']
 except Exception, e:
     raise e
     return status
 es_tag.delete(index=attribute_index_name,
               doc_type=attribute_index_type,
               id=attribute_name)
 # delete attribute in user_portrait
 query = []
 attribute_value = result['attribute_value'].split('&')
 for value in attribute_value:
     query.append({'match': {attribute_name: value}})
 try:
     attribute_user_result = es.search(index=user_index_name, doc_type=user_index_type, \
                                      body={'query':{'bool':{'should':query}}})['hits']['hits']
 except:
     attribute_user_result = []
 if attribute_user_result == []:
     status = True
     return status
 bulk_action = []
 for user_dict in attribute_user_result:
     try:
         user_item = user_dict['_source']
     except:
         next
     user_item.pop(attribute_name)
     user = user_item['uid']
     action = {'index': {'_id': str(user)}}
     bulk_action.extend([action, user_item])
Пример #53
0
def imagine(submit_user,
            uid,
            query_fields_dict,
            index_name=portrait_index_name,
            doctype=portrait_index_type):
    """Find portrait users resembling ``uid`` on portrait fields and custom tags.

    submit_user: name of the submitting user; custom tags are read from the
        portrait field ``submit_user + "-tag"``, a ``'&'``-separated list of
        ``name-value`` pairs.
    uid: id of the reference user, fetched with ``es.get`` from
        ``portrait_index_name`` / ``portrait_index_type``.
    query_fields_dict: maps a field (or custom tag) name to its weight, plus
        the entry ``'size'`` (maximum number of users to return).  Entries with
        a falsy weight are ignored.  NOTE: this dict is modified in place
        (unusable keys and ``'size'`` are popped) and the same object is
        returned as the last element of the result.
    index_name, doctype: index / doc type that is counted and searched.

    Returns ``[]`` when neither a portrait field nor a custom tag can be used.
    Otherwise returns a list made of:
      * the reference user's row ``[uid, uname, activeness, importance,
        influence]``,
      * one row per matched user with the same five columns plus the hit score
        as a percentage of the best score,
      * the document count ``number``,
      * ``query_fields_dict`` as left after the pops described above.
    The three evaluation columns are rescaled with
    ``log10(value / max * 9 + 1) * 100``.
    """
    default_setting_dict = query_fields_dict  # alias, not a copy

    personal_info = es.get(index=portrait_index_name,
                           doc_type=portrait_index_type,
                           id=uid,
                           _source=True)['_source']

    # custom tags of the reference user: "name-value" pairs joined by '&'
    user_tag = submit_user + "-tag"
    tag_dict = dict()
    user_tag_string = personal_info.get(user_tag, "")
    if user_tag_string:
        for item in user_tag_string.split('&'):
            # split on the first '-' only so the value keeps any later '-';
            # a pair without '-' is skipped instead of raising IndexError
            tag_name, separator, tag_value = item.partition('-')
            if separator:
                tag_dict[tag_name] = tag_value

    # keys to correlate on: every weighted entry except the 'size' control
    # entry (filtered here so a falsy size no longer raises ValueError)
    keys_list = [k for k, v in query_fields_dict.items() if v and k != 'size']

    search_dict = {}
    tag_attri_vaule = []

    # filter the search keys, dropping the ones that cannot be used
    for iter_key in keys_list:
        if iter_key in personal_info:
            field_value = personal_info[iter_key]
            if field_value:
                search_dict[iter_key] = field_value.split('&')
            else:
                query_fields_dict.pop(iter_key)
        else:
            # not a portrait field: treat it as a custom tag name
            query_fields_dict.pop(iter_key)
            if tag_dict.get(iter_key, ''):
                tag_attri_vaule.append(iter_key + "-" + tag_dict[iter_key])

    if not search_dict and not tag_attri_vaule:
        return []

    must_clauses = []
    query_body = {
        'query': {
            'function_score': {
                'query': {
                    'bool': {
                        'must': must_clauses
                    }
                }
            }
        }
    }
    # counted while the bool query still has no clause
    number = es.count(index=index_name, doc_type=doctype,
                      body=query_body)['count']

    query_body['size'] = 150  # default number of hits fetched
    query_number = query_fields_dict.pop('size')  # number of users requested

    if tag_attri_vaule:
        must_clauses.append({"terms": {user_tag: tag_attri_vaule}})

    # one must-clause per usable field: any of the reference user's terms may
    # match.  Testing membership in search_dict skips zero-weight entries as
    # before and also entries with other falsy weights, which used to raise
    # KeyError here.
    for k, v in query_fields_dict.items():
        if k in search_dict:
            should_clauses = [
                {'wildcard': {k: {'wildcard': '*' + term + '*', 'boost': v}}}
                for term in search_dict[k]
            ]
            must_clauses.append({'bool': {'should': should_clauses}})

    filter_uid = all_delete_uid()
    result = es.search(index=index_name, doc_type=doctype,
                       body=query_body)['hits']['hits']
    field_list = ['uid', 'uname', 'activeness', 'importance', 'influence']
    evaluate_index_list = ['activeness', 'importance', 'influence']

    # Best score among the hits that are not the reference user.  Taking the
    # first such hit also covers a single non-self hit, which used to leave
    # top_score unbound.
    top_score = None
    for item in result:
        if item['_id'] != uid:
            top_score = item['_score']
            break

    # maxima used to normalise the evaluation indexes
    evaluate_max_dict = get_evaluate_max()

    def normalize(field, value):
        ratio = value / float(evaluate_max_dict[field])
        return math.log(ratio * 9 + 1, 10) * 100

    return_list = []
    for item in result:
        # NOTE(review): the second test checks the *reference* uid against the
        # deleted-uid collection, so it is the same for every hit; it may have
        # been meant to test item['_id'] -- behaviour kept as it was.
        if uid == item['_id'] or uid in filter_uid:
            continue
        info = []
        for field in field_list:
            if field in evaluate_index_list:
                normal_value = normalize(field, item['_source'][field])
            else:
                # fall back to the document id when uid / uname is empty
                normal_value = item['_source'][field] or item['_id']
            info.append(normal_value)
        if top_score:
            info.append(item['_score'] / float(top_score) * 100)
        else:
            # best score is 0: no meaningful ratio, avoid dividing by zero
            info.append(0.0)
        return_list.append(info)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    temp_list = []
    for field in field_list:
        if field in evaluate_index_list:
            temp_list.append(normalize(field, personal_info[field]))
        else:
            temp_list.append(personal_info[field])

    results = [temp_list]
    results.extend(return_list)
    results.append(default_setting_dict)

    return results
Пример #54
0
def search_task(task_name, submit_date, state, status, submit_user):
    """Search analysis tasks in the group index.

    Every argument is optional (a falsy value means "do not filter on it"):
      task_name: space-separated words, each must occur in ``task_name``
          (wildcard match).
      submit_date: date passed to ``datetime2ts``; tasks whose ``submit_date``
          falls in ``[ts, ts + DAY)`` are kept.
      state: space-separated words, each must occur in ``state``.
      status: matched against ``status``.
      submit_user: exact term match on ``submit_user``.
    The search is always restricted to ``task_type == 'analysis'``, sorted by
    ``count`` descending and limited to ``MAX_VALUE`` hits.

    Returns a list of ``[task_name, submit_date, count, state, status]`` rows
    (``state`` defaults to ``''`` and ``status`` to ``0`` when a document lacks
    them), or ``None`` when the response carries no ``hits`` list.  Exceptions
    raised by the Elasticsearch client propagate unchanged.
    """
    query = []
    if task_name:
        query.extend({'wildcard': {'task_name': '*' + word + '*'}}
                     for word in task_name.split(' '))
    if submit_date:
        submit_date_start = datetime2ts(submit_date)
        query.append({
            'range': {
                'submit_date': {
                    'gte': submit_date_start,
                    'lt': submit_date_start + DAY
                }
            }
        })
    if state:
        query.extend({'wildcard': {'state': '*' + word + '*'}}
                     for word in state.split(' '))
    if status:
        query.append({'match': {'status': status}})
    if submit_user:
        query.append({'term': {'submit_user': submit_user}})

    # NOTE(review): filtered searches went through es_group_result while the
    # unfiltered one went through es; that split is kept as it was -- confirm
    # whether both are meant to be the same client.
    client = es_group_result if query else es

    query.append({'term': {'task_type': 'analysis'}})
    source = client.search(index=group_index_name,
                           doc_type=group_index_type,
                           body={
                               'query': {
                                   'bool': {
                                       'must': query
                                   }
                               },
                               'sort': [{
                                   'count': {
                                       'order': 'desc'
                                   }
                               }],
                               'size': MAX_VALUE
                           })

    try:
        task_dict_list = source['hits']['hits']
    except (KeyError, TypeError):
        return None

    result = []
    for task_dict in task_dict_list:
        task = task_dict['_source']
        result.append([
            task['task_name'],
            task['submit_date'],
            task['count'],
            task.get('state', ''),
            task.get('status', 0)
        ])

    return result
Пример #55
0
def imagine(uid, query_fields_dict,index_name="user_portrait", doctype='user'):
    """
    Look up users related to ``uid``.

    uid: the user the search is centred on
    query_fields_dict: weight of every field to search on, e.g. "domain": 2
        (fields: domain, topic, keywords, psycho_status, psycho_feature,
        activity_geo, hashtag), plus two control entries:
        'field' - evaluation index used to scale the score
        'size'  - number of users wanted
        The dict is modified in place: fields that are empty for ``uid`` and
        both control entries are popped.

    Returns [] when ``uid`` has no value for any requested field, otherwise
    [row of uid, row of match 1 (+ score), ..., count], every row being
    [uid, uname, activeness, importance, influence].
    """
    personal_info = es.get(index="user_portrait", doc_type="user", id=uid, _source=True)['_source']

    # searchable keys = everything but the two control entries
    keys_list = query_fields_dict.keys()
    for control_key in ('field', 'size'):
        keys_list.remove(control_key)

    # terms of the reference user per field; fields without a value are dropped
    search_dict = {}
    for name in keys_list:
        raw_value = personal_info[name]
        if raw_value:
            search_dict[name] = raw_value.split('&')
        else:
            query_fields_dict.pop(name)

    if not search_dict:
        return []

    must_clauses = []
    function_score = {'query': {'bool': {'must': must_clauses}}}

    # pick the factor that goes with the requested evaluation index
    score_standard = {"modifier": "log1p"}
    known_factors = (("activeness", 100), ("importance", 0.01), ("influence", 0.1))
    for index_name_, factor in known_factors:
        if query_fields_dict['field'] == index_name_:
            score_standard['field'] = index_name_
            score_standard['factor'] = factor
            break
    else:
        # anything else: factor 0 on influence, added to the query score
        score_standard['field'] = "influence"
        score_standard['factor'] = 0
        function_score['boost_mode'] = "sum"
    function_score['field_value_factor'] = score_standard

    query_body = {'query': {'function_score': function_score}}

    query_fields_dict.pop('field')
    number = es.count(index=index_name, doc_type=doctype, body=query_body)['count']
    query_body['size'] = 100 # default number
    query_number = query_fields_dict.pop('size') #  required number

    # one must-clause per field, matching any of the reference user's terms
    for field_name, boost in query_fields_dict.items():
        alternatives = [
            {'wildcard':{field_name:{'wildcard':'*'+term+'*','boost': boost}}}
            for term in search_dict[field_name]
        ]
        must_clauses.append({'bool':{'should':alternatives}})

    filter_uid = all_delete_uid()

    hits = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
    field_list = ['uid','uname', 'activeness','importance', 'influence']

    return_list = []
    for hit in hits:
        if uid == hit['_id'] or uid in filter_uid:
            continue
        row = [hit['_source'][column] for column in field_list]
        row.append(hit['_score'])
        return_list.append(row)

        if len(return_list) == query_number:
            break

    return_list.append(number)

    # the reference user's own row comes first
    results = [[personal_info[column] for column in field_list]]
    results.extend(return_list)

    return results