示例#1
0
def get_query_device(nday):
    """Build the search body that counts distinct devices for one day.

    The window starts at ``time_tool.get_weehours_of_someday(-nday)`` and
    covers the following 86400 seconds (the value is presumably an epoch
    timestamp in seconds -- it is multiplied by 1000 for ``epoch_millis``).
    Only documents whose ``key.keyword`` matches ``app_common_use_time``
    are taken into account.

    :param nday: day offset; it is negated before being passed to
        ``time_tool.get_weehours_of_someday``.
    :return: dict with ``size``, ``aggs`` and ``query`` entries.
    """
    day_start = time_tool.get_weehours_of_someday(-nday)

    window = {
        "gte": day_start * 1000,
        "lte": (day_start + 86400) * 1000 - 1,
        "format": "epoch_millis",
    }
    only_use_time_events = {
        "match_phrase": {
            "key.keyword": {"query": "app_common_use_time"},
        },
    }
    distinct_devices = {"cardinality": {"field": "device_id.keyword"}}

    return {
        "size": 0,
        "aggs": {"uniq_device": distinct_devices},
        "query": {
            "bool": {
                "must": [{"range": {"@timestamp": window}},
                         only_use_time_events],
            },
        },
    }
示例#2
0
def uniq_user_1day(url="", query=None, nday=1):
    """Collect the distinct user ids reported for one day, hour by hour.

    The day is split into 24 one-hour windows starting at
    ``time_tool.get_weehours_of_someday(nday)``.  For every window the
    ``must`` clause of ``query`` is replaced by the matching ``@timestamp``
    range and the query is POSTed to ``url``; the bucket keys of the
    ``uniq_user`` aggregation are merged into one set.

    NOTE: ``query`` is modified in place (its ``must`` clause is
    overwritten), exactly as before.

    :param url: search endpoint the query is POSTed to.
    :param query: query template; must already contain
        ``query["query"]["bool"]``.
    :param nday: day offset handed unchanged to ``time_tool``.
    :return: list of distinct user ids (order is not significant), or an
        empty list as soon as any request returns a non-200 status.
    """
    if query is None:
        # A fresh dict per call instead of a shared mutable default.
        query = {}

    start = time_tool.get_weehours_of_someday(nday)
    users = set()
    for hour in range(24):
        query["query"]["bool"]["must"] = [{
            "range": {
                "@timestamp": {
                    "gte": (start + hour * 3600) * 1000,
                    "lte": (start + (hour + 1) * 3600) * 1000 - 1,
                    "format": "epoch_millis"
                }
            }
        }]
        r = requests.post(url,
                          headers=JSON_HEADER,
                          data=json.dumps(query),
                          timeout=(30, 60))
        if 200 != r.status_code:
            # Single parenthesised argument: valid as a Python 2 print
            # statement and as a Python 3 print call.
            print("request applog index failed, status_code:%d, reason:%s" % (
                r.status_code, r.reason))
            return []

        buckets = r.json()['aggregations']['uniq_user']['buckets']
        # Accumulate in a set instead of rebuilding list/set every hour.
        users.update(user['key'] for user in buckets)
    return list(users)
示例#3
0
文件: puv.py 项目: yeicandoit/trd
def get_query(nday=1):
    """Build the per-event PV/UV search body for one day.

    The time window runs from ``time_tool.get_weehours_of_someday(nday)``
    up to one millisecond before
    ``time_tool.get_weehours_of_someday(nday + 1)``.  Documents are grouped
    by ``key.keyword`` (up to 50000 terms, descending term order) and for
    each group the cardinality of ``device_id.keyword`` and ``ip.keyword``
    is requested.

    :param nday: day offset handed to ``time_tool``.
    :return: dict with ``size``, ``query`` and ``aggs`` entries.
    """
    window_start = time_tool.get_weehours_of_someday(nday)
    window_end = time_tool.get_weehours_of_someday(nday + 1)

    time_filter = {
        "range": {
            "@timestamp": {
                "gte": window_start * 1000,
                "lte": window_end * 1000 - 1,
                "format": "epoch_millis",
            },
        },
    }
    uv_per_key = {
        "uv_device": {"cardinality": {"field": "device_id.keyword"}},
        "uv_ip": {"cardinality": {"field": "ip.keyword"}},
    }
    pv_per_key = {
        "terms": {
            "field": "key.keyword",
            "size": 50000,
            "order": {"_term": "desc"},
        },
        "aggs": uv_per_key,
    }

    return {
        "size": 0,
        "query": {"bool": {"must": [time_filter]}},
        "aggs": {"pv": pv_per_key},
    }
示例#4
0
def _avg_stay_batched(query, user_channel, batch_size=10000):
    """Return ``{channel: summed ``sum_time`` / number of users}``.

    The user ids of every channel are sent in slices of ``batch_size`` so a
    single ``terms`` filter never carries more than that many ids.  Failed
    requests are logged and contribute nothing to the sum.
    """
    averages = {}
    for channel, user_arr in user_channel.items():
        total = 0
        for offset in range(0, len(user_arr), batch_size):
            query["query"]["bool"]["filter"]["terms"]["user_id.keyword"] = \
                user_arr[offset:offset + batch_size]
            r = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                              data=json.dumps(query), timeout=(30, 60))
            if 200 == r.status_code:
                total += r.json()['aggregations']['sum_time']['value']
            else:
                logger.error("request user index failed, status_code:%d, reason:%s",
                             r.status_code, r.reason)
        # A channel without users used to raise ZeroDivisionError; report 0.
        averages[channel] = total / len(user_arr) if user_arr else 0
    return averages


def get_app_stay(query=None, user_channel=None, nday=1):
    """Compute the average app stay time per channel for one day.

    Two passes are made over ``user_channel``: the first with the day's
    ``@timestamp`` range added to the query, the second with an additional
    ``is_first.keyword == "true"`` clause.

    The caller's ``query`` is deep-copied first.  Previously both clauses
    were appended to the caller's dict, so re-using one template piled up
    range clauses and kept the ``is_first`` filter for later calls.

    :param query: query template; needs ``query["query"]["bool"]["must"]``
        (a list) and ``query["query"]["bool"]["filter"]["terms"]``.
    :param user_channel: mapping of channel -> list of user ids.
    :param nday: day offset; negated before being passed to ``time_tool``.
    :return: tuple ``(app_stay, app_stay_first)``, two dicts keyed by
        channel.
    """
    import copy

    query = {} if query is None else copy.deepcopy(query)
    if user_channel is None:
        user_channel = {}

    start = time_tool.get_weehours_of_someday(-nday)
    time_range = {
        "range": {
            "@timestamp": {
                "gte": start * 1000,
                "lte": (start + 86400) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }
    query["query"]["bool"]["must"].append(time_range)
    app_stay = _avg_stay_batched(query, user_channel)

    query_is_first = {
        "match_phrase": {
            "is_first.keyword": {
                "query": "true"
            }
        }
    }
    query["query"]["bool"]["must"].append(query_is_first)
    app_stay_first = _avg_stay_batched(query, user_channel)

    return app_stay, app_stay_first
示例#5
0
def get_query(nday=1):
    """Build the search body that counts distinct user ids for one day.

    The window runs from ``time_tool.get_weehours_of_someday(nday)`` up to
    one millisecond before ``time_tool.get_weehours_of_someday(nday + 1)``.
    Documents whose ``user_id.keyword`` matches ``"-1"`` are excluded.

    :param nday: day offset handed to ``time_tool``.
    :return: dict with ``size``, ``query`` and ``aggs`` entries; the
        cardinality aggregation is named ``count``.
    """
    window_start = time_tool.get_weehours_of_someday(nday)
    window_end = time_tool.get_weehours_of_someday(nday + 1)

    in_window = {
        "range": {
            "@timestamp": {
                "gte": window_start * 1000,
                "lte": window_end * 1000 - 1,
                "format": "epoch_millis",
            },
        },
    }
    user_is_minus_one = {
        "match_phrase": {"user_id.keyword": {"query": "-1"}},
    }

    return {
        "size": 0,
        "query": {
            "bool": {
                "must_not": [user_is_minus_one],
                "must": [in_window],
            },
        },
        "aggs": {
            "count": {"cardinality": {"field": "user_id.keyword"}},
        },
    }
示例#6
0
def get_user_arr(nday=1):
    """Fetch the user ids seen in the applog index during one day.

    The day window starts at ``time_tool.get_weehours_of_someday(-nday)``
    and spans 86400 seconds; documents whose ``user_id.keyword`` matches
    ``"-1"`` are excluded.

    :param nday: day offset; negated before being passed to ``time_tool``.
    :return: list of the ``user_arr`` bucket keys, or an empty list when
        the request does not return status 200 (the failure is logged).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)

    in_window = {
        "range": {
            "@timestamp": {
                "gte": day_start * 1000,
                "lte": (day_start + 86400) * 1000 - 1,
                "format": "epoch_millis",
            },
        },
    }
    user_is_minus_one = {
        "match_phrase": {"user_id.keyword": {"query": "-1"}},
    }
    body = {
        "size": 0,
        "query": {
            "bool": {
                "must": [in_window],
                "must_not": [user_is_minus_one],
            },
        },
        "aggs": {
            "user_arr": {
                "terms": {"field": "user_id.keyword", "size": 10000000},
            },
        },
    }

    resp = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                         data=json.dumps(body), timeout=(30, 60))
    if 200 != resp.status_code:
        logger.error("request user index failed, status_code:%d, reason:%s",
                     resp.status_code, resp.reason)
        return []

    buckets = resp.json()['aggregations']['user_arr']['buckets']
    return [bucket['key'] for bucket in buckets]
示例#7
0
def process(query=None, query_uv=None, nday=1):
    """Compute PV and UV per event key and channel in 10-minute slices.

    The day starting at ``time_tool.get_weehours_of_someday(-nday)`` is cut
    into 144 windows of 600 seconds.  For each window the PV query and the
    UV query are run through ``get_puv4channel`` and the merged result is
    handed to ``set_puv4channel``.  Entries are keyed by
    ``<date>_<HH-MM>_<event key>_<channel>`` and stamped with the end of
    the window.

    NOTE: the ``must`` clause of both ``query`` and ``query_uv`` is
    overwritten in place for every window, exactly as before.

    :param query: PV query template; needs ``query["query"]["bool"]``.
    :param query_uv: UV query template; needs
        ``query_uv["query"]["bool"]``.
    :param nday: day offset; negated before being passed to ``time_tool``.
    """
    if query is None:
        # Fresh dicts per call instead of shared mutable defaults.
        query = {}
    if query_uv is None:
        query_uv = {}

    def _window_filter(slot):
        # One new filter object per call so the two queries never share it.
        return [{
            "range": {
                "@timestamp": {
                    "gte": (start + slot * 600) * 1000,
                    "lte": (start + (slot + 1) * 600) * 1000 - 1,
                    "format": "epoch_millis"
                }
            }
        }]

    start = time_tool.get_weehours_of_someday(-nday)
    for i in range(144):  # calculate every 10 minutes
        data = {}
        time_array = time.localtime(start + (i + 1) * 600)
        time_str = time.strftime("%Y-%m-%dT%H:%M:%S+08:00", time_array)
        hkey = time.strftime("%Y-%m-%d_%H-%M", time_array)

        query["query"]["bool"]["must"] = _window_filter(i)
        r_json = get_puv4channel(query)
        for key_channel in r_json['aggregations']['pv']['buckets']:
            hkey_ = hkey + "_" + key_channel['key']
            for channel in key_channel['channel_pv']['buckets']:
                data[hkey_ + "_" + channel['key']] = {
                    'pv': channel['doc_count'],
                    'key': key_channel['key'],
                    'channel': channel['key'],
                    '@timestamp': time_str,
                }

        query_uv["query"]["bool"]["must"] = _window_filter(i)
        r_json = get_puv4channel(query_uv)
        for key_channel in r_json['aggregations']['uv']['buckets']:
            hkey_ = hkey + "_" + key_channel['key']
            for channel in key_channel['channel']['buckets']:
                # setdefault replaces the `in data.keys()` list scan and
                # keeps a PV value already stored for this key.
                entry = data.setdefault(hkey_ + "_" + channel['key'], {})
                entry['uv'] = channel['channel_uv']['value']
                entry['key'] = key_channel['key']
                entry['channel'] = channel['key']
                entry['@timestamp'] = time_str
        set_puv4channel(data)
示例#8
0
文件: device.py 项目: yeicandoit/trd
def process(nday=1):
    """Register new devices second by second for one day.

    For each of the 86400 one-second windows after
    ``time_tool.get_weehours_of_someday(-nday)`` the devices returned by
    ``get_device`` are checked with ``get_new_device`` and the result is
    passed to ``add_new_device``.

    :param nday: day offset; negated before being passed to ``time_tool``.
    """
    day_start = time_tool.get_weehours_of_someday(-nday)
    for second in range(86400):
        lower_ms = (day_start + second) * 1000
        upper_ms = (day_start + (second + 1)) * 1000 - 1
        window = {
            "range": {
                "@timestamp": {
                    "gte": lower_ms,
                    "lte": upper_ms,
                    "format": "epoch_millis",
                },
            },
        }

        seen_devices = get_device(window)
        unknown_ids = get_new_device(seen_devices.keys())
        add_new_device(unknown_ids, seen_devices)
示例#9
0
def get_active_user_num(nday=1):
    """Return the number of distinct user ids in the applog index for a day.

    The day window starts at ``time_tool.get_weehours_of_someday(-nday)``
    and spans 86400 seconds; documents whose ``user_id.keyword`` matches
    ``"-1"`` are excluded.

    :param nday: day offset; negated before being passed to ``time_tool``.
    :return: value of the ``user_num`` cardinality aggregation, or ``0``
        when the request does not return status 200 (the failure is
        logged).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)

    in_window = {
        "range": {
            "@timestamp": {
                "gte": day_start * 1000,
                "lte": (day_start + 86400) * 1000 - 1,
                "format": "epoch_millis",
            },
        },
    }
    user_is_minus_one = {
        "match_phrase": {"user_id.keyword": {"query": "-1"}},
    }
    body = {
        "size": 0,
        "aggs": {
            "user_num": {"cardinality": {"field": "user_id.keyword"}},
        },
        "query": {
            "bool": {
                "must": [in_window],
                "must_not": [user_is_minus_one],
            },
        },
    }

    resp = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                         data=json.dumps(body), timeout=(30, 60))
    if 200 != resp.status_code:
        logger.error("request applog index failed, status_code:%d, reason:%s",
                     resp.status_code, resp.reason)
        return 0
    return resp.json()['aggregations']['user_num']['value']
示例#10
0
def get_news(nday=1):
    """Fetch the news ids that appear in the applog index during one day.

    The day window starts at ``time_tool.get_weehours_of_someday(-nday)``
    and spans 86400 seconds; documents whose ``news_id`` matches ``-1``
    are excluded.

    :param nday: day offset; negated before being passed to ``time_tool``.
    :return: list of the ``news_id_arr`` bucket keys, or an empty list
        when the request does not return status 200 (the failure is
        logged).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)

    in_window = {
        "range": {
            "@timestamp": {
                "gte": day_start * 1000,
                "lte": (day_start + 86400) * 1000 - 1,
                "format": "epoch_millis",
            },
        },
    }
    body = {
        "size": 0,
        "query": {
            "bool": {
                "must_not": [{"match_phrase": {"news_id": -1}}],
                "must": [in_window],
            },
        },
        "aggs": {
            "news_id_arr": {
                "terms": {"field": "news_id", "size": 100000000},
            },
        },
    }

    resp = requests.post(config.URL_APPLOG_SEARCH,
                         headers=config.JSON_HEADER,
                         data=json.dumps(body),
                         timeout=(30, 120))
    if 200 != resp.status_code:
        logger.error("request applog index failed, status_code:%d, reason:%s",
                     resp.status_code, resp.reason)
        return []

    buckets = resp.json()["aggregations"]["news_id_arr"]["buckets"]
    return [bucket["key"] for bucket in buckets]
示例#11
0
def get_puv4news_show_all(nday=1, news_id_arr=None):
    """Return PV and UV of the "show all" button click for one day.

    Counts documents whose ``key.keyword`` matches
    ``app_news_show_all_button_click`` within the 86400 seconds after
    ``time_tool.get_weehours_of_someday(-nday)``.  PV is read from
    ``hits.total``, UV from the cardinality of ``user_id.keyword``.

    :param nday: day offset; negated before being passed to ``time_tool``.
    :param news_id_arr: optional news ids; when non-empty the search is
        restricted to them with a ``terms`` filter on ``news_id``.
        ``None`` and an empty sequence both mean "no restriction".
    :return: tuple ``(pv, uv)``; ``(0, 0)`` when the request does not
        return status 200 (the failure is logged).
    """
    pv = 0
    uv = 0
    start = time_tool.get_weehours_of_someday(-nday)
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "@timestamp": {
                            "gte": start * 1000,
                            "lte": (start + 86400) * 1000 - 1,
                            "format": "epoch_millis"
                        }
                    }
                }, {
                    "match_phrase": {
                        "key.keyword": "app_news_show_all_button_click"
                    }
                }]
            }
        },
        "aggs": {
            "count": {
                "cardinality": {
                    "field": "user_id.keyword"
                }
            }
        }
    }
    # Truthiness check: no shared mutable default and None is tolerated.
    if news_id_arr:
        query["query"]["bool"]["filter"] = {"terms": {"news_id": news_id_arr}}
    r = requests.post(config.URL_APPLOG_SEARCH,
                      headers=config.JSON_HEADER,
                      data=json.dumps(query),
                      timeout=(30, 120))
    if 200 == r.status_code:
        r_json = r.json()
        pv = r_json["hits"]["total"]
        uv = r_json["aggregations"]["count"]["value"]
    else:
        logger.error(
            "request applog index for news pv, uv failed, status_code:%d, reason:%s",
            r.status_code, r.reason)
    return pv, uv
示例#12
0
def get_new_device(nday=1):
    """Count the devices recorded in the device index for one day.

    Two searches are run over the 86400 seconds after
    ``time_tool.get_weehours_of_someday(-nday)``:

    * the query from ``get_query_per_channel()`` -- every bucket of its
      ``per_channel`` aggregation becomes ``data[<channel>] = doc_count``;
    * the query from ``get_query_all_channel()`` -- its ``hits.total`` is
      stored under ``data["all_channel"]``.

    :param nday: day offset; negated before being passed to ``time_tool``.
    :return: dict of channel -> count; a failed request is logged and its
        entries are left out.
    """
    data = {}
    start = time_tool.get_weehours_of_someday(-nday)
    time_range = [{
        "range": {
            "@timestamp": {
                "gte": start * 1000,
                "lte": (start + 86400) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }]

    query = get_query_per_channel()
    query["query"]["bool"]["must"] = time_range
    r = requests.post(URL_ELASTICSEARCH_DEVICE,
                      headers=JSON_HEADER,
                      data=json.dumps(query),
                      timeout=(60, 120))
    if 200 == r.status_code:
        r_json = r.json()
        for v in r_json['aggregations']['per_channel']['buckets']:
            data[v['key']] = v['doc_count']
        logger.debug(data)
    else:
        logger.error("request device index failed, status_code:%d, reason:%s",
                     r.status_code, r.reason)

    query_all = get_query_all_channel()
    query_all["query"]["bool"]["must"] = time_range
    # Fix: the all-channel query was built but the per-channel `query`
    # was posted a second time.
    r = requests.post(URL_ELASTICSEARCH_DEVICE,
                      headers=JSON_HEADER,
                      data=json.dumps(query_all),
                      timeout=(60, 120))
    if 200 == r.status_code:
        r_json = r.json()
        data["all_channel"] = r_json["hits"]["total"]
        logger.debug(data)
    else:
        logger.error("request device index failed, status_code:%d, reason:%s",
                     r.status_code, r.reason)

    return data
示例#13
0
def get_share_num(nday=1):
    """Return how many share-button click documents were logged in a day.

    Searches the applog index for documents whose ``key.keyword`` matches
    ``app_news_share_button_click`` within the 86400 seconds after
    ``time_tool.get_weehours_of_someday(-nday)``.

    :param nday: day offset; negated before being passed to ``time_tool``.
    :return: ``hits.total`` of the response, or ``0`` when the request
        does not return status 200 (the failure is logged).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)

    in_window = {
        "range": {
            "@timestamp": {
                "gte": day_start * 1000,
                "lte": (day_start + 86400) * 1000 - 1,
                "format": "epoch_millis",
            },
        },
    }
    is_share_click = {
        "match_phrase": {
            "key.keyword": {"query": "app_news_share_button_click"},
        },
    }
    body = {
        "size": 0,
        "_source": {"excludes": []},
        "query": {"bool": {"must": [in_window, is_share_click]}},
    }

    resp = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                         data=json.dumps(body), timeout=(30, 60))
    if 200 != resp.status_code:
        logger.error("request applog index failed, status_code:%d, reason:%s",
                     resp.status_code, resp.reason)
        return 0
    return resp.json()['hits']['total']
示例#14
0
def uniq_channel_device_1day(url="", query=None, nday=1):
    """Collect the distinct device ids per channel for one day, by hour.

    The day is split into 24 one-hour windows starting at
    ``time_tool.get_weehours_of_someday(nday)``.  For every window the
    ``must`` clause of ``query`` is replaced by the matching ``@timestamp``
    range and the query is POSTed to ``url``.  The device keys found under
    each ``uniq_channel`` bucket are merged per channel; an empty channel
    key is stored under ``"unkown"``.

    NOTE: ``query`` is modified in place (its ``must`` clause is
    overwritten), exactly as before.

    :param url: search endpoint the query is POSTed to.
    :param query: query template; must already contain
        ``query["query"]["bool"]``.
    :param nday: day offset handed unchanged to ``time_tool``.
    :return: dict of channel -> list of device ids, or an empty dict as
        soon as any request returns a non-200 status.
    """
    if query is None:
        # A fresh dict per call instead of a shared mutable default.
        query = {}

    start = time_tool.get_weehours_of_someday(nday)
    hash_channel_device = {}
    for hour in range(24):
        query["query"]["bool"]["must"] = [
            {
                "range": {
                    "@timestamp": {
                        "gte": (start + hour * 3600) * 1000,
                        "lte": (start + (hour + 1) * 3600) * 1000 - 1,
                        "format": "epoch_millis"
                    }
                }
            }
        ]
        r = requests.post(url, headers=cr.JSON_HEADER,
                          data=json.dumps(query), timeout=(30, 300))
        if 200 != r.status_code:
            # Single parenthesised argument: valid as a Python 2 print
            # statement and as a Python 3 print call.
            print("request applog index failed, status_code:%d, reason:%s" % (
                r.status_code, r.reason))
            return {}

        for channel in r.json()['aggregations']['uniq_channel']['buckets']:
            channel_key = "unkown" if '' == channel['key'] else channel['key']
            devices = [device['key']
                       for device in channel['uniq_device']['buckets']]
            # Test membership on the dict itself, not on a .keys() list.
            if channel_key in hash_channel_device:
                hash_channel_device[channel_key] = list(
                    set(hash_channel_device[channel_key]).union(devices))
            else:
                hash_channel_device[channel_key] = devices

    return hash_channel_device
示例#15
0
文件: signin.py 项目: yeicandoit/trd
def get_query(nday=1):
    """Build the sign-in / invite-activity statistics search body.

    The time window runs from ``time_tool.get_weehours_of_someday(nday)``
    up to one millisecond before
    ``time_tool.get_weehours_of_someday(nday + 1)``.  The ``signin``
    aggregation holds one named ``query_string`` filter per tracked event
    (the filter name equals the event name) and reports, per filter, the
    cardinality of ``device_id.keyword`` and ``ip.keyword``.

    :param nday: day offset handed to ``time_tool``.
    :return: dict with ``size``, ``query`` and ``aggs`` entries.
    """
    window_start = time_tool.get_weehours_of_someday(nday)
    window_end = time_tool.get_weehours_of_someday(nday + 1)

    # Every event gets an identically shaped filter keyed by its own name.
    tracked_events = (
        "app_week_sign_pop_show",
        "app_week_sign_pop_sign_button_click",
        "app_week_sign_pop_receive_reward_button_click",
        "app_task_week_sign_button_click",
        "web_activity_invite_20180428_page_show",
        "web_activity_invite_20180428_page_invite_button_click",
        "web_activity_invite_20180428_share_page_show",
        "web_activity_invite_20180428_share_page_in_wechat_open_button_click",
        "web_activity_invite_20180428_share_page_open_button_click",
        "web_activity_invite_20180428_share_page_submit_button_click",
        "web_activity_invite_20180428_share_page_download_button_click",
    )
    event_filters = {
        event: {
            "query_string": {
                "query": event,
                "analyze_wildcard": True,
                "default_field": "*",
            },
        }
        for event in tracked_events
    }

    time_filter = {
        "range": {
            "@timestamp": {
                "gte": window_start * 1000,
                "lte": window_end * 1000 - 1,
                "format": "epoch_millis",
            },
        },
    }
    uv_per_event = {
        "uv_device_id": {"cardinality": {"field": "device_id.keyword"}},
        "uv_ip": {"cardinality": {"field": "ip.keyword"}},
    }

    return {
        "size": 0,
        "query": {"bool": {"must": [time_filter]}},
        "aggs": {
            "signin": {
                "filters": {"filters": event_filters},
                "aggs": uv_per_event,
            },
        },
    }
示例#16
0
def _avg_stay_per_channel(query, user_channel):
    """Return ``{channel: ``sum_time`` / number of users}`` per channel.

    Channels without users are skipped (no request is sent for them) and
    channels whose request fails are logged; neither appears in the result.
    """
    averages = {}
    for channel, user_ids in user_channel.items():
        if not user_ids:
            # The result for an empty channel was always discarded, so the
            # round trip is skipped.
            continue
        query["query"]["bool"]["filter"]["terms"]["user_id.keyword"] = user_ids
        r = requests.post(URL_ELASTICSEARCH_APPLOG,
                          headers=JSON_HEADER,
                          data=json.dumps(query),
                          timeout=(30, 60))
        if 200 == r.status_code:
            averages[channel] = \
                r.json()["aggregations"]["sum_time"]["value"] / len(user_ids)
        else:
            logger.error(
                "request user index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
    return averages


def get_app_stay(query=None, query_user=None, nday=1):
    """Compute the average app stay time per channel for one day.

    The user ids per channel are first read from the user index
    (``per_channel`` / ``user_id`` buckets of ``query_user``); all ids are
    also collected under ``"all_channel"``.  The applog index is then
    queried twice per channel: once with the day's ``@timestamp`` range
    added, and once more with an additional ``is_first == "true"`` clause.

    The caller's ``query`` is deep-copied first.  Previously both clauses
    were appended to the caller's dict, so re-using one template piled up
    range clauses and kept the ``is_first`` filter for later calls.  The
    ``must`` clause of ``query_user`` is still overwritten in place.

    :param query: applog query template; needs
        ``query["query"]["bool"]["must"]`` (a list) and
        ``query["query"]["bool"]["filter"]["terms"]``.
    :param query_user: user-index query template; needs
        ``query_user["query"]["bool"]``.
    :param nday: day offset; negated before being passed to ``time_tool``.
    :return: tuple ``(app_stay, app_stay_first)``, two dicts keyed by
        channel.
    """
    import copy

    query = {} if query is None else copy.deepcopy(query)
    if query_user is None:
        query_user = {}

    start = time_tool.get_weehours_of_someday(-nday)
    time_range = {
        "range": {
            "@timestamp": {
                "gte": start * 1000,
                "lte": (start + 86400) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }

    query_user["query"]["bool"]["must"] = [time_range]
    r = requests.post(URL_ELASTICSEARCH_USER,
                      headers=JSON_HEADER,
                      data=json.dumps(query_user),
                      timeout=(30, 60))
    user_channel = {"all_channel": []}
    if 200 == r.status_code:
        for v in r.json()['aggregations']['per_channel']['buckets']:
            arr = [user_id['key'] for user_id in v['user_id']['buckets']]
            user_channel[v['key']] = arr
            user_channel["all_channel"].extend(arr)
    else:
        logger.error("request user index failed, status_code:%d, reason:%s",
                     r.status_code, r.reason)

    query["query"]["bool"]["must"].append(time_range)
    app_stay = _avg_stay_per_channel(query, user_channel)

    query_is_first = {"match_phrase": {"is_first": {"query": "true"}}}
    query["query"]["bool"]["must"].append(query_is_first)
    app_stay_first = _avg_stay_per_channel(query, user_channel)

    return app_stay, app_stay_first
示例#17
0
文件: online.py 项目: yeicandoit/trd
def get_user_device_count(query_online_user=None, nday=1):
    """Count online users and devices in 5-minute slices for one day.

    The day starting at ``time_tool.get_weehours_of_someday(-nday)`` is cut
    into 288 windows of 300 seconds.  A first pass stores the ``count``
    aggregation of the query as ``num_user``.  For the second pass the
    aggregation field is switched to ``device_id.keyword``, documents with
    an empty ``device_id.keyword`` are excluded, and the value is stored
    as ``num_device``.  Entries are keyed by the window end formatted as
    ``%Y-%m-%d_%H-%M-%S`` and carry an ``@timestamp`` string.

    The function works on a deep copy of ``query_online_user``.
    Previously the caller's template was left pointing at
    ``device_id.keyword`` with the extra ``must_not`` clause, so a second
    call with the same template reported device counts as ``num_user``.

    :param query_online_user: query template; needs
        ``["query"]["bool"]`` and ``["aggs"]["count"]["cardinality"]``.
    :param nday: day offset; negated before being passed to ``time_tool``.
    :return: dict of window key -> ``{"@timestamp", "num_user",
        "num_device"}``; a count is missing when its request failed (the
        failure is logged).
    """
    import copy

    query = {} if query_online_user is None else \
        copy.deepcopy(query_online_user)
    start = time_tool.get_weehours_of_someday(-nday)
    online = {}

    def _window(slot):
        # Returns the must-clause for the slot and the struct_time of its end.
        window_end = start + (slot + 1) * 300
        must = [{
            "range": {
                "@timestamp": {
                    "gte": (start + slot * 300) * 1000,
                    "lte": window_end * 1000 - 1,
                    "format": "epoch_millis"
                }
            }
        }]
        return must, time.localtime(window_end)

    for i in range(288):  # calculate every 5 minutes
        must, time_array = _window(i)
        query["query"]["bool"]["must"] = must
        r = requests.post(URL_ELASTICSEARCH_APPLOG,
                          headers=JSON_HEADER,
                          data=json.dumps(query),
                          timeout=(60, 120))
        key = time.strftime("%Y-%m-%d_%H-%M-%S", time_array)
        # The entry is created even when the request fails, so the second
        # pass can always attach num_device to it.
        online[key] = {
            "@timestamp": time.strftime("%Y-%m-%dT%H:%M:%S+08:00",
                                        time_array),
        }
        if 200 == r.status_code:
            online[key]["num_user"] = \
                r.json()["aggregations"]["count"]["value"]
        else:
            logger.error(
                "request applog index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)

    # Second pass: same windows, counting devices instead of users.
    query["aggs"]["count"]["cardinality"]["field"] = "device_id.keyword"
    query["query"]["bool"]["must_not"] = [{
        "match_phrase": {
            "device_id.keyword": {
                "query": ""
            }
        }
    }]

    for i in range(288):
        must, time_array = _window(i)
        query["query"]["bool"]["must"] = must
        r = requests.post(URL_ELASTICSEARCH_APPLOG,
                          headers=JSON_HEADER,
                          data=json.dumps(query),
                          timeout=(60, 120))
        if 200 == r.status_code:
            key = time.strftime("%Y-%m-%d_%H-%M-%S", time_array)
            online[key]["num_device"] = \
                r.json()["aggregations"]["count"]["value"]
        else:
            logger.error(
                "request applog index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)

    return online