def get_query_device(nday):
    """Build an ES query counting distinct devices for the day `nday` days ago.

    Restricts to documents whose key is "app_common_use_time" inside the
    24-hour window starting at the wee hours of that day; returns the
    request body dict (size 0, single cardinality aggregation).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)
    window_clause = {
        "range": {
            "@timestamp": {
                "gte": day_start * 1000,
                "lte": (day_start + 86400) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }
    key_clause = {
        "match_phrase": {
            "key.keyword": {
                "query": "app_common_use_time"
            }
        }
    }
    return {
        "size": 0,
        "aggs": {
            "uniq_device": {
                "cardinality": {
                    "field": "device_id.keyword",
                }
            }
        },
        "query": {
            "bool": {
                "must": [window_clause, key_clause]
            }
        }
    }
def uniq_user_1day(url="", query=None, nday=1):
    """Collect the distinct user ids active on day `nday`, hour by hour.

    Mutates query["query"]["bool"]["must"] with each hourly time range and
    posts it to `url`; accumulates the `uniq_user` bucket keys over all 24
    hours. Returns the deduplicated list of user ids, or [] as soon as any
    hourly request fails (partial results are discarded, as before).

    Fixes: mutable default argument `query={}` (shared across calls);
    per-hour list/set round-trips replaced with a single accumulator set;
    print made function-call style so the module also parses on Python 3.
    """
    if query is None:
        query = {}
    start = time_tool.get_weehours_of_someday(nday)
    users = set()
    for hour in range(24):
        query["query"]["bool"]["must"] = [{
            "range": {
                "@timestamp": {
                    "gte": (start + hour * 3600) * 1000,
                    "lte": (start + (hour + 1) * 3600) * 1000 - 1,
                    "format": "epoch_millis"
                }
            }
        }]
        r = requests.post(url, headers=JSON_HEADER, data=json.dumps(query),
                          timeout=(30, 60))
        if 200 != r.status_code:
            print("request applog index failed, status_code:%d, reason:%s" % (
                r.status_code, r.reason))
            return []
        r_json = r.json()
        users.update(
            user['key']
            for user in r_json['aggregations']['uniq_user']['buckets'])
    return list(users)
def get_query(nday=1):
    """Build the daily per-key pv/uv aggregation query.

    Buckets hits by key.keyword (up to 50000 terms, term-descending) and,
    inside each bucket, counts distinct devices and distinct ips for the
    [day start, next day start) window.
    """
    day_begin = time_tool.get_weehours_of_someday(nday)
    day_end = time_tool.get_weehours_of_someday(nday + 1)
    time_clause = {
        "range": {
            "@timestamp": {
                "gte": day_begin * 1000,
                "lte": day_end * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }
    per_key_aggs = {
        "uv_device": {
            "cardinality": {"field": "device_id.keyword"}
        },
        "uv_ip": {
            "cardinality": {"field": "ip.keyword"}
        }
    }
    return {
        "size": 0,
        "query": {
            "bool": {"must": [time_clause]}
        },
        "aggs": {
            "pv": {
                "terms": {
                    "field": "key.keyword",
                    "size": 50000,
                    "order": {"_term": "desc"}
                },
                "aggs": per_key_aggs
            }
        }
    }
def get_app_stay(query=None, user_channel=None, nday=1):
    """Average stay time per channel on day -nday, overall and first-time.

    Appends the day's time range (and then an is_first filter) to `query`'s
    must clause — the caller's dict is mutated, as before — and runs the
    sum_time aggregation for each channel's users in batches of 10000.
    Returns (app_stay, app_stay_first), each {channel: average seconds}.

    Fixes: mutable default arguments; the two byte-identical batched
    aggregation loops are factored into _avg_stay_per_channel.
    NOTE(review): an empty user list still raises ZeroDivisionError exactly
    as the original did.
    """
    if query is None:
        query = {}
    if user_channel is None:
        user_channel = {}
    start = time_tool.get_weehours_of_someday(-nday)
    time_range = {
        "range": {
            "@timestamp": {
                "gte": start * 1000,
                "lte": (start + 86400) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }
    query["query"]["bool"]["must"].append(time_range)
    # All sessions.
    app_stay = _avg_stay_per_channel(query, user_channel)
    # First-time sessions only (further narrows the same query).
    query_is_first = {
        "match_phrase": {
            "is_first.keyword": {
                "query": "true"
            }
        }
    }
    query["query"]["bool"]["must"].append(query_is_first)
    app_stay_first = _avg_stay_per_channel(query, user_channel)
    return app_stay, app_stay_first


def _avg_stay_per_channel(query, user_channel):
    """Sum the sum_time aggregation over each channel's users (batches of
    10000 ids per request) and divide by the channel's user count.
    Failed batches are logged and contribute 0, matching the original."""
    result = {}
    for channel, user_arr in user_channel.items():
        result[channel] = 0
        for i in range(0, len(user_arr), 10000):
            query["query"]["bool"]["filter"]["terms"]["user_id.keyword"] = \
                user_arr[i:i + 10000]
            r = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                              data=json.dumps(query), timeout=(30, 60))
            if 200 == r.status_code:
                r_json = r.json()
                result[channel] += r_json['aggregations']['sum_time']['value']
            else:
                logger.error(
                    "request user index failed, status_code:%d, reason:%s",
                    r.status_code, r.reason)
        result[channel] = result[channel] / len(user_arr)
    return result
def get_query(nday=1):
    """Build a query counting distinct non-anonymous users for day `nday`.

    Excludes documents whose user_id.keyword is "-1" and restricts to the
    [day start, next day start) window; the `count` aggregation holds the
    user cardinality.
    """
    begin = time_tool.get_weehours_of_someday(nday)
    finish = time_tool.get_weehours_of_someday(nday + 1)
    anonymous_clause = {
        "match_phrase": {
            "user_id.keyword": {
                "query": "-1"
            }
        }
    }
    window_clause = {
        "range": {
            "@timestamp": {
                "gte": begin * 1000,
                "lte": finish * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }
    return {
        "size": 0,
        "query": {
            "bool": {
                "must_not": [anonymous_clause],
                "must": [window_clause]
            }
        },
        "aggs": {
            "count": {
                "cardinality": {
                    "field": "user_id.keyword",
                }
            }
        }
    }
def get_user_arr(nday=1):
    """Fetch every distinct non-anonymous user_id active on day -nday.

    Posts a terms aggregation (size 10000000) to the applog index and
    returns the bucket keys as a list; returns [] when the request fails
    (the failure is logged).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)
    body = {
        "size": 0,
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "@timestamp": {
                            "gte": day_start * 1000,
                            "lte": (day_start + 86400) * 1000 - 1,
                            "format": "epoch_millis"
                        }
                    }
                }],
                "must_not": [{
                    "match_phrase": {
                        "user_id.keyword": {
                            "query": "-1"
                        }
                    }
                }]
            }
        },
        "aggs": {
            "user_arr": {
                "terms": {
                    "field": "user_id.keyword",
                    "size": 10000000
                }
            }
        }
    }
    resp = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                         data=json.dumps(body), timeout=(30, 60))
    if 200 != resp.status_code:
        logger.error("request user index failed, status_code:%d, reason:%s",
                     resp.status_code, resp.reason)
        return []
    buckets = resp.json()['aggregations']['user_arr']['buckets']
    return [bucket['key'] for bucket in buckets]
def process(query=None, query_uv={}, nday=1):
    """Aggregate per-key/per-channel pv and uv for day -nday in 10-minute
    windows and persist each window via set_puv4channel.

    For each of the 144 windows the same time-range clause is written into
    both `query` (pv buckets) and `query_uv` (uv buckets); results are
    merged into one dict keyed "<Y-m-d_H-M>_<key>_<channel>".

    Fixes: mutable default arguments replaced with None sentinels; the
    duplicated range-clause literal is built once per window; dict
    membership via `.keys()` replaced with setdefault.
    """
    if query is None:
        query = {}
    if query_uv is None:
        query_uv = {}
    start = time_tool.get_weehours_of_someday(-nday)
    for i in range(144):  # calculate every 10 minutes
        data = {}
        timestamp = start + (i + 1) * 600
        time_array = time.localtime(timestamp)
        time_str = time.strftime("%Y-%m-%dT%H:%M:%S+08:00", time_array)
        hkey = time.strftime("%Y-%m-%d_%H-%M", time_array)
        window_clause = [{
            "range": {
                "@timestamp": {
                    "gte": (start + i * 600) * 1000,
                    "lte": (start + (i + 1) * 600) * 1000 - 1,
                    "format": "epoch_millis"
                }
            }
        }]
        # Page views per key/channel.
        query["query"]["bool"]["must"] = window_clause
        r_json = get_puv4channel(query)
        for key_channel in r_json['aggregations']['pv']['buckets']:
            hkey_ = hkey + "_" + key_channel['key']
            for channel in key_channel['channel_pv']['buckets']:
                hkey__ = hkey_ + "_" + channel['key']
                data[hkey__] = {
                    'pv': channel['doc_count'],
                    'key': key_channel['key'],
                    'channel': channel['key'],
                    '@timestamp': time_str,
                }
        # Unique visitors per key/channel, merged into the same records.
        query_uv["query"]["bool"]["must"] = window_clause
        r_json = get_puv4channel(query_uv)
        for key_channel in r_json['aggregations']['uv']['buckets']:
            hkey_ = hkey + "_" + key_channel['key']
            for channel in key_channel['channel']['buckets']:
                hkey__ = hkey_ + "_" + channel['key']
                entry = data.setdefault(hkey__, {})
                entry['uv'] = channel['channel_uv']['value']
                entry['key'] = key_channel['key']
                entry['channel'] = channel['key']
                entry['@timestamp'] = time_str
        set_puv4channel(data)
def process(nday=1):
    """Scan day -nday second by second and register never-seen devices.

    Each 1-second window: fetch the devices active in the window, keep only
    those not yet known (get_new_device), then persist them along with
    their window data (add_new_device).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)
    for second in range(86400):
        window_clause = {
            "range": {
                "@timestamp": {
                    "gte": (day_start + second) * 1000,
                    "lte": (day_start + second + 1) * 1000 - 1,
                    "format": "epoch_millis"
                }
            }
        }
        hash_device_ids = get_device(window_clause)
        fresh_ids = get_new_device(hash_device_ids.keys())
        add_new_device(fresh_ids, hash_device_ids)
def get_active_user_num(nday=1):
    """Count distinct non-anonymous users active on day -nday.

    Returns the user_id.keyword cardinality from the applog index, or 0
    when the request fails (failure is logged).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)
    body = {
        "size": 0,
        "aggs": {
            "user_num": {
                "cardinality": {
                    "field": "user_id.keyword",
                }
            }
        },
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "@timestamp": {
                            "gte": day_start * 1000,
                            "lte": (day_start + 86400) * 1000 - 1,
                            "format": "epoch_millis"
                        }
                    }
                }],
                "must_not": [{
                    "match_phrase": {
                        "user_id.keyword": {
                            "query": "-1"
                        }
                    }
                }]
            }
        }
    }
    resp = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                         data=json.dumps(body), timeout=(30, 60))
    if 200 != resp.status_code:
        logger.error("request applog index failed, status_code:%d, reason:%s",
                     resp.status_code, resp.reason)
        return 0
    return resp.json()['aggregations']['user_num']['value']
def get_news(nday=1):
    """List the distinct news_id values logged on day -nday.

    Excludes the sentinel id -1; returns the terms-aggregation bucket keys
    as a list, or [] when the request fails (failure is logged).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)
    body = {
        "size": 0,
        "query": {
            "bool": {
                "must_not": [{
                    "match_phrase": {
                        "news_id": -1
                    }
                }],
                "must": [{
                    "range": {
                        "@timestamp": {
                            "gte": day_start * 1000,
                            "lte": (day_start + 86400) * 1000 - 1,
                            "format": "epoch_millis"
                        }
                    }
                }]
            }
        },
        "aggs": {
            "news_id_arr": {
                "terms": {
                    "field": "news_id",
                    "size": 100000000
                }
            }
        }
    }
    resp = requests.post(config.URL_APPLOG_SEARCH, headers=config.JSON_HEADER,
                         data=json.dumps(body), timeout=(30, 120))
    if 200 != resp.status_code:
        logger.error("request applog index failed, status_code:%d, reason:%s",
                     resp.status_code, resp.reason)
        return []
    buckets = resp.json()["aggregations"]["news_id_arr"]["buckets"]
    return [bucket["key"] for bucket in buckets]
def get_puv4news_show_all(nday=1, news_id_arr=None):
    """Return (pv, uv) of the show-all button click on day -nday.

    pv is the total hit count, uv the distinct-user cardinality. When
    `news_id_arr` is given and non-empty, results are restricted to those
    news ids via a terms filter. Returns (0, 0) when the request fails
    (failure is logged).

    Fix: `news_id_arr` defaulted to a shared mutable list; replaced with a
    None sentinel (backward compatible — an omitted argument still behaves
    as an empty list).
    """
    pv = 0
    uv = 0
    if news_id_arr is None:
        news_id_arr = []
    start = time_tool.get_weehours_of_someday(-nday)
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "@timestamp": {
                            "gte": start * 1000,
                            "lte": (start + 86400) * 1000 - 1,
                            "format": "epoch_millis"
                        }
                    }
                }, {
                    "match_phrase": {
                        "key.keyword": "app_news_show_all_button_click"
                    }
                }]
            }
        },
        "aggs": {
            "count": {
                "cardinality": {
                    "field": "user_id.keyword"
                }
            }
        }
    }
    if len(news_id_arr) > 0:
        query["query"]["bool"]["filter"] = {"terms": {"news_id": news_id_arr}}
    r = requests.post(config.URL_APPLOG_SEARCH, headers=config.JSON_HEADER,
                      data=json.dumps(query), timeout=(30, 120))
    if 200 == r.status_code:
        r_json = r.json()
        pv = r_json["hits"]["total"]
        uv = r_json["aggregations"]["count"]["value"]
    else:
        logger.error(
            "request applog index for news pv, uv failed, status_code:%d, reason:%s",
            r.status_code, r.reason)
    return pv, uv
def get_new_device(nday=1):
    """Count new devices on day -nday, per channel and overall.

    Runs the per-channel query (bucket doc_counts keyed by channel) and the
    all-channel query (hits.total stored under "all_channel"), both bounded
    to the same day window. Failed requests are logged and their entries
    simply stay absent from the result dict.

    BUG FIX: the second request previously serialized `query` (the
    per-channel body) instead of `query_all`, so the "all_channel" total
    was computed from the wrong query.
    """
    data = {}
    start = time_tool.get_weehours_of_someday(-nday)
    time_range = [{
        "range": {
            "@timestamp": {
                "gte": start * 1000,
                "lte": (start + 86400) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }]
    query = get_query_per_channel()
    query["query"]["bool"]["must"] = time_range
    r = requests.post(URL_ELASTICSEARCH_DEVICE, headers=JSON_HEADER,
                      data=json.dumps(query), timeout=(60, 120))
    if 200 == r.status_code:
        r_json = r.json()
        for v in r_json['aggregations']['per_channel']['buckets']:
            data[v['key']] = v['doc_count']
        logger.debug(data)
    else:
        logger.error("request device index failed, status_code:%d, reason:%s",
                     r.status_code, r.reason)
    query_all = get_query_all_channel()
    query_all["query"]["bool"]["must"] = time_range
    r = requests.post(URL_ELASTICSEARCH_DEVICE, headers=JSON_HEADER,
                      data=json.dumps(query_all), timeout=(60, 120))
    if 200 == r.status_code:
        r_json = r.json()
        data["all_channel"] = r_json["hits"]["total"]
        logger.debug(data)
    else:
        logger.error("request device index failed, status_code:%d, reason:%s",
                     r.status_code, r.reason)
    return data
def get_share_num(nday=1):
    """Total news-share button clicks on day -nday.

    Returns hits.total for documents matching the share-click key inside
    the day window, or 0 when the request fails (failure is logged).
    """
    day_start = time_tool.get_weehours_of_someday(-nday)
    window_clause = {
        "range": {
            "@timestamp": {
                "gte": day_start * 1000,
                "lte": (day_start + 86400) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }
    share_clause = {
        "match_phrase": {
            "key.keyword": {
                "query": "app_news_share_button_click"
            }
        }
    }
    body = {
        "size": 0,
        "_source": {
            "excludes": []
        },
        "query": {
            "bool": {
                "must": [window_clause, share_clause]
            }
        }
    }
    resp = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                         data=json.dumps(body), timeout=(30, 60))
    if 200 != resp.status_code:
        logger.error("request applog index failed, status_code:%d, reason:%s",
                     resp.status_code, resp.reason)
        return 0
    return resp.json()['hits']['total']
def uniq_channel_device_1day(url="", query=None, nday=1):
    """Collect distinct device ids per channel for day `nday`, hour by hour.

    Mutates query's must clause with each hourly time range and posts it to
    `url`; merges each hour's uniq_device bucket keys into the per-channel
    sets. An empty channel key is stored under "unkown" (existing runtime
    key — typo preserved deliberately, downstream consumers may rely on
    it). Returns {channel: [device_id, ...]}, or {} as soon as any hourly
    request fails.

    Fixes: mutable default argument `query={}`; membership test against
    `dict.keys()` replaced by direct dict membership; print made
    function-call style so the module also parses on Python 3.
    """
    if query is None:
        query = {}
    start = time_tool.get_weehours_of_someday(nday)
    hash_channel_device = {}
    for hour in range(24):
        query["query"]["bool"]["must"] = [{
            "range": {
                "@timestamp": {
                    "gte": (start + hour * 3600) * 1000,
                    "lte": (start + (hour + 1) * 3600) * 1000 - 1,
                    "format": "epoch_millis"
                }
            }
        }]
        r = requests.post(url, headers=cr.JSON_HEADER, data=json.dumps(query),
                          timeout=(30, 300))
        if 200 != r.status_code:
            print("request applog index failed, status_code:%d, reason:%s" % (
                r.status_code, r.reason))
            return {}
        r_json = r.json()
        for channel in r_json['aggregations']['uniq_channel']['buckets']:
            channel_key = "unkown" if '' == channel['key'] else channel['key']
            devices = [
                device['key'] for device in channel['uniq_device']['buckets']
            ]
            if channel_key in hash_channel_device:
                hash_channel_device[channel_key] = list(
                    set(hash_channel_device[channel_key]).union(devices))
            else:
                hash_channel_device[channel_key] = devices
    return hash_channel_device
def get_query(nday=1):
    """Build the sign-in / invite-activity funnel query for day `nday`.

    One named filter per tracked event key, each carrying device and ip
    cardinality sub-aggregations; the time window is [day start, next day
    start). The filters dict is generated from the event list, producing a
    body identical to the hand-written literal.
    """
    start = time_tool.get_weehours_of_someday(nday)
    end = time_tool.get_weehours_of_someday(nday + 1)
    event_keys = [
        "app_week_sign_pop_show",
        "app_week_sign_pop_sign_button_click",
        "app_week_sign_pop_receive_reward_button_click",
        "app_task_week_sign_button_click",
        "web_activity_invite_20180428_page_show",
        "web_activity_invite_20180428_page_invite_button_click",
        "web_activity_invite_20180428_share_page_show",
        "web_activity_invite_20180428_share_page_in_wechat_open_button_click",
        "web_activity_invite_20180428_share_page_open_button_click",
        "web_activity_invite_20180428_share_page_submit_button_click",
        "web_activity_invite_20180428_share_page_download_button_click",
    ]
    event_filters = {}
    for event_key in event_keys:
        event_filters[event_key] = {
            "query_string": {
                "query": event_key,
                "analyze_wildcard": True,
                "default_field": "*"
            }
        }
    return {
        "size": 0,
        "query": {
            "bool": {
                "must": [{
                    "range": {
                        "@timestamp": {
                            "gte": start * 1000,
                            "lte": end * 1000 - 1,
                            "format": "epoch_millis"
                        }
                    }
                }]
            }
        },
        "aggs": {
            "signin": {
                "filters": {
                    "filters": event_filters
                },
                "aggs": {
                    "uv_device_id": {
                        "cardinality": {"field": "device_id.keyword"}
                    },
                    "uv_ip": {
                        "cardinality": {"field": "ip.keyword"}
                    }
                }
            }
        }
    }
def get_app_stay(query=None, query_user=None, nday=1):
    """Average stay time per channel for day -nday, overall and first-time.

    First resolves each channel's user ids (plus an "all_channel" union)
    from the user index, then runs `query`'s sum_time aggregation per
    channel — once over all sessions, once narrowed to is_first sessions.
    Returns (app_stay, app_stay_first), each {channel: average}. The
    caller's `query` must clause is mutated, as before.

    Fixes: mutable default arguments; the two identical per-channel
    aggregation loops factored into _avg_stay_by_channel.
    """
    if query is None:
        query = {}
    if query_user is None:
        query_user = {}
    start = time_tool.get_weehours_of_someday(-nday)
    time_range = {
        "range": {
            "@timestamp": {
                "gte": start * 1000,
                "lte": (start + 86400) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }
    # Resolve users per channel for the day.
    query_user["query"]["bool"]["must"] = [time_range]
    r = requests.post(URL_ELASTICSEARCH_USER, headers=JSON_HEADER,
                      data=json.dumps(query_user), timeout=(30, 60))
    user_channel = {"all_channel": []}
    if 200 == r.status_code:
        r_json = r.json()
        for v in r_json['aggregations']['per_channel']['buckets']:
            arr = [user_id['key'] for user_id in v['user_id']['buckets']]
            user_channel[v['key']] = arr
            user_channel["all_channel"].extend(arr)
    else:
        logger.error("request user index failed, status_code:%d, reason:%s",
                     r.status_code, r.reason)
    # All sessions.
    query["query"]["bool"]["must"].append(time_range)
    app_stay = _avg_stay_by_channel(query, user_channel)
    # First-time sessions only.
    query_is_first = {"match_phrase": {"is_first": {"query": "true"}}}
    query["query"]["bool"]["must"].append(query_is_first)
    app_stay_first = _avg_stay_by_channel(query, user_channel)
    return app_stay, app_stay_first


def _avg_stay_by_channel(query, user_channel):
    """Run `query`'s sum_time aggregation restricted to each channel's
    users; return {channel: sum_time / user count}. Channels with a failed
    request or an empty user list are omitted, matching the original."""
    result = {}
    for k, v in user_channel.items():
        query["query"]["bool"]["filter"]["terms"]["user_id.keyword"] = v
        r = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                          data=json.dumps(query), timeout=(30, 60))
        if 200 == r.status_code:
            r_json = r.json()
            if len(v) > 0:
                result[k] = r_json["aggregations"]["sum_time"]["value"] / len(v)
        else:
            logger.error(
                "request user index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
    return result
def get_user_device_count(query_online_user=None, nday=1):
    """Online user and device counts for day -nday in 5-minute windows.

    Pass 1 counts distinct users per window; pass 2 retargets the same
    query at device_id.keyword (excluding empty ids) and adds distinct
    device counts into the same records. Returns {"Y-m-d_H-M-S": {...}}
    keyed by window end; failed requests are logged and leave the
    corresponding field absent.

    Fixes: mutable default argument; the duplicated 288-iteration request
    loop factored into _count_online_window.
    """
    if query_online_user is None:
        query_online_user = {}
    start = time_tool.get_weehours_of_someday(-nday)
    online = {}
    # Pass 1: distinct users per 5-minute window.
    for i in range(288):  # calculate every 5 minutes
        count = _count_online_window(query_online_user, start, i)
        time_array = time.localtime(start + (i + 1) * 300)
        key = time.strftime("%Y-%m-%d_%H-%M-%S", time_array)
        online[key] = {
            "@timestamp": time.strftime("%Y-%m-%dT%H:%M:%S+08:00", time_array)
        }
        if count is not None:
            online[key]["num_user"] = count
    # Pass 2: distinct non-empty device ids over the same windows.
    query_online_user["aggs"]["count"]["cardinality"][
        "field"] = "device_id.keyword"
    query_online_user["query"]["bool"]["must_not"] = [{
        "match_phrase": {
            "device_id.keyword": {
                "query": ""
            }
        }
    }]
    for i in range(288):
        count = _count_online_window(query_online_user, start, i)
        if count is not None:
            time_array = time.localtime(start + (i + 1) * 300)
            key = time.strftime("%Y-%m-%d_%H-%M-%S", time_array)
            online[key]["num_device"] = count
    return online


def _count_online_window(query, start, i):
    """Restrict `query` to 5-minute window `i` after `start`, post it to
    the applog index and return the `count` cardinality value; return None
    (after logging) on failure."""
    query["query"]["bool"]["must"] = [{
        "range": {
            "@timestamp": {
                "gte": (start + i * 300) * 1000,
                "lte": (start + (i + 1) * 300) * 1000 - 1,
                "format": "epoch_millis"
            }
        }
    }]
    r = requests.post(URL_ELASTICSEARCH_APPLOG, headers=JSON_HEADER,
                      data=json.dumps(query), timeout=(60, 120))
    if 200 == r.status_code:
        return r.json()["aggregations"]["count"]["value"]
    logger.error("request applog index failed, status_code:%d, reason:%s",
                 r.status_code, r.reason)
    return None