def add_reports(resource_id, request_params, dataset, **kwargs):
    """Store a batch of reports for a resource, update report groups and
    stat rows, and queue the resulting Elasticsearch documents for indexing."""
    proto_version = parse_proto(request_params.get('protocol_version', ''))
    current_time = datetime.utcnow().replace(second=0, microsecond=0)
    try:
        # we will store ES docs here for a single bulk insert
        es_report_docs = {}
        es_report_group_docs = {}
        resource = ApplicationService.by_id(resource_id)

        tags = []
        es_slow_calls_docs = {}
        es_reports_stats_rows = {}
        for report_data in dataset:
            # build report details for later
            added_details = 0
            report = Report()
            report.set_data(report_data, resource, proto_version)
            report._skip_ft_index = True

            # find the latest group in this month's partition
            report_group = ReportGroupService.by_hash_and_resource(
                report.resource_id,
                report.grouping_hash,
                since_when=datetime.utcnow().date().replace(day=1))
            occurences = report_data.get('occurences', 1)
            if not report_group:
                # total_reports will be incremented a moment later
                report_group = ReportGroup(
                    grouping_hash=report.grouping_hash,
                    occurences=0,
                    total_reports=0,
                    last_report=0,
                    priority=report.priority,
                    error=report.error,
                    first_timestamp=report.start_time)
                report_group._skip_ft_index = True
                report_group.report_type = report.report_type
            report.report_group_time = report_group.first_timestamp
            add_sample = pick_sample(report_group.occurences,
                                     report_type=report_group.report_type)
            if add_sample:
                resource.report_groups.append(report_group)
                report_group.reports.append(report)
                added_details += 1
                DBSession.flush()
                if report.partition_id not in es_report_docs:
                    es_report_docs[report.partition_id] = []
                es_report_docs[report.partition_id].append(report.es_doc())
                tags.extend(list(report.tags.items()))
                slow_calls = report.add_slow_calls(report_data, report_group)
                DBSession.flush()
                for s_call in slow_calls:
                    if s_call.partition_id not in es_slow_calls_docs:
                        es_slow_calls_docs[s_call.partition_id] = []
                    es_slow_calls_docs[s_call.partition_id].append(
                        s_call.es_doc())
            else:
                # required for postprocessing to not fail later
                report.report_group = report_group

            # try generating new stat rows if needed
            stat_row = ReportService.generate_stat_rows(
                report, resource, report_group)
            if stat_row.partition_id not in es_reports_stats_rows:
                es_reports_stats_rows[stat_row.partition_id] = []
            es_reports_stats_rows[stat_row.partition_id].append(
                stat_row.es_doc())

            # see if this batch pushes the group past a 10th/100th occurrence
            # so we know whether to trigger threshold notifications
            last_occurences_10 = int(math.floor(
                report_group.occurences / 10))
            curr_occurences_10 = int(math.floor(
                (report_group.occurences + report.occurences) / 10))
            last_occurences_100 = int(math.floor(
                report_group.occurences / 100))
            curr_occurences_100 = int(math.floor(
                (report_group.occurences + report.occurences) / 100))
            notify_occurences_10 = last_occurences_10 != curr_occurences_10
            notify_occurences_100 = last_occurences_100 != curr_occurences_100

            # assign SQLAlchemy column expressions so the counters are
            # incremented server-side in the UPDATE statement
            report_group.occurences = ReportGroup.occurences + occurences
            report_group.last_timestamp = report.start_time
            report_group.summed_duration = \
                ReportGroup.summed_duration + report.duration
            summed_duration = ReportGroup.summed_duration + report.duration
            summed_occurences = ReportGroup.occurences + occurences
            report_group.average_duration = summed_duration / summed_occurences
            report_group.run_postprocessing(report)
            if added_details:
                report_group.total_reports = ReportGroup.total_reports + 1
                report_group.last_report = report.id
            report_group.set_notification_info(
                notify_10=notify_occurences_10,
                notify_100=notify_occurences_100)
            DBSession.flush()
            report_group.get_report().notify_channel(report_group)
            if report_group.partition_id not in es_report_group_docs:
                es_report_group_docs[report_group.partition_id] = []
            es_report_group_docs[report_group.partition_id].append(
                report_group.es_doc())

            action = 'REPORT'
            log_msg = '%s: %s %s, client: %s, proto: %s' % (
                action,
                report_data.get('http_status', 'unknown'),
                str(resource),
                report_data.get('client'),
                proto_version)
            log.info(log_msg)

        # bump per-minute/per-hour redis counters and mark the app as having
        # received new data this hour
        total_reports = len(dataset)
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS['counters']['reports_per_minute'].format(current_time)
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS['counters']['events_per_minute_per_user'].format(
            resource.owner_user_id, current_time)
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS['counters']['reports_per_hour_per_app'].format(
            resource_id, current_time.replace(minute=0))
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS['apps_that_got_new_data_per_hour'].format(
                current_time.replace(minute=0)), resource_id)
        redis_pipeline.execute()

        add_reports_es(es_report_group_docs, es_report_docs)
        add_reports_slow_calls_es(es_slow_calls_docs)
        add_reports_stats_rows_es(es_reports_stats_rows)
        return True
    except Exception as exc:
        print_traceback(log)
        add_reports.retry(exc=exc)
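
# Illustrative sketch (added for clarity, not referenced by add_reports above):
# the notify_occurences_10 / notify_occurences_100 flags fire whenever the
# running occurrence count crosses a multiple of 10 or 100. Isolated, and
# assuming plain non-negative integer inputs, the same check looks like this:
def _crossed_occurrence_threshold(previous_total, added, step):
    # True when adding `added` occurrences pushes the total past a multiple
    # of `step`; e.g. going from 9 to 11 crosses the 10 boundary, while
    # going from 11 to 14 does not.
    return (previous_total // step) != ((previous_total + added) // step)
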
def alert_chart(pkey, chart_uuid):
    start = datetime.utcnow()
    request = get_current_request()
    alert_action = AlertChannelActionService.by_pkey(pkey)
    chart = DashboardChartService.by_uuid(chart_uuid)
    chart.migrate_json_config()
    resource = chart.dashboard
    json_body = chart.config
    ids_to_override = [json_body["resource"]]
    filter_settings = build_filter_settings_from_chart_config(
        request, json_body, override_app_ids=ids_to_override
    )

    log.warning("alert_chart, resource:{}, chart:{}".format(resource, chart_uuid))

    # determine start and end date for dataset
    start_date, end_date = determine_date_boundries_json(json_body)
    if not filter_settings["start_date"]:
        filter_settings["start_date"] = start_date.replace(
            hour=0, minute=0, second=0, microsecond=0
        )
    if not filter_settings["end_date"]:
        filter_settings["end_date"] = end_date

    event_type = Event.types["chart_alert"]
    open_event = None
    latest_closed_event = None
    events_query = EventService.for_resource(
        [resource.resource_id],
        event_type=event_type,
        target_uuid=chart_uuid,
        limit=20,
    )
    for event in events_query:
        if event.status == Event.statuses["active"] and not open_event:
            open_event = event
        if event.status == Event.statuses["closed"] and not latest_closed_event:
            latest_closed_event = event

    if latest_closed_event:
        filter_settings["start_date"] = latest_closed_event.end_date

    es_config = transform_json_to_es_config(
        request, json_body, filter_settings, ids_to_override=ids_to_override
    )
    if not es_config["index_names"]:
        return
    result = Datastores.es.search(
        body=es_config["query"],
        index=es_config["index_names"],
        doc_type="log",
        size=0,
    )
    series, info_dict = parse_es_result(result, es_config, json_config=json_body)

    # we need to make a deepcopy since we will mutate it
    rule_config = copy.deepcopy(alert_action.rule)
    field_mappings = alert_action.config
    rule_obj = RuleService.rule_from_config(
        rule_config, field_mappings, info_dict["system_labels"]
    )
    matched_interval = None
    finished_interval = None
    for step in reversed(series):
        if rule_obj.match(step):
            log.info("matched start")
            if not matched_interval:
                matched_interval = step
                break
        else:
            finished_interval = step

    if matched_interval:
        if open_event:
            log.info("ALERT: PROGRESS: %s %s" % (event_type, resource))
            if finished_interval:
                open_event.values = copy.deepcopy(open_event.values)
                end_interval = finished_interval["key"].strftime(DATE_FORMAT)
                open_event.values["end_interval"] = end_interval
                open_event.close()
        else:
            log.warning("ALERT: OPEN: %s %s" % (event_type, resource))
            step_size = None
            parent_agg = json_body.get("parentAgg")
            if parent_agg and parent_agg["type"] == "time_histogram":
                step_size = time_deltas[parent_agg["config"]["interval"]][
                    "delta"
                ].total_seconds()
            matched_step_values = {
                "values": matched_interval,
                "labels": info_dict["system_labels"],
            }
            values_dict = {
                "matched_rule": alert_action.get_dict(),
                "matched_step_values": matched_step_values,
                "start_interval": step["key"],
                "end_interval": None,
                "resource": chart.config.get("resource"),
                "chart_name": chart.name,
                "chart_uuid": chart_uuid,
                "step_size": step_size,
                "action_name": alert_action.name,
            }
            new_event = Event(
                resource_id=resource.resource_id,
                event_type=event_type,
                status=Event.statuses["active"],
                values=values_dict,
                target_uuid=chart_uuid,
            )
            DBSession.add(new_event)
            DBSession.flush()
            new_event.send_alerts(request=request, resource=resource)
    elif open_event:
        if finished_interval:
            open_event.values = copy.deepcopy(open_event.values)
            end_interval = finished_interval["key"].strftime(DATE_FORMAT)
            open_event.values["end_interval"] = end_interval
            open_event.close()

    took = datetime.utcnow() - start
    log.warning("chart alert rule check took: {}".format(took))
def add_logs(resource_id, request_params, dataset, **kwargs):
    """Store a batch of log entries for a resource, deduplicate rows sharing
    a primary key/namespace pair, and queue ES documents for indexing."""
    proto_version = request_params.get('protocol_version')
    current_time = datetime.utcnow().replace(second=0, microsecond=0)

    try:
        es_docs = collections.defaultdict(list)
        resource = ApplicationService.by_id_cached()(resource_id)
        resource = DBSession.merge(resource, load=False)
        ns_pairs = []
        for entry in dataset:
            # gather pk and ns so we can remove older versions of row later
            if entry['primary_key'] is not None:
                ns_pairs.append({"pk": entry['primary_key'],
                                 "ns": entry['namespace']})
            log_entry = Log()
            log_entry.set_data(entry, resource=resource)
            log_entry._skip_ft_index = True
            resource.logs.append(log_entry)
            DBSession.flush()
            # rows without a primary key can be indexed right away
            if entry['primary_key'] is None:
                es_docs[log_entry.partition_id].append(log_entry.es_doc())

        # 2nd pass to delete all log entries from db for the same pk/ns pair
        if ns_pairs:
            ids_to_delete = []
            es_docs = collections.defaultdict(list)
            es_docs_to_delete = collections.defaultdict(list)
            found_pkey_logs = LogService.query_by_primary_key_and_namespace(
                list_of_pairs=ns_pairs)
            log_dict = {}
            for log_entry in found_pkey_logs:
                log_key = (log_entry.primary_key, log_entry.namespace)
                if log_key not in log_dict:
                    log_dict[log_key] = []
                log_dict[log_key].append(log_entry)

            for ns, entry_list in log_dict.items():
                entry_list = sorted(entry_list, key=lambda x: x.timestamp)
                # newest row needs to be indexed in es
                log_entry = entry_list[-1]
                # delete everything from pg and ES, leave the last row in pg
                for e in entry_list[:-1]:
                    ids_to_delete.append(e.log_id)
                    es_docs_to_delete[e.partition_id].append(e.delete_hash)

                es_docs_to_delete[log_entry.partition_id].append(
                    log_entry.delete_hash)

                es_docs[log_entry.partition_id].append(log_entry.es_doc())

            if ids_to_delete:
                query = DBSession.query(Log).filter(
                    Log.log_id.in_(ids_to_delete))
                query.delete(synchronize_session=False)
            if es_docs_to_delete:
                # batch this to avoid problems with default ES bulk limits
                for es_index in es_docs_to_delete.keys():
                    for batch in in_batches(es_docs_to_delete[es_index], 20):
                        query = {'terms': {'delete_hash': batch}}

                        try:
                            Datastores.es.delete_by_query(
                                es_index, 'log', query)
                        except pyelasticsearch.ElasticHttpNotFoundError as exc:
                            msg = 'skipping index {}'.format(es_index)
                            log.info(msg)

        total_logs = len(dataset)

        log_msg = 'LOG_NEW: %s, entries: %s, proto:%s' % (
            str(resource),
            total_logs,
            proto_version)
        log.info(log_msg)
        # mark_changed(session)

        # bump per-minute/per-hour redis counters and mark the app as having
        # received new data this hour
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS['counters']['logs_per_minute'].format(current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS['counters']['events_per_minute_per_user'].format(
            resource.owner_user_id, current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS['counters']['logs_per_hour_per_app'].format(
            resource_id, current_time.replace(minute=0))
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS['apps_that_got_new_data_per_hour'].format(
                current_time.replace(minute=0)), resource_id)
        redis_pipeline.execute()

        add_logs_es(es_docs)
        return True
    except Exception as exc:
        print_traceback(log)
        add_logs.retry(exc=exc)
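
# Illustrative sketch (an assumption, not the project's actual helper): the
# in_batches() call in add_logs only relies on getting successive slices of
# at most `batch_size` delete hashes, so an equivalent chunking helper could
# look like this:
def _in_batches_sketch(seq, batch_size):
    # Yield consecutive slices of `seq` holding up to `batch_size` items so
    # each ES delete_by_query payload stays small.
    for i in range(0, len(seq), batch_size):
        yield seq[i:i + batch_size]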