Example #1
def add_reports(resource_id, request_params, dataset, **kwargs):
    proto_version = parse_proto(request_params.get('protocol_version', ''))
    current_time = datetime.utcnow().replace(second=0, microsecond=0)
    try:
        # we will store ES docs here for a single bulk insert
        es_report_docs = {}
        es_report_group_docs = {}
        resource = ApplicationService.by_id(resource_id)

        tags = []
        es_slow_calls_docs = {}
        es_reports_stats_rows = {}
        for report_data in dataset:
            # build report details for later
            added_details = 0
            report = Report()
            report.set_data(report_data, resource, proto_version)
            report._skip_ft_index = True

            # find the latest group in this month's partition
            report_group = ReportGroupService.by_hash_and_resource(
                report.resource_id,
                report.grouping_hash,
                since_when=datetime.utcnow().date().replace(day=1))
            occurences = report_data.get('occurences', 1)
            if not report_group:
                # total_reports will be incremented a moment later
                report_group = ReportGroup(grouping_hash=report.grouping_hash,
                                           occurences=0,
                                           total_reports=0,
                                           last_report=0,
                                           priority=report.priority,
                                           error=report.error,
                                           first_timestamp=report.start_time)
                report_group._skip_ft_index = True
                report_group.report_type = report.report_type
            report.report_group_time = report_group.first_timestamp
            add_sample = pick_sample(report_group.occurences,
                                     report_type=report_group.report_type)
            if add_sample:
                resource.report_groups.append(report_group)
                report_group.reports.append(report)
                added_details += 1
                DBSession.flush()
                if report.partition_id not in es_report_docs:
                    es_report_docs[report.partition_id] = []
                es_report_docs[report.partition_id].append(report.es_doc())
                tags.extend(list(report.tags.items()))
                slow_calls = report.add_slow_calls(report_data, report_group)
                DBSession.flush()
                for s_call in slow_calls:
                    if s_call.partition_id not in es_slow_calls_docs:
                        es_slow_calls_docs[s_call.partition_id] = []
                    es_slow_calls_docs[s_call.partition_id].append(
                        s_call.es_doc())
            else:
                # required for postprocessing to not fail later
                report.report_group = report_group

            # try generating new stat rows if needed
            stat_row = ReportService.generate_stat_rows(
                report, resource, report_group)
            if stat_row.partition_id not in es_reports_stats_rows:
                es_reports_stats_rows[stat_row.partition_id] = []
            es_reports_stats_rows[stat_row.partition_id].append(
                stat_row.es_doc())

            # see if we should mark the 10th/100th occurrence of the report
            last_occurences_10 = int(math.floor(report_group.occurences / 10))
            curr_occurences_10 = int(
                math.floor((report_group.occurences + report.occurences) / 10))
            last_occurences_100 = int(math.floor(report_group.occurences /
                                                 100))
            curr_occurences_100 = int(
                math.floor(
                    (report_group.occurences + report.occurences) / 100))
            notify_occurences_10 = last_occurences_10 != curr_occurences_10
            notify_occurences_100 = last_occurences_100 != curr_occurences_100
            report_group.occurences = ReportGroup.occurences + occurences
            report_group.last_timestamp = report.start_time
            report_group.summed_duration = ReportGroup.summed_duration + report.duration
            summed_duration = ReportGroup.summed_duration + report.duration
            summed_occurences = ReportGroup.occurences + occurences
            report_group.average_duration = summed_duration / summed_occurences
            report_group.run_postprocessing(report)
            if added_details:
                report_group.total_reports = ReportGroup.total_reports + 1
                report_group.last_report = report.id
            report_group.set_notification_info(
                notify_10=notify_occurences_10,
                notify_100=notify_occurences_100)
            DBSession.flush()
            report_group.get_report().notify_channel(report_group)
            if report_group.partition_id not in es_report_group_docs:
                es_report_group_docs[report_group.partition_id] = []
            es_report_group_docs[report_group.partition_id].append(
                report_group.es_doc())

            action = 'REPORT'
            log_msg = '%s: %s %s, client: %s, proto: %s' % (
                action, report_data.get('http_status', 'unknown'),
                str(resource), report_data.get('client'), proto_version)
            log.info(log_msg)
        total_reports = len(dataset)
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS['counters']['reports_per_minute'].format(current_time)
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS['counters']['events_per_minute_per_user'].format(
            resource.owner_user_id, current_time)
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS['counters']['reports_per_hour_per_app'].format(
            resource_id, current_time.replace(minute=0))
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS['apps_that_got_new_data_per_hour'].format(
                current_time.replace(minute=0)), resource_id)
        redis_pipeline.execute()

        add_reports_es(es_report_group_docs, es_report_docs)
        add_reports_slow_calls_es(es_slow_calls_docs)
        add_reports_stats_rows_es(es_reports_stats_rows)
        return True
    except Exception as exc:
        print_traceback(log)
        add_reports.retry(exc=exc)
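The counter bookkeeping at the end of this task is a generic Redis pattern: bucket a counter key by the truncated timestamp, bump it inside a non-transactional pipeline, and let a TTL expire old buckets. A minimal sketch of that pattern with a plain redis-py client follows; the key template here is hypothetical, the real ones come from REDIS_KEYS.

import datetime
import redis

def bump_minute_counter(client, amount, key_template='reports_per_minute:{}'):
    # one bucket per minute: truncate the timestamp to the minute
    now = datetime.datetime.utcnow().replace(second=0, microsecond=0)
    key = key_template.format(now)
    pipe = client.pipeline(transaction=False)
    pipe.incr(key, amount)       # add this batch to the minute bucket
    pipe.expire(key, 3600 * 24)  # keep the counter around for 24 hours
    pipe.execute()

# usage: bump_minute_counter(redis.StrictRedis(), amount=5)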
Example #2
def alert_chart(pkey, chart_uuid):
    start = datetime.utcnow()
    request = get_current_request()
    alert_action = AlertChannelActionService.by_pkey(pkey)
    chart = DashboardChartService.by_uuid(chart_uuid)
    chart.migrate_json_config()
    resource = chart.dashboard
    json_body = chart.config
    ids_to_override = [json_body["resource"]]
    filter_settings = build_filter_settings_from_chart_config(
        request, json_body, override_app_ids=ids_to_override
    )

    log.warning("alert_chart, resource:{}, chart:{}".format(resource, chart_uuid))

    # determine start and end date for dataset
    start_date, end_date = determine_date_boundries_json(json_body)
    if not filter_settings["start_date"]:
        filter_settings["start_date"] = start_date.replace(
            hour=0, minute=0, second=0, microsecond=0
        )

    if not filter_settings["end_date"]:
        filter_settings["end_date"] = end_date

    event_type = Event.types["chart_alert"]
    open_event = None
    latest_closed_event = None
    events_query = EventService.for_resource(
        [resource.resource_id], event_type=event_type, target_uuid=chart_uuid, limit=20
    )

    for event in events_query:
        if event.status == Event.statuses["active"] and not open_event:
            open_event = event
        if event.status == Event.statuses["closed"] and not latest_closed_event:
            latest_closed_event = event

    if latest_closed_event:
        filter_settings["start_date"] = latest_closed_event.end_date

    es_config = transform_json_to_es_config(
        request, json_body, filter_settings, ids_to_override=ids_to_override
    )

    if not es_config["index_names"]:
        return
    result = Datastores.es.search(
        body=es_config["query"], index=es_config["index_names"], doc_type="log", size=0
    )
    series, info_dict = parse_es_result(result, es_config, json_config=json_body)

    # we need to make a deepcopy since we will mutate it
    rule_config = copy.deepcopy(alert_action.rule)
    field_mappings = alert_action.config

    rule_obj = RuleService.rule_from_config(
        rule_config, field_mappings, info_dict["system_labels"]
    )
    matched_interval = None
    finished_interval = None
    for step in reversed(series):
        if rule_obj.match(step):
            log.info("matched start")
            if not matched_interval:
                matched_interval = step
                break
        else:
            finished_interval = step

    if matched_interval:
        if open_event:
            log.info("ALERT: PROGRESS: %s %s" % (event_type, resource))
            if finished_interval:
                open_event.values = copy.deepcopy(open_event.values)
                end_interval = finished_interval["key"].strftime(DATE_FORMAT)
                open_event.values["end_interval"] = end_interval
                open_event.close()
        else:
            log.warning("ALERT: OPEN: %s %s" % (event_type, resource))
            step_size = None
            parent_agg = json_body.get("parentAgg")
            if parent_agg and parent_agg["type"] == "time_histogram":
                step_size = time_deltas[parent_agg["config"]["interval"]][
                    "delta"
                ].total_seconds()
            matched_step_values = {
                "values": matched_interval,
                "labels": info_dict["system_labels"],
            }
            values_dict = {
                "matched_rule": alert_action.get_dict(),
                "matched_step_values": matched_step_values,
                "start_interval": step["key"],
                "end_interval": None,
                "resource": chart.config.get("resource"),
                "chart_name": chart.name,
                "chart_uuid": chart_uuid,
                "step_size": step_size,
                "action_name": alert_action.name,
            }
            new_event = Event(
                resource_id=resource.resource_id,
                event_type=event_type,
                status=Event.statuses["active"],
                values=values_dict,
                target_uuid=chart_uuid,
            )
            DBSession.add(new_event)
            DBSession.flush()
            new_event.send_alerts(request=request, resource=resource)
    elif open_event:
        if finished_interval:
            open_event.values = copy.deepcopy(open_event.values)
            end_interval = finished_interval["key"].strftime(DATE_FORMAT)
            open_event.values["end_interval"] = end_interval
        open_event.close()
    took = datetime.utcnow() - start
    log.warning("chart alert rule check took: {}".format(took))
Example #3
def add_logs(resource_id, request_params, dataset, **kwargs):
    proto_version = request_params.get('protocol_version')
    current_time = datetime.utcnow().replace(second=0, microsecond=0)

    try:
        es_docs = collections.defaultdict(list)
        resource = ApplicationService.by_id_cached()(resource_id)
        resource = DBSession.merge(resource, load=False)
        ns_pairs = []
        for entry in dataset:
            # gather pk and ns so we can remove older versions of the row later
            if entry['primary_key'] is not None:
                ns_pairs.append({
                    "pk": entry['primary_key'],
                    "ns": entry['namespace']
                })
            log_entry = Log()
            log_entry.set_data(entry, resource=resource)
            log_entry._skip_ft_index = True
            resource.logs.append(log_entry)
            DBSession.flush()
            # insert non-pk rows first
            if entry['primary_key'] is None:
                es_docs[log_entry.partition_id].append(log_entry.es_doc())

        # 2nd pass to delete all log entries from db for the same pk/ns pair
        if ns_pairs:
            ids_to_delete = []
            es_docs = collections.defaultdict(list)
            es_docs_to_delete = collections.defaultdict(list)
            found_pkey_logs = LogService.query_by_primary_key_and_namespace(
                list_of_pairs=ns_pairs)
            log_dict = {}
            for log_entry in found_pkey_logs:
                log_key = (log_entry.primary_key, log_entry.namespace)
                if log_key not in log_dict:
                    log_dict[log_key] = []
                log_dict[log_key].append(log_entry)

            for ns, entry_list in log_dict.items():
                entry_list = sorted(entry_list, key=lambda x: x.timestamp)
                # newest row needs to be indexed in es
                log_entry = entry_list[-1]
                # delete everything from pg and ES, leave the last row in pg
                for e in entry_list[:-1]:
                    ids_to_delete.append(e.log_id)
                    es_docs_to_delete[e.partition_id].append(e.delete_hash)

                es_docs_to_delete[log_entry.partition_id].append(
                    log_entry.delete_hash)

                es_docs[log_entry.partition_id].append(log_entry.es_doc())

            if ids_to_delete:
                query = DBSession.query(Log).filter(
                    Log.log_id.in_(ids_to_delete))
                query.delete(synchronize_session=False)
            if es_docs_to_delete:
                # batch this to avoid problems with default ES bulk limits
                for es_index in es_docs_to_delete.keys():
                    for batch in in_batches(es_docs_to_delete[es_index], 20):
                        query = {'terms': {'delete_hash': batch}}

                        try:
                            Datastores.es.delete_by_query(
                                es_index, 'log', query)
                        except pyelasticsearch.ElasticHttpNotFoundError as exc:
                            msg = 'skipping index {}'.format(es_index)
                            log.info(msg)

        total_logs = len(dataset)

        log_msg = 'LOG_NEW: %s, entries: %s, proto:%s' % (
            str(resource), total_logs, proto_version)
        log.info(log_msg)
        # mark_changed(session)
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS['counters']['logs_per_minute'].format(current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS['counters']['events_per_minute_per_user'].format(
            resource.owner_user_id, current_time)
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS['counters']['logs_per_hour_per_app'].format(
            resource_id, current_time.replace(minute=0))
        redis_pipeline.incr(key, total_logs)
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS['apps_that_got_new_data_per_hour'].format(
                current_time.replace(minute=0)), resource_id)
        redis_pipeline.execute()
        add_logs_es(es_docs)
        return True
    except Exception as exc:
        print_traceback(log)
        add_logs.retry(exc=exc)
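The second pass in this task is essentially a dedup step: group rows by their (primary_key, namespace) pair, keep only the newest row of each group for re-indexing, and mark everything older for deletion. A minimal sketch of that grouping on plain dicts (the field names are assumptions):

import collections

def split_newest_and_stale(rows):
    grouped = collections.defaultdict(list)
    for row in rows:
        grouped[(row['primary_key'], row['namespace'])].append(row)
    newest, stale = [], []
    for entries in grouped.values():
        entries = sorted(entries, key=lambda r: r['timestamp'])
        newest.append(entries[-1])   # re-index this one in ES
        stale.extend(entries[:-1])   # delete these older versions
    return newest, stale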