Example #1
def general_metrics_create(request):
    """
    Endpoint for general metrics aggregation
    """
    application = request.context.resource
    if request.method.upper() == "OPTIONS":
        return check_cors(request, application)
    else:
        check_cors(request, application, should_return=False)

    params = dict(request.params.copy())
    proto_version = parse_proto(params.get("protocol_version", ""))
    payload = request.unsafe_json_body
    sequence_accepted = request.matched_route.name == "api_general_metrics"
    if sequence_accepted:
        if application.allow_permanent_storage:
            schema = GeneralMetricsPermanentListSchema().bind(
                utcnow=datetime.datetime.utcnow()
            )
        else:
            schema = GeneralMetricsListSchema().bind(utcnow=datetime.datetime.utcnow())
    else:
        if application.allow_permanent_storage:
            schema = GeneralMetricPermanentSchema().bind(
                utcnow=datetime.datetime.utcnow()
            )
        else:
            schema = GeneralMetricSchema().bind(utcnow=datetime.datetime.utcnow())

    deserialized_metrics = schema.deserialize(payload)
    if sequence_accepted is False:
        deserialized_metrics = [deserialized_metrics]

    rate_limiting(
        request,
        application,
        "per_application_metrics_rate_limit",
        len(deserialized_metrics),
    )

    tasks.add_metrics.delay(
        application.resource_id, params, deserialized_metrics, proto_version
    )

    log.info(
        "METRICS call {} {} client:{}".format(
            application.resource_name, proto_version, request.headers.get("user_agent")
        )
    )
    return "OK: Metrics accepted"
Example #2
def add_uptime_stats(params, metric):
    proto_version = parse_proto(params.get("protocol_version"))
    try:
        application = ApplicationService.by_id_cached()(metric["resource_id"])
        if not application:
            return
        application = DBSession.merge(application, load=False)
        start_interval = convert_date(metric["timestamp"])
        start_interval = start_interval.replace(second=0, microsecond=0)
        new_metric = UptimeMetric(
            start_interval=start_interval,
            response_time=metric["response_time"],
            status_code=metric["status_code"],
            is_ok=metric["is_ok"],
            location=metric.get("location", 1),
            tries=metric["tries"],
            resource_id=application.resource_id,
            owner_user_id=application.owner_user_id,
        )
        DBSession.add(new_metric)
        DBSession.flush()
        add_metrics_uptime([new_metric.es_doc()])
        if metric["is_ok"]:
            event_types = [Event.types["uptime_alert"]]
            statuses = [Event.statuses["active"]]
            # get open uptime alert events older than 6 minutes and close them
            events = EventService.by_type_and_status(
                event_types,
                statuses,
                older_than=(datetime.utcnow() - timedelta(minutes=6)),
                app_ids=[application.resource_id],
            )
            for event in events:
                event.close()
        else:
            UptimeMetricService.check_for_alert(application, metric=metric)
        action = "METRICS UPTIME"
        metrics_msg = "%s: %s, proto:%s" % (action, str(application),
                                            proto_version)
        log.info(metrics_msg)
        session = DBSession()
        mark_changed(session)
        return True
    except Exception as exc:
        print_traceback(log)
        add_uptime_stats.retry(exc=exc)
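The add_uptime_stats.retry(exc=exc) call at the end relies on Celery's task retry mechanism. Below is a minimal, self-contained sketch of that pattern, assuming a standard Celery app; the task name, broker and retry settings are illustrative.

from celery import Celery

celery_app = Celery("sketch", broker="memory://")

@celery_app.task(bind=True, max_retries=3, default_retry_delay=60)
def flaky_insert(self, metric):
    try:
        # stand-in for the real DB/Elasticsearch work done in add_uptime_stats
        if metric.get("response_time") is None:
            raise ValueError("incomplete metric")
        return True
    except Exception as exc:
        # re-enqueue the task instead of silently dropping the data point
        raise self.retry(exc=exc)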
Example #3
def reports_create(request):
    """
    Endpoint for exception and slowness reports
    """
    # route_url('reports')
    application = request.context.resource
    if request.method.upper() == 'OPTIONS':
        return check_cors(request, application)
    else:
        check_cors(request, application, should_return=False)
    params = dict(request.params.copy())
    proto_version = parse_proto(params.get('protocol_version', ''))
    payload = request.unsafe_json_body
    sequence_accepted = request.matched_route.name == 'api_reports'

    if sequence_accepted:
        schema = ReportListSchema_0_5().bind(
            utcnow=datetime.datetime.utcnow())
    else:
        schema = ReportSchema_0_5().bind(
            utcnow=datetime.datetime.utcnow())

    deserialized_reports = schema.deserialize(payload)
    if sequence_accepted is False:
        deserialized_reports = [deserialized_reports]
    if deserialized_reports:
        rate_limiting(request, application,
                      'per_application_reports_rate_limit',
                      len(deserialized_reports))

        # pprint.pprint(deserialized_reports)
        tasks.add_reports.delay(application.resource_id, params,
                                deserialized_reports)
    log.info('REPORT call %s, %s client:%s' % (
        application,
        proto_version,
        request.headers.get('user_agent')))
    return 'OK: Reports accepted'
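The schema.bind(utcnow=...) calls above use colander's deferred-value mechanism so that every row deserialized within one request shares the same timestamp default. A minimal sketch of that idea follows; PointSchema is an illustrative schema, not the project's ReportSchema_0_5.

import datetime
import colander

@colander.deferred
def deferred_utcnow(node, kw):
    # resolved at bind() time with the value the view passes in
    return kw["utcnow"]

class PointSchema(colander.MappingSchema):
    timestamp = colander.SchemaNode(colander.DateTime(default_tzinfo=None),
                                    missing=deferred_utcnow)
    value = colander.SchemaNode(colander.Float())

schema = PointSchema().bind(utcnow=datetime.datetime.utcnow())
print(schema.deserialize({"value": "1.5"}))  # timestamp falls back to the bound utcnow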
Example #4
def logs_create(request):
    """
    Endpoint for log aggregation
    """
    application = request.context.resource
    if request.method.upper() == "OPTIONS":
        return check_cors(request, application)
    else:
        check_cors(request, application, should_return=False)

    params = dict(request.params.copy())
    proto_version = parse_proto(params.get("protocol_version", ""))
    payload = request.unsafe_json_body
    sequence_accepted = request.matched_route.name == "api_logs"

    if sequence_accepted:
        if application.allow_permanent_storage:
            schema = LogListPermanentSchema().bind(utcnow=datetime.datetime.utcnow())
        else:
            schema = LogListSchema().bind(utcnow=datetime.datetime.utcnow())
    else:
        if application.allow_permanent_storage:
            schema = LogSchemaPermanent().bind(utcnow=datetime.datetime.utcnow())
        else:
            schema = LogSchema().bind(utcnow=datetime.datetime.utcnow())

    deserialized_logs = schema.deserialize(payload)
    if sequence_accepted is False:
        deserialized_logs = [deserialized_logs]

    rate_limiting(
        request, application, "per_application_logs_rate_limit", len(deserialized_logs)
    )

    # pprint.pprint(deserialized_logs)

    # we need to split those out so we can process the pkey ones one by one
    non_pkey_logs = [
        log_dict for log_dict in deserialized_logs if not log_dict["primary_key"]
    ]
    pkey_dict = {}
    # try to process the logs as best as we can and group them together
    # to reduce the amount of insert queries
    for log_dict in deserialized_logs:
        if log_dict["primary_key"]:
            key = (log_dict["primary_key"], log_dict["namespace"])
            if key not in pkey_dict:
                pkey_dict[key] = []
            pkey_dict[key].append(log_dict)

    if non_pkey_logs:
        log.debug("%s non-pkey logs received: %s" % (application, len(non_pkey_logs)))
        tasks.add_logs.delay(application.resource_id, params, non_pkey_logs)
    if pkey_dict:
        logs_to_insert = []
        for primary_key_tuple, payload in pkey_dict.items():
            sorted_logs = sorted(payload, key=lambda x: x["date"])
            logs_to_insert.append(sorted_logs[-1])
        log.debug("%s pkey logs received: %s" % (application, len(logs_to_insert)))
        tasks.add_logs.delay(application.resource_id, params, logs_to_insert)

    log.info(
        "LOG call %s %s client:%s"
        % (application, proto_version, request.headers.get("user_agent"))
    )
    return "OK: Logs accepted"
Example #5
def request_metrics_create(request):
    """
    Endpoint for performance metrics, aggregates view performance stats
    and converts them to general metric row
    """
    application = request.context.resource
    if request.method.upper() == "OPTIONS":
        return check_cors(request, application)
    else:
        check_cors(request, application, should_return=False)

    params = dict(request.params.copy())
    proto_version = parse_proto(params.get("protocol_version", ""))

    payload = request.unsafe_json_body
    schema = MetricsListSchema()
    dataset = schema.deserialize(payload)

    rate_limiting(
        request, application, "per_application_metrics_rate_limit", len(dataset)
    )

    # looping report data
    metrics = {}
    for metric in dataset:
        server_name = metric.get("server", "").lower() or "unknown"
        start_interval = convert_date(metric["timestamp"])
        start_interval = start_interval.replace(second=0, microsecond=0)

        for view_name, view_metrics in metric["metrics"]:
            key = "%s%s%s" % (metric["server"], start_interval, view_name)
            if key not in metrics:
                metrics[key] = {
                    "requests": 0,
                    "main": 0,
                    "sql": 0,
                    "nosql": 0,
                    "remote": 0,
                    "tmpl": 0,
                    "custom": 0,
                    "sql_calls": 0,
                    "nosql_calls": 0,
                    "remote_calls": 0,
                    "tmpl_calls": 0,
                    "custom_calls": 0,
                    "start_interval": start_interval,
                    "server_name": server_name,
                    "view_name": view_name,
                }
            metrics[key]["requests"] += int(view_metrics["requests"])
            metrics[key]["main"] += round(view_metrics["main"], 5)
            metrics[key]["sql"] += round(view_metrics["sql"], 5)
            metrics[key]["nosql"] += round(view_metrics["nosql"], 5)
            metrics[key]["remote"] += round(view_metrics["remote"], 5)
            metrics[key]["tmpl"] += round(view_metrics["tmpl"], 5)
            metrics[key]["custom"] += round(view_metrics.get("custom", 0.0), 5)
            metrics[key]["sql_calls"] += int(view_metrics.get("sql_calls", 0))
            metrics[key]["nosql_calls"] += int(view_metrics.get("nosql_calls", 0))
            metrics[key]["remote_calls"] += int(view_metrics.get("remote_calls", 0))
            metrics[key]["tmpl_calls"] += int(view_metrics.get("tmpl_calls", 0))
            metrics[key]["custom_calls"] += int(view_metrics.get("custom_calls", 0))

            if not metrics[key]["requests"]:
                # fix this here because validator can't
                metrics[key]["requests"] = 1
                # metrics dict is being built to minimize
                # the amount of queries used
                # in case we get multiple rows from same minute

    normalized_metrics = []
    for metric in metrics.values():
        new_metric = {
            "namespace": "appenlight.request_metric",
            "timestamp": metric.pop("start_interval"),
            "server_name": metric["server_name"],
            "tags": list(metric.items()),
        }
        normalized_metrics.append(new_metric)

    tasks.add_metrics.delay(
        application.resource_id, params, normalized_metrics, proto_version
    )

    log.info(
        "REQUEST METRICS call {} {} client:{}".format(
            application.resource_name, proto_version, request.headers.get("user_agent")
        )
    )
    return "OK: request metrics accepted"
Example #6
def add_reports(resource_id, request_params, dataset, **kwargs):
    proto_version = parse_proto(request_params.get("protocol_version", ""))
    current_time = datetime.utcnow().replace(second=0, microsecond=0)
    try:
        # we will collect Elasticsearch docs here for a single bulk insert
        es_report_docs = {}
        es_report_group_docs = {}
        resource = ApplicationService.by_id(resource_id)

        tags = []
        es_slow_calls_docs = {}
        es_reports_stats_rows = {}
        for report_data in dataset:
            # build report details for later
            added_details = 0
            report = Report()
            report.set_data(report_data, resource, proto_version)
            report._skip_ft_index = True

            # find the latest group in this month's partition
            report_group = ReportGroupService.by_hash_and_resource(
                report.resource_id,
                report.grouping_hash,
                since_when=datetime.utcnow().date().replace(day=1),
            )
            occurences = report_data.get("occurences", 1)
            if not report_group:
                # total_reports will be incremented a moment later
                report_group = ReportGroup(
                    grouping_hash=report.grouping_hash,
                    occurences=0,
                    total_reports=0,
                    last_report=0,
                    priority=report.priority,
                    error=report.error,
                    first_timestamp=report.start_time,
                )
                report_group._skip_ft_index = True
                report_group.report_type = report.report_type
            report.report_group_time = report_group.first_timestamp
            add_sample = pick_sample(report_group.occurences,
                                     report_type=report_group.report_type)
            if add_sample:
                resource.report_groups.append(report_group)
                report_group.reports.append(report)
                added_details += 1
                DBSession.flush()
                if report.partition_id not in es_report_docs:
                    es_report_docs[report.partition_id] = []
                es_report_docs[report.partition_id].append(report.es_doc())
                tags.extend(list(report.tags.items()))
                slow_calls = report.add_slow_calls(report_data, report_group)
                DBSession.flush()
                for s_call in slow_calls:
                    if s_call.partition_id not in es_slow_calls_docs:
                        es_slow_calls_docs[s_call.partition_id] = []
                    es_slow_calls_docs[s_call.partition_id].append(
                        s_call.es_doc())
            else:
                # required for postprocessing to not fail later
                report.report_group = report_group

            # try generating new stat rows if needed
            stat_row = ReportService.generate_stat_rows(
                report, resource, report_group)
            if stat_row.partition_id not in es_reports_stats_rows:
                es_reports_stats_rows[stat_row.partition_id] = []
            es_reports_stats_rows[stat_row.partition_id].append(
                stat_row.es_doc())

            # see if we crossed a 10th or 100th occurrence threshold for notifications
            last_occurences_10 = int(math.floor(report_group.occurences / 10))
            curr_occurences_10 = int(
                math.floor((report_group.occurences + report.occurences) / 10))
            last_occurences_100 = int(math.floor(report_group.occurences /
                                                 100))
            curr_occurences_100 = int(
                math.floor(
                    (report_group.occurences + report.occurences) / 100))
            notify_occurences_10 = last_occurences_10 != curr_occurences_10
            notify_occurences_100 = last_occurences_100 != curr_occurences_100
            report_group.occurences = ReportGroup.occurences + occurences
            report_group.last_timestamp = report.start_time
            report_group.summed_duration = ReportGroup.summed_duration + report.duration
            summed_duration = ReportGroup.summed_duration + report.duration
            summed_occurences = ReportGroup.occurences + occurences
            report_group.average_duration = summed_duration / summed_occurences
            report_group.run_postprocessing(report)
            if added_details:
                report_group.total_reports = ReportGroup.total_reports + 1
                report_group.last_report = report.id
            report_group.set_notification_info(
                notify_10=notify_occurences_10,
                notify_100=notify_occurences_100)
            DBSession.flush()
            report_group.get_report().notify_channel(report_group)
            if report_group.partition_id not in es_report_group_docs:
                es_report_group_docs[report_group.partition_id] = []
            es_report_group_docs[report_group.partition_id].append(
                report_group.es_doc())

            action = "REPORT"
            log_msg = "%s: %s %s, client: %s, proto: %s" % (
                action,
                report_data.get("http_status", "unknown"),
                str(resource),
                report_data.get("client"),
                proto_version,
            )
            log.info(log_msg)
        total_reports = len(dataset)
        redis_pipeline = Datastores.redis.pipeline(transaction=False)
        key = REDIS_KEYS["counters"]["reports_per_minute"].format(current_time)
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600 * 24)
        key = REDIS_KEYS["counters"]["events_per_minute_per_user"].format(
            resource.owner_user_id, current_time)
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600)
        key = REDIS_KEYS["counters"]["reports_per_hour_per_app"].format(
            resource_id, current_time.replace(minute=0))
        redis_pipeline.incr(key, total_reports)
        redis_pipeline.expire(key, 3600 * 24 * 7)
        redis_pipeline.sadd(
            REDIS_KEYS["apps_that_got_new_data_per_hour"].format(
                current_time.replace(minute=0)),
            resource_id,
        )
        redis_pipeline.execute()

        add_reports_es(es_report_group_docs, es_report_docs)
        add_reports_slow_calls_es(es_slow_calls_docs)
        add_reports_stats_rows_es(es_reports_stats_rows)
        return True
    except Exception as exc:
        print_traceback(log)
        if celery.conf["CELERY_EAGER_PROPAGATES_EXCEPTIONS"]:
            raise
        add_reports.retry(exc=exc)
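The Redis block near the end follows a common counter-with-TTL pattern: increment a per-minute key inside a pipeline and let it expire on its own. Below is a minimal sketch using redis-py; the key name and TTL are illustrative, not the project's REDIS_KEYS layout.

import datetime
import redis

redis_client = redis.StrictRedis()

def bump_reports_per_minute(count):
    now = datetime.datetime.utcnow().replace(second=0, microsecond=0)
    key = "counters:reports_per_minute:%s" % now.isoformat()
    pipe = redis_client.pipeline(transaction=False)
    pipe.incr(key, count)        # accumulate within the current minute bucket
    pipe.expire(key, 3600 * 24)  # let the counter age out after a day
    pipe.execute()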