示例#1
0
    def test(self):
        """
        Check that ``bulk_get_incident_stats`` reports, for each incident, the
        same event stats and aggregates as the per-incident helpers
        ``get_incident_event_stats`` and ``get_incident_aggregates``.

        Exercised with one incident that has been closed and one left as created.
        """
        thirty_days = timedelta(days=30)

        closed_incident = create_incident(
            self.organization,
            IncidentType.CREATED,
            "Closed",
            "",
            groups=[self.group],
            date_started=timezone.now() - thirty_days,
        )
        update_incident_status(closed_incident, IncidentStatus.CLOSED)

        open_incident = create_incident(
            self.organization,
            IncidentType.CREATED,
            "Open",
            "",
            groups=[self.group],
            date_started=timezone.now() - thirty_days,
        )

        incidents = [closed_incident, open_incident]
        bulk_results = bulk_get_incident_stats(incidents)

        # The bulk results are paired positionally with the incidents passed in.
        for incident, bulk_stats in zip(incidents, bulk_results):
            single_event_stats = get_incident_event_stats(incident)
            assert bulk_stats["event_stats"].data["data"] == single_event_stats.data["data"]
            assert bulk_stats["event_stats"].start == single_event_stats.start
            assert bulk_stats["event_stats"].end == single_event_stats.end
            assert bulk_stats["event_stats"].rollup == single_event_stats.rollup

            single_aggregates = get_incident_aggregates(incident)
            assert bulk_stats["total_events"] == single_aggregates["count"]
            assert bulk_stats["unique_users"] == single_aggregates["unique_users"]
示例#2
0
    def trigger_alert_threshold(self):
        """
        Called when a subscription update exceeds the value defined in
        `alert_rule.alert_threshold` and there is not already an active incident.

        Bumps the count of consecutive updates that exceeded the threshold. Once
        that count reaches the alert rule's `threshold_period`, an incident is
        created and stored on `self.active_incident`, and the count is reset.
        :return: None
        """
        self.alert_triggers += 1
        if self.alert_triggers < self.alert_rule.threshold_period:
            # Not enough consecutive breaches yet; just keep counting.
            return

        rule = self.alert_rule
        detected_at = to_datetime(self.last_update)
        self.active_incident = create_incident(
            rule.organization,
            IncidentType.ALERT_TRIGGERED,
            # TODO: Include more info in name?
            rule.name,
            alert_rule=rule,
            # TODO: Incidents need to keep track of which metric to display
            query=self.subscription.query,
            # The same timestamp is used for both the start and detection dates.
            date_started=detected_at,
            date_detected=detected_at,
            projects=[self.subscription.project],
        )
        # TODO: We should create an audit log, and maybe something that keeps
        # all of the details available for showing on the incident. Might be a json
        # blob or w/e? Or might be able to use the audit log.

        # The incident has been triggered, so the consecutive-breach counter is
        # no longer needed and goes back to 0.
        self.alert_triggers = 0
示例#3
0
 def test_simple(self):
     """
     Create an incident with one explicit project and two groups, then check
     its stored fields and the group/project link rows.
     """
     expected_status = IncidentStatus.CREATED
     expected_title = 'hello'
     expected_query = 'goodbye'
     started_at = timezone.now()
     second_project = self.create_project()
     second_group = self.create_group(project=second_project)

     incident = create_incident(
         self.organization,
         status=expected_status,
         title=expected_title,
         query=expected_query,
         date_started=started_at,
         projects=[self.project],
         groups=[self.group, second_group],
     )

     assert incident.identifier == 1
     assert incident.status == expected_status.value
     assert incident.title == expected_title
     assert incident.query == expected_query
     assert incident.date_started == started_at
     # No date_detected was passed; the test expects it to equal date_started.
     assert incident.date_detected == started_at

     linked_groups = IncidentGroup.objects.filter(
         incident=incident,
         group__in=[self.group, second_group],
     )
     assert linked_groups.count() == 2

     # Only self.project was passed explicitly; the second project is the one
     # the second group belongs to, and is expected to be linked as well.
     linked_projects = IncidentProject.objects.filter(
         incident=incident,
         project__in=[self.project, second_project],
     )
     assert linked_projects.count() == 2
示例#4
0
    def test_incidents_list(self):
        """
        Browser walk-through of the incidents list and details pages.

        Creates an alert rule and an incident attached to it, then, with the
        feature enabled, snapshots the list page, clicks the link to the
        incident and snapshots the details page.
        """
        rule = create_alert_rule(
            self.organization, [self.project], "hello", "level:error", "count()", 10, 1
        )

        incident = create_incident(
            self.organization,
            type_=IncidentType.DETECTED,
            title="Incident #1",
            query="hello",
            aggregation=QueryAggregations.TOTAL,
            date_started=timezone.now(),
            date_detected=timezone.now(),
            projects=[self.project],
            groups=[self.group],
            alert_rule=rule,
        )

        # Attribute selector used to find the link to this incident.
        # NOTE(review): the selector text has no closing `"]` - confirm intentional.
        link_selector = u'[href="/organizations/{}/alerts/{}/'.format(
            self.organization.slug, incident.identifier
        )

        with self.feature(FEATURE_NAME):
            browser = self.browser

            # List page.
            browser.get(self.path)
            browser.wait_until_not(".loading-indicator")
            browser.wait_until_test_id("incident-sparkline")
            browser.snapshot("incidents - list")

            # Details page, reached by clicking the incident link.
            browser.wait_until(link_selector)
            browser.click(link_selector)
            browser.wait_until_not(".loading-indicator")
            browser.wait_until_test_id("incident-title")

            browser.wait_until_not('[data-test-id="loading-placeholder"]')
            browser.snapshot("incidents - details")
示例#5
0
 def test_simple(self):
     """
     Create an incident with one explicit project and two groups, then check
     its stored fields, its group/project link rows and the CREATED activity.
     """
     expected_type = IncidentType.CREATED
     expected_title = 'hello'
     expected_query = 'goodbye'
     started_at = timezone.now()
     second_project = self.create_project()
     second_group = self.create_group(project=second_project)

     incident = create_incident(
         self.organization,
         type=expected_type,
         title=expected_title,
         query=expected_query,
         date_started=started_at,
         projects=[self.project],
         groups=[self.group, second_group],
     )

     assert incident.identifier == 1
     # NOTE(review): this compares `status` against the *type* enum's value -
     # confirm that is the intended check.
     assert incident.status == expected_type.value
     assert incident.title == expected_title
     assert incident.query == expected_query
     assert incident.date_started == started_at
     # No date_detected was passed; the test expects it to equal date_started.
     assert incident.date_detected == started_at

     linked_groups = IncidentGroup.objects.filter(
         incident=incident,
         group__in=[self.group, second_group],
     )
     assert linked_groups.count() == 2

     # The second project is only reachable through the second group.
     linked_projects = IncidentProject.objects.filter(
         incident=incident,
         project__in=[self.project, second_project],
     )
     assert linked_projects.count() == 2

     # Exactly one CREATED activity row with a non-null event stats snapshot.
     created_activity = IncidentActivity.objects.filter(
         incident=incident,
         type=IncidentActivityType.CREATED.value,
         event_stats_snapshot__isnull=False,
     )
     assert created_activity.count() == 1
示例#6
0
    def post(self, request, organization):
        """
        Create an incident for ``organization`` from the request payload.

        Responses:
          * 404 when the "organizations:incidents" feature is not enabled for
            this organization / actor.
          * 400 with the serializer errors when the payload does not validate.
          * 201 with the serialized incident on success.

        :raises PermissionDenied: when the requester lacks access to any
            project listed in the payload or owning one of the listed groups.
        """
        if not features.has("organizations:incidents", organization, actor=request.user):
            return self.respond(status=404)

        serializer = IncidentSerializer(
            data=request.data,
            context={"organization": organization},
        )
        if not serializer.is_valid():
            return Response(serializer.errors, status=400)

        result = serializer.validated_data
        groups = result["groups"]

        # Access is checked for the explicitly listed projects as well as the
        # projects that the listed groups belong to.
        all_projects = set(result["projects"]) | {group.project for group in groups}
        # Test the access predicate itself rather than the truthiness of the
        # project objects that fail it.
        if any(not request.access.has_project_access(project) for project in all_projects):
            raise PermissionDenied

        incident = create_incident(
            organization=organization,
            type=IncidentType.CREATED,
            title=result["title"],
            query=result.get("query", ""),
            aggregation=result["aggregation"],
            date_started=result.get("dateStarted"),
            date_detected=result.get("dateDetected"),
            projects=result["projects"],
            groups=groups,
            user=request.user,
        )
        return Response(serialize(incident, request.user), status=201)
示例#7
0
 def test_simple(self):
     """
     Smoke test for ``create_incident``: the returned incident carries the
     values passed in, and link rows exist for both groups and both projects.
     """
     incident_status = IncidentStatus.CREATED
     incident_title = 'hello'
     incident_query = 'goodbye'
     start = timezone.now()
     extra_project = self.create_project()
     extra_group = self.create_group(project=extra_project)

     incident = create_incident(
         self.organization,
         status=incident_status,
         title=incident_title,
         query=incident_query,
         date_started=start,
         projects=[self.project],
         groups=[self.group, extra_group],
     )

     # Scalar fields on the incident itself.
     assert incident.identifier == 1
     assert incident.status == incident_status.value
     assert incident.title == incident_title
     assert incident.query == incident_query
     assert incident.date_started == start
     # date_detected was not supplied and is expected to match date_started.
     assert incident.date_detected == start

     # Link rows: one per group, and one per project (including the project
     # that was only supplied indirectly through extra_group).
     group_links = IncidentGroup.objects.filter(
         incident=incident,
         group__in=[self.group, extra_group],
     )
     assert group_links.count() == 2
     project_links = IncidentProject.objects.filter(
         incident=incident,
         project__in=[self.project, extra_project],
     )
     assert project_links.count() == 2
示例#8
0
    def post(self, request, organization):
        """
        Create an incident for ``organization`` from the request payload.

        Responses:
          * 404 when the 'organizations:incidents' feature is not enabled for
            this organization / actor.
          * 400 with the serializer errors when the payload does not validate.
          * 201 with the serialized incident on success.

        :raises PermissionDenied: when the requester lacks access to any
            project listed in the payload or owning one of the listed groups.
        """
        if not features.has('organizations:incidents', organization, actor=request.user):
            return self.respond(status=404)

        serializer = IncidentSerializer(
            data=request.data,
            context={'organization': organization},
        )
        if not serializer.is_valid():
            return Response(serializer.errors, status=400)

        result = serializer.validated_data
        groups = result['groups']

        # Access is checked for the explicitly listed projects as well as the
        # projects that the listed groups belong to.
        all_projects = set(result['projects']) | {group.project for group in groups}
        # Test the access predicate itself rather than the truthiness of the
        # project objects that fail it.
        if any(not request.access.has_project_access(project) for project in all_projects):
            raise PermissionDenied

        date_started = result['dateStarted']
        incident = create_incident(
            organization=organization,
            type=IncidentType.CREATED,
            title=result['title'],
            query=result.get('query', ''),
            date_started=date_started,
            # Detection date falls back to the start date when not supplied.
            date_detected=result.get('dateDetected', date_started),
            projects=result['projects'],
            groups=groups,
            user=request.user,
        )
        return Response(serialize(incident, request.user), status=201)
    def post(self, request, organization):
        """
        Create an incident for ``organization`` from the request payload.

        Responses:
          * 404 when the 'organizations:incidents' feature is not enabled for
            this organization / actor.
          * 400 with the serializer errors when the payload does not validate.
          * 201 with the serialized incident on success.

        :raises PermissionDenied: when the requester lacks access to any
            project listed in the payload or owning one of the listed groups.
        """
        if not features.has('organizations:incidents', organization, actor=request.user):
            return self.respond(status=404)

        serializer = IncidentSerializer(
            data=request.DATA,
            context={'organization': organization},
        )
        if not serializer.is_valid():
            return Response(serializer.errors, status=400)

        result = serializer.object
        groups = result['groups']

        # Access is checked for the explicitly listed projects as well as the
        # projects that the listed groups belong to.
        all_projects = set(result['projects']) | {group.project for group in groups}
        # Test the access predicate itself rather than the truthiness of the
        # project objects that fail it.
        if any(not request.access.has_project_access(project) for project in all_projects):
            raise PermissionDenied

        date_started = result['dateStarted']
        incident = create_incident(
            organization=organization,
            status=IncidentStatus.CREATED,
            title=result['title'],
            query=result.get('query', ''),
            date_started=date_started,
            # Detection date falls back to the start date when not supplied.
            date_detected=result.get('dateDetected', date_started),
            projects=result['projects'],
            groups=groups,
        )
        return Response(serialize(incident, request.user), status=201)
示例#10
0
    def test_simple(self):
        """
        Create an incident tied to an alert rule and check the stored fields,
        the group/project/activity rows, the recorded analytics event and the
        scheduling of the suspects calculation task.
        """
        expected_type = IncidentType.CREATED
        expected_title = "hello"
        expected_query = "goodbye"
        started_at = timezone.now()
        second_project = self.create_project()
        second_group = self.create_group(project=second_project)
        # Reset the mock so that only calls made from here on are counted.
        self.record_event.reset_mock()
        rule = create_alert_rule(
            self.organization,
            [self.project],
            "hello",
            AlertRuleThresholdType.ABOVE,
            "level:error",
            QueryAggregations.TOTAL,
            10,
            1000,
            400,
            1,
        )

        incident = create_incident(
            self.organization,
            type=expected_type,
            title=expected_title,
            query=expected_query,
            date_started=started_at,
            projects=[self.project],
            groups=[self.group, second_group],
            alert_rule=rule,
        )

        assert incident.identifier == 1
        # NOTE(review): this compares `status` against the *type* enum's value -
        # confirm that is the intended check.
        assert incident.status == expected_type.value
        assert incident.title == expected_title
        assert incident.query == expected_query
        assert incident.date_started == started_at
        # No date_detected was passed; the test expects it to equal date_started.
        assert incident.date_detected == started_at
        assert incident.alert_rule == rule

        linked_groups = IncidentGroup.objects.filter(
            incident=incident, group__in=[self.group, second_group]
        )
        assert linked_groups.count() == 2

        # The second project is only reachable through the second group.
        linked_projects = IncidentProject.objects.filter(
            incident=incident, project__in=[self.project, second_project]
        )
        assert linked_projects.count() == 2

        created_activity = IncidentActivity.objects.filter(
            incident=incident,
            type=IncidentActivityType.CREATED.value,
            event_stats_snapshot__isnull=False,
        )
        assert created_activity.count() == 1

        # Exactly one event was recorded, and it describes this incident.
        assert len(self.record_event.call_args_list) == 1
        event = self.record_event.call_args[0][0]
        assert isinstance(event, IncidentCreatedEvent)
        expected_event_data = {
            "organization_id": six.text_type(self.organization.id),
            "incident_id": six.text_type(incident.id),
            "incident_type": six.text_type(IncidentType.CREATED.value),
        }
        assert event.data == expected_event_data

        self.calculate_incident_suspects.apply_async.assert_called_once_with(
            kwargs={"incident_id": incident.id}
        )
示例#11
0
    def trigger_alert_threshold(self, trigger):
        """
        Called when a subscription update exceeds the value defined in
        `trigger.alert_threshold` and the trigger hasn't already been activated.

        Bumps this trigger's count of consecutive breaches. Once the count
        reaches the alert rule's `threshold_period`, the trigger fires: a metric
        is emitted, an incident is created if none is active, the incident
        trigger record is created or re-marked ACTIVE, the severity and action
        handlers run, and the count is reset to 0.

        :param trigger: the alert rule trigger whose threshold was exceeded.
        :return: None
        """
        trigger_id = trigger.id
        self.trigger_alert_counts[trigger_id] += 1
        if self.trigger_alert_counts[trigger_id] < self.alert_rule.threshold_period:
            # Not enough consecutive breaches yet; just keep counting.
            return

        metrics.incr("incidents.alert_rules.trigger", tags={"type": "fire"})

        # Only create a new incident if we don't already have an active one
        if not self.active_incident:
            # Subscriptions label buckets by the end of the bucket, whereas discover
            # labels them by the front. This causes us an off-by-one error with
            # alert start dates, so to prevent this we subtract a bucket off of the
            # start date.
            # We also multiply by threshold_period so that we can show when the
            # alert actually started happening, rather than when we detected it.
            detected_at = self.last_update - timedelta(
                seconds=self.alert_rule.snuba_query.time_window
                * self.alert_rule.threshold_period
            )
            self.active_incident = create_incident(
                self.alert_rule.organization,
                IncidentType.ALERT_TRIGGERED,
                # TODO: Include more info in name?
                self.alert_rule.name,
                alert_rule=self.alert_rule,
                date_started=detected_at,
                date_detected=detected_at,
                projects=[self.subscription.project],
            )

        # Now create (or update if it already exists) the incident trigger so that
        # we have a record of this trigger firing for this incident
        incident_trigger = self.incident_triggers.get(trigger_id)
        if not incident_trigger:
            incident_trigger = IncidentTrigger.objects.create(
                incident=self.active_incident,
                alert_rule_trigger=trigger,
                status=TriggerStatus.ACTIVE.value,
            )
        else:
            incident_trigger.status = TriggerStatus.ACTIVE.value
            incident_trigger.save()

        self.handle_incident_severity_update()
        self.handle_trigger_actions(incident_trigger)
        self.incident_triggers[trigger_id] = incident_trigger

        # TODO: We should create an audit log, and maybe something that keeps
        # all of the details available for showing on the incident. Might be a json
        # blob or w/e? Or might be able to use the audit log

        # The trigger has fired, so its consecutive-breach counter is no longer
        # needed and goes back to 0.
        self.trigger_alert_counts[trigger_id] = 0
示例#12
0
    def test_simple(self):
        """
        Create an ALERT_TRIGGERED incident tied to an alert rule and check the
        stored fields, the group/project/activity rows and the recorded
        analytics event.
        """
        expected_type = IncidentType.ALERT_TRIGGERED
        expected_title = "hello"
        expected_query = "goodbye"
        expected_aggregation = QueryAggregations.UNIQUE_USERS
        started_at = timezone.now()
        second_project = self.create_project(fire_project_created=True)
        second_group = self.create_group(project=second_project)
        rule = create_alert_rule(
            self.organization,
            [self.project],
            "hello",
            "level:error",
            QueryAggregations.TOTAL,
            10,
            1,
        )

        # Reset the mock so that only the create_incident call below is counted.
        self.record_event.reset_mock()
        incident = create_incident(
            self.organization,
            type=expected_type,
            title=expected_title,
            query=expected_query,
            aggregation=expected_aggregation,
            date_started=started_at,
            projects=[self.project],
            groups=[self.group, second_group],
            alert_rule=rule,
        )

        assert incident.identifier == 1
        assert incident.status == IncidentStatus.OPEN.value
        assert incident.type == expected_type.value
        assert incident.title == expected_title
        assert incident.query == expected_query
        assert incident.aggregation == expected_aggregation.value
        assert incident.date_started == started_at
        # No date_detected was passed; the test expects it to equal date_started.
        assert incident.date_detected == started_at
        assert incident.alert_rule == rule

        linked_groups = IncidentGroup.objects.filter(
            incident=incident, group__in=[self.group, second_group]
        )
        assert linked_groups.count() == 2

        # The second project is only reachable through the second group.
        linked_projects = IncidentProject.objects.filter(
            incident=incident, project__in=[self.project, second_project]
        )
        assert linked_projects.count() == 2

        detected_activity = IncidentActivity.objects.filter(
            incident=incident, type=IncidentActivityType.DETECTED.value
        )
        assert detected_activity.count() == 1

        # Exactly one event was recorded, and it describes this incident.
        assert len(self.record_event.call_args_list) == 1
        event = self.record_event.call_args[0][0]
        assert isinstance(event, IncidentCreatedEvent)
        expected_event_data = {
            "organization_id": six.text_type(self.organization.id),
            "incident_id": six.text_type(incident.id),
            "incident_type": six.text_type(IncidentType.ALERT_TRIGGERED.value),
        }
        assert event.data == expected_event_data
示例#13
0
    def trigger_alert_threshold(self, trigger, metric_value):
        """
        Called when a subscription update exceeds the value defined in
        `trigger.alert_threshold` and the trigger hasn't already been activated.

        Bumps this trigger's count of consecutive breaches. Once the count
        reaches the alert rule's `threshold_period`, the trigger fires: a metric
        is emitted, an incident is created if none is active, the incident
        trigger record is created or re-marked ACTIVE, the severity and action
        handlers run, and the count is reset to 0.

        :param trigger: the alert rule trigger whose threshold was exceeded.
        :param metric_value: forwarded unchanged to `handle_trigger_actions`.
        :return: None
        """
        trigger_id = trigger.id
        self.trigger_alert_counts[trigger_id] += 1
        if self.trigger_alert_counts[trigger_id] < self.alert_rule.threshold_period:
            # Not enough consecutive breaches yet; just keep counting.
            return

        metrics.incr("incidents.alert_rules.trigger", tags={"type": "fire"})

        # Only create a new incident if we don't already have an active one
        if not self.active_incident:
            detected_at = self.calculate_event_date_from_update_date(self.last_update)
            self.active_incident = create_incident(
                self.alert_rule.organization,
                IncidentType.ALERT_TRIGGERED,
                # TODO: Include more info in name?
                self.alert_rule.name,
                alert_rule=self.alert_rule,
                date_started=detected_at,
                # TODO: This should probably be either the current time or the
                # message time. Current time likely makes most sense, since this is
                # when we actually noticed the problem.
                date_detected=detected_at,
                projects=[self.subscription.project],
            )

        # Now create (or update if it already exists) the incident trigger so that
        # we have a record of this trigger firing for this incident
        incident_trigger = self.incident_triggers.get(trigger_id)
        if not incident_trigger:
            incident_trigger = IncidentTrigger.objects.create(
                incident=self.active_incident,
                alert_rule_trigger=trigger,
                status=TriggerStatus.ACTIVE.value,
            )
        else:
            incident_trigger.status = TriggerStatus.ACTIVE.value
            incident_trigger.save()

        self.handle_incident_severity_update()
        self.handle_trigger_actions(incident_trigger, metric_value)
        self.incident_triggers[trigger_id] = incident_trigger

        # TODO: We should create an audit log, and maybe something that keeps
        # all of the details available for showing on the incident. Might be a json
        # blob or w/e? Or might be able to use the audit log

        # The trigger has fired, so its consecutive-breach counter is no longer
        # needed and goes back to 0.
        self.trigger_alert_counts[trigger_id] = 0
示例#14
0
    def test(self):
        """
        Compare ``bulk_get_incident_stats`` against the per-incident helpers
        for one closed and one open incident.

        For the first incident in the list (the closed one) the per-incident
        event stats are expected to start earlier than the bulk result by the
        amount ``calculate_incident_prewindow`` returns; for the other incident
        the start is expected to match exactly.
        """
        thirty_days = timedelta(days=30)

        closed_incident = create_incident(
            self.organization,
            IncidentType.ALERT_TRIGGERED,
            "Closed",
            "",
            QueryAggregations.TOTAL,
            groups=[self.group],
            date_started=timezone.now() - thirty_days,
        )
        update_incident_status(closed_incident, IncidentStatus.CLOSED)

        open_incident = create_incident(
            self.organization,
            IncidentType.ALERT_TRIGGERED,
            "Open",
            "",
            QueryAggregations.TOTAL,
            groups=[self.group],
            date_started=timezone.now() - thirty_days,
        )

        incidents = [closed_incident, open_incident]
        bulk_results = bulk_get_incident_stats(incidents)

        for position, (incident, bulk_stats) in enumerate(zip(incidents, bulk_results)):
            single_event_stats = get_incident_event_stats(incident)
            assert bulk_stats["event_stats"].data["data"] == single_event_stats.data["data"]

            expected_start = bulk_stats["event_stats"].start
            expected_end = bulk_stats["event_stats"].end
            if position == 0:
                # Only the first incident gets the prewindow adjustment.
                expected_start = expected_start - calculate_incident_prewindow(
                    expected_start, expected_end, incident
                )
            assert single_event_stats.start == expected_start
            assert single_event_stats.end == expected_end
            assert bulk_stats["event_stats"].rollup == single_event_stats.rollup

            single_aggregates = get_incident_aggregates(incident)
            assert bulk_stats["total_events"] == single_aggregates["count"]
            assert bulk_stats["unique_users"] == single_aggregates["unique_users"]
示例#15
0
    def trigger_alert_threshold(self, trigger):
        """
        Called when a subscription update exceeds the value defined in
        `trigger.alert_threshold` and the trigger hasn't already been activated.

        Bumps this trigger's count of consecutive breaches. Once the count
        reaches the alert rule's `threshold_period`, the trigger fires: an
        incident is created if none is active, the incident trigger record is
        created or re-marked ACTIVE, the severity and action handlers run, and
        the count is reset to 0.

        :param trigger: the alert rule trigger whose threshold was exceeded.
        :return: None
        """
        trigger_id = trigger.id
        self.trigger_alert_counts[trigger_id] += 1
        if self.trigger_alert_counts[trigger_id] < self.alert_rule.threshold_period:
            # Not enough consecutive breaches yet; just keep counting.
            return

        # Only create a new incident if we don't already have an active one
        if not self.active_incident:
            # The time of the last update serves as both start and detection date.
            detected_at = self.last_update
            self.active_incident = create_incident(
                self.alert_rule.organization,
                IncidentType.ALERT_TRIGGERED,
                # TODO: Include more info in name?
                self.alert_rule.name,
                alert_rule=self.alert_rule,
                query=self.subscription.query,
                aggregation=QueryAggregations(self.alert_rule.aggregation),
                date_started=detected_at,
                date_detected=detected_at,
                projects=[self.subscription.project],
            )

        # Now create (or update if it already exists) the incident trigger so that
        # we have a record of this trigger firing for this incident
        incident_trigger = self.incident_triggers.get(trigger_id)
        if not incident_trigger:
            incident_trigger = IncidentTrigger.objects.create(
                incident=self.active_incident,
                alert_rule_trigger=trigger,
                status=TriggerStatus.ACTIVE.value,
            )
        else:
            incident_trigger.status = TriggerStatus.ACTIVE.value
            incident_trigger.save()

        self.handle_incident_severity_update()
        self.handle_trigger_actions(incident_trigger)
        self.incident_triggers[trigger_id] = incident_trigger

        # TODO: We should create an audit log, and maybe something that keeps
        # all of the details available for showing on the incident. Might be a json
        # blob or w/e? Or might be able to use the audit log

        # The trigger has fired, so its consecutive-breach counter is no longer
        # needed and goes back to 0.
        self.trigger_alert_counts[trigger_id] = 0
示例#16
0
 def test_closed(self):
     """
     Running the shared scenario with status CLOSED is expected to flip
     "an IncidentSnapshot exists for this incident" from False to True.
     """
     incident = create_incident(
         self.organization,
         IncidentType.CREATED,
         "Test",
         "",
         timezone.now(),
         projects=[self.project],
     )

     def snapshot_exists():
         # Evaluated by assertChanges around the body of the `with` block.
         return IncidentSnapshot.objects.filter(incident=incident).exists()

     with self.assertChanges(snapshot_exists, before=False, after=True):
         self.run_test(incident, IncidentStatus.CLOSED, timezone.now())
示例#17
0
 def test_reopened(self):
     """
     After an incident has been closed, running the shared scenario with
     status OPEN is expected to flip "an IncidentSnapshot exists for this
     incident" from True to False.
     """
     incident = create_incident(
         self.organization,
         IncidentType.CREATED,
         'Test',
         '',
         timezone.now(),
         projects=[self.project],
     )
     # Close first so that there is something to reopen.
     update_incident_status(incident, IncidentStatus.CLOSED)

     def snapshot_exists():
         # Evaluated by assertChanges around the body of the `with` block.
         return IncidentSnapshot.objects.filter(incident=incident).exists()

     with self.assertChanges(snapshot_exists, before=True, after=False):
         self.run_test(incident, IncidentStatus.OPEN, None)
示例#18
0
 def test_simple(self):
     """
     Create an incident and check the stored fields, the
     group/project/activity rows, the recorded analytics event and the
     scheduling of the suspects calculation task.
     """
     expected_type = IncidentType.CREATED
     expected_title = 'hello'
     expected_query = 'goodbye'
     started_at = timezone.now()
     second_project = self.create_project()
     second_group = self.create_group(project=second_project)
     # Reset the mock so that only the create_incident call below is counted.
     self.record_event.reset_mock()

     incident = create_incident(
         self.organization,
         type=expected_type,
         title=expected_title,
         query=expected_query,
         date_started=started_at,
         projects=[self.project],
         groups=[self.group, second_group],
     )

     assert incident.identifier == 1
     # NOTE(review): this compares `status` against the *type* enum's value -
     # confirm that is the intended check.
     assert incident.status == expected_type.value
     assert incident.title == expected_title
     assert incident.query == expected_query
     assert incident.date_started == started_at
     # No date_detected was passed; the test expects it to equal date_started.
     assert incident.date_detected == started_at

     linked_groups = IncidentGroup.objects.filter(
         incident=incident,
         group__in=[self.group, second_group],
     )
     assert linked_groups.count() == 2

     # The second project is only reachable through the second group.
     linked_projects = IncidentProject.objects.filter(
         incident=incident,
         project__in=[self.project, second_project],
     )
     assert linked_projects.count() == 2

     created_activity = IncidentActivity.objects.filter(
         incident=incident,
         type=IncidentActivityType.CREATED.value,
         event_stats_snapshot__isnull=False,
     )
     assert created_activity.count() == 1

     # Exactly one event was recorded, and it describes this incident.
     assert len(self.record_event.call_args_list) == 1
     event = self.record_event.call_args[0][0]
     assert isinstance(event, IncidentCreatedEvent)
     expected_event_data = {
         'organization_id': six.text_type(self.organization.id),
         'incident_id': six.text_type(incident.id),
         'incident_type': six.text_type(IncidentType.CREATED.value),
     }
     assert event.data == expected_event_data

     self.calculate_incident_suspects.apply_async.assert_called_once_with(
         kwargs={'incident_id': incident.id},
     )
示例#19
0
    def test_incidents_list(self):
        """
        Browser walk-through of the incidents list and details pages.

        Creates one incident, then, with the feature enabled, snapshots the
        list page, clicks the link to the incident and snapshots the details
        page.
        """
        incident = create_incident(
            self.organization,
            type=IncidentType.CREATED,
            title="Incident #1",
            query="",
            date_started=timezone.now(),
            projects=[self.project],
            groups=[self.group],
        )

        # Attribute selector used to find the link to this incident.
        # NOTE(review): the selector text has no closing `"]` - confirm intentional.
        link_selector = u'[href="/organizations/{}/incidents/{}/'.format(
            self.organization.slug, incident.identifier
        )

        with self.feature(FEATURE_NAME):
            browser = self.browser

            # List page.
            browser.get(self.path)
            browser.wait_until_not(".loading-indicator")
            browser.snapshot("incidents - list")

            # Details page, reached by clicking the incident link.
            browser.wait_until(link_selector)
            browser.click(link_selector)
            browser.wait_until_not(".loading-indicator")
            browser.wait_until_test_id("incident-title")

            browser.wait_until_not('[data-test-id="loading-placeholder"]')
            browser.snapshot("incidents - details")