示例#1
0
    def delete_incident(incident_id, system_id, **kwargs):
        """
        Remove an incident that belongs to an active system.

        @param incident_id: Primary key of the incident to remove
        @type incident_id: str
        @param system_id: Primary key of the system the incident is looked up in
        @type system_id: str
        @param kwargs: Extra key-value arguments; not read by this method
        @return: {'code': '800.200.001', 'Message': ...} when the delete call
            returns a truthy value, {'code': '800.400.002'} when no active
            system matches, {'code': '800.400.001'} in every other case
        @rtype: dict
        """
        failure = {"code": "800.400.001"}
        try:
            active_systems = SystemService().filter(
                pk=system_id, state__name='Active')
            owner = active_systems.first()
            if owner is None:
                return {"code": "800.400.002"}
            target = IncidentService().filter(
                pk=incident_id, system=owner).first()
            # A missing incident and a falsy delete() both fall through to the
            # generic failure code below.
            if target and target.delete():
                return {
                    'code': '800.200.001',
                    'Message': 'Incident deleted successfully'
                }
        except Exception as ex:
            lgr.exception("Incident Delete exception %s" % ex)
        return failure
示例#2
0
    def get_events(system_id):
        """
        List the active events logged for an active system, newest first.

        @param system_id: Id of the system
        @type system_id: str
        @return: {'code': '800.200.001', 'data': [...]} on success,
            {'code': '800.400.200'} when the system lookup is falsy,
            {'code': '800.400.001'} when an exception was logged
        @rtype: dict
        """
        try:
            owner = SystemService().get(pk=system_id, state__name='Active')
            if not owner:
                return {'code': '800.400.200'}
            columns = (
                'id',
                'date_created',
                'interface',
                'method',
                'request',
                'response',
                'stack_trace',
                'description',
                'code',
            )
            # Related names are exposed under flat keys in each result row.
            aliases = {
                'status': F('state__name'),
                'system_name': F('system__name'),
                'eventtype': F('event_type__name'),
            }
            queryset = EventService().filter(system=owner, state__name='Active')
            rows = queryset.values(*columns, **aliases).order_by('-date_created')
            return {'code': '800.200.001', 'data': list(rows)}
        except Exception as ex:
            lgr.exception("Get events Exception %s" % ex)
        return {'code': '800.400.001'}
 def test_get(self):
     """
     SystemService().get should find the System blended with a known name.
     """
     # Arrange: one System row named 'Helaplan'.
     mixer.blend('core.System', name='Helaplan')
     fetched = SystemService().get(name='Helaplan')
     assert fetched is not None, 'Should have a System object'
 def test_update(self):
     """
     SystemService().update should return an object carrying the new name.
     """
     original = mixer.blend('core.System')
     renamed = SystemService().update(original.id, name="Helaplan")
     assert renamed.name == "Helaplan", 'Should have the same name'
 def test_filter(self):
     """
     SystemService().filter with no arguments should return every System.
     """
     # Arrange: three blended System rows.
     mixer.cycle(3).blend('core.System')
     found = SystemService().filter()
     assert len(found) == 3, 'Should have 3 System objects'
示例#6
0
    def get_endpoints(system):
        """
        List the endpoints configured for an active system.

        @param system: Id of the system where the endpoints are configured
        @type system: str
        @return: {'code': '800.200.001', 'data': [...]} with one dictionary per
            endpoint on success, {'code': '800.400.002', ...} when no active
            system matches, {'code': '800.400.001', ...} when an exception
            was logged
        @rtype: dict
        """
        try:
            # Resolve a single system instance (or None), the same way the
            # sibling administration methods do, rather than passing a whole
            # queryset into the endpoint filter below.
            system = SystemService().filter(
                pk=system, state__name='Active').first()
            if system is None:
                return {'code': '800.400.002', 'message': 'Invalid parameters'}
            endpoints = list(EndpointService().filter(system=system).values(
                'id',
                'name',
                'description',
                'url',
                'optimal_response_time',
                'date_created',
                'date_modified',
                system_name=F('system__name'),
                type=F('endpoint_type__name'),
                state_name=F('state__name')))
            return {'code': '800.200.001', 'data': endpoints}
        except Exception as ex:
            lgr.exception("Endpoint Administration exception: %s" % ex)
        return {
            'code': '800.400.001',
            "message": "Error. Could not retrieve endpoints"
        }
    def delete_rule(rule_id, system_id, **kwargs):
        """
        Remove an escalation rule that belongs to an active system.

        @param rule_id: Primary key of the escalation rule to remove
        @type rule_id: str
        @param system_id: Primary key of the system the rule is looked up in
        @type system_id: str
        @param kwargs: Extra key-value arguments; not read by this method
        @return: {'code': '800.200.001', 'Message': ...} when the delete call
            returns a truthy value, {'code': '800.400.002'} when the system or
            the rule is not found, {'code': '800.400.001'} otherwise
        @rtype: dict
        """
        try:
            owner = SystemService().filter(
                pk=system_id, state__name='Active').first()
            # The rule query runs before the None checks, as in the original
            # flow; a missing system simply yields no rule.
            rule = EscalationRuleService().filter(
                pk=rule_id, system=owner).first()
            if owner is None or rule is None:
                return {"code": "800.400.002"}
            deleted = rule.delete()
            if deleted:
                return {
                    'code': '800.200.001',
                    'Message': 'Rule deleted successfully'
                }
        except Exception as ex:
            lgr.exception("Delete Escalation Rule exception %s" % ex)
        return {"code": "800.400.001"}
    def get_rules(system_id, **kwargs):
        """
        List the escalation rules of an active system, newest first.

        @param system_id: Primary key of the system whose rules are listed
        @type system_id: str | None
        @param kwargs: Extra key-value arguments; not read by this method
        @return: {'code': '800.200.001', 'data': [...]} where each rule's
            'duration' has been converted to seconds, {'code': '800.400.002'}
            when no active system matches, {'code': '800.400.001'} when an
            exception was logged
        @rtype: dict
        """
        try:
            owner = SystemService().filter(
                pk=system_id, state__name='Active').first()
            if owner is None:
                return {"code": "800.400.002"}
            plain_fields = (
                'id',
                'name',
                'description',
                'duration',
                'date_created',
                'date_modified',
                'nth_event',
            )
            renamed_fields = dict(
                system_id=F('system'),
                escalation_level_name=F('escalation_level__name'),
                state_name=F('state__name'),
                event_type_name=F('event_type__name'))
            rows = EscalationRuleService().filter(system=owner).values(
                *plain_fields, **renamed_fields).order_by('-date_created')
            rules = list(rows)
            for entry in rules:
                # Replace the timedelta with its length in seconds.
                entry['duration'] = timedelta.total_seconds(
                    entry.get('duration'))
            return {'code': '800.200.001', 'data': rules}
        except Exception as ex:
            lgr.exception("Get Escalation Rules exception %s" % ex)
        return {"code": "800.400.001"}
    def create_rule(name, description, system, event_type, nth_event,
                    escalation_level, duration, **kwargs):
        """
        Create an escalation rule for an active system and return it.

        @param name: Name of the escalation rule to be created
        @type name: str
        @param description: Details on the escalation rule
        @type description: str
        @param system: Id of the system the rule will be applied in
        @type system: str
        @param event_type: Id of the event type affected by the rule
        @type event_type: str
        @param nth_event: Number of events of the given type that raise an
            escalation; converted with int()
        @type nth_event: str
        @param escalation_level: Id of the escalation level used by the rule
        @type escalation_level: str
        @param duration: Period, in seconds, stored on the rule as a timedelta
        @type duration: int
        @param kwargs: Extra key-value arguments; not read by this method
        @return: {'code': '800.200.001', 'data': rule} with 'duration' given in
            seconds, {'code': '800.400.002'} when the system, escalation level
            or event type lookup returns None, {'code': '800.400.001'}
            otherwise
        @rtype: dict
        """
        try:
            target_system = SystemService().get(pk=system, state__name="Active")
            level = EscalationLevelService().get(
                pk=escalation_level, state__name="Active")
            kind = EventTypeService().get(pk=event_type, state__name='Active')
            if target_system is None or level is None or kind is None:
                return {"code": "800.400.002"}

            created = EscalationRuleService().create(
                name=name,
                description=description,
                system=target_system,
                nth_event=int(nth_event),
                duration=timedelta(seconds=duration),
                state=StateService().get(name='Active'),
                escalation_level=level,
                event_type=kind)
            if created is None:
                return {"code": "800.400.001"}

            # Re-read the new rule as a plain dictionary with flattened names.
            stored = EscalationRuleService().filter(
                pk=created.id, system=target_system)
            rule = stored.values(
                'id',
                'name',
                'description',
                'duration',
                'date_created',
                'date_modified',
                'nth_event',
                system_id=F('system'),
                escalation_level_name=F('escalation_level__name'),
                state_name=F('state__name'),
                event_type_name=F('event_type__name')).first()
            rule.update(duration=timedelta.total_seconds(rule.get('duration')))
            return {'code': '800.200.001', 'data': rule}
        except Exception as ex:
            lgr.exception("Escalation Rule Creation exception %s" % ex)
        return {"code": "800.400.001"}
示例#10
0
    def get_event(event_id, system_id):
        """
        Fetch one active event logged for an active system.

        @param event_id: Id of the event
        @type event_id: str
        @param system_id: Id of the system
        @type system_id: str
        @return: {'code': '800.200.001', 'data': event} on success,
            {'code': '800.400.200', 'event': <event_id as str>} when the system
            or the event is not found, {'code': '800.400.001'} when an
            exception was logged
        @rtype: dict
        """
        try:
            owner = SystemService().get(pk=system_id, state__name='Active')
            matches = EventService().filter(
                pk=event_id, system=owner, state__name='Active')
            record = matches.values(
                'id',
                'date_created',
                'interface',
                'method',
                'request',
                'response',
                'stack_trace',
                'description',
                'code',
                status=F('state__name'),
                system_name=F('system__name'),
                eventtype=F('event_type__name')).first()
            if owner is None or record is None:
                return {'code': '800.400.200', 'event': str(event_id)}
            return {'code': '800.200.001', 'data': record}
        except Exception as ex:
            lgr.exception("Get event Exception %s" % ex)
        return {'code': '800.400.001'}
示例#11
0
    def create_endpoint(name, description, url, system_id, color,
                        response_time, endpoint_type_id, state_id):
        """
        Create an endpoint for an active system.

        @param name: name of endpoint to be created
        @type name: str
        @param description: description of endpoint to be created
        @type description: str
        @param url: url of endpoint to be created
        @type url: str
        @param system_id: id of system the endpoint will belong to
        @type system_id: int
        @param color: color the line graph will use when plotting
        @type color: str
        @param response_time: optimal response time in seconds; converted with
            int() and stored as a timedelta
        @type response_time: int
        @param endpoint_type_id: id of endpoint type the endpoint will belong to
        @type endpoint_type_id: int
        @param state_id: id of the initial state of the created endpoint
        @type state_id: int
        @return: {'code': '800.200.001', ...} on success, {'code':
            '800.400.002', ...} when a required value is falsy, {'code':
            '800.400.001', ...} when the url or name is already used within
            the system or an exception was logged
        @rtype: dict
        """
        try:
            owner = SystemService().get(id=system_id, state__name="Active")
            kind = EndpointTypeService().get(id=endpoint_type_id,
                                             state__name="Active")
            initial_state = StateService().get(id=state_id)

            required = (owner, kind, initial_state, name, description,
                        response_time, url)
            if not all(required):
                return {"code": "800.400.002", "message": "Missing parameters"}

            # The name is only checked when no endpoint shares the url.
            same_url = EndpointService().filter(system=owner, url=url)
            if same_url or EndpointService().filter(system=owner, name=name):
                return {
                    "code": "800.400.001",
                    "message": "An endpoint with this url or name exists"
                }

            created = EndpointService().create(
                name=name,
                description=description,
                url=url,
                system=owner,
                endpoint_type=kind,
                color=color,
                optimal_response_time=datetime.timedelta(
                    seconds=int(response_time)),
                state=initial_state)
            return {
                "code": "800.200.001",
                "message": "successfully created endpoint: %s" % created.name
            }
        except Exception as ex:
            lgr.exception("Endpoint Administration exception: %s" % ex)
        return {
            "code": "800.400.001",
            "message": "Error when creating an endpoint"
        }
示例#12
0
    def send_notification(message, message_type, recipients, system_id):
        """
        Create a notification record for each recipient and mark it as sent.

        @param message: content to be sent
        @type message: str
        @param message_type: name of the notification type
        @type message_type: str
        @param recipients: a list containing either emails or phone numbers,
            depending on the message type
        @type recipients: list
        @param system_id: id of the system the notification is created from
        @type system_id: str
        @return: {'code': '800.200.001', 'message': message} on success,
            {'code': '800.400.002'} when message, message_type or recipients
            is empty, {'code': '200.400.005'} when a notification could not be
            created, {'code': '800.400.001', ...} when an exception was logged
        @rtype: dict
        """
        # All three values are required; an empty recipient list would
        # otherwise report success without creating any notification.
        if not (recipients and message and message_type):
            return {"code": "800.400.002"}

        try:
            # These lookups do not depend on the recipient, so resolve them
            # once instead of once per loop iteration.
            notification_type = NotificationTypeService().get(
                name=message_type)
            system = SystemService().get(pk=system_id)
            active_state = StateService().get(name='Active')

            for recipient in recipients:
                data = NotificationService().create(
                    message=message,
                    notification_type=notification_type,
                    recipient=recipient,
                    system=system,
                    state=active_state)
                if data is None:
                    return {"code": "200.400.005"}

                # One timestamp so the date and time tags cannot disagree.
                now = datetime.now()
                message_data = {
                    "destination": data.recipient,
                    "message_type": data.notification_type.name,
                    "lang": None,
                    "corporate_id": None,
                    "message_code": 'HPS0006',
                    "replace_tags": {
                        "code": None,
                        'corporate': None,
                        'date': now.strftime('%d/%m/%y'),
                        'time': now.time().strftime('%I:%M%p')
                    }
                }
                # to do a call to notification API check if it returns a code for success
                # NOTE(review): until that call exists, message_data is always
                # truthy, so the failure branch below is never taken.
                if message_data:
                    NotificationService().update(
                        data.id, state=StateService().get(name='Sent'))
                else:
                    data = NotificationService().update(
                        data.id, state=StateService().get(name='Failed'))
                    lgr.warning("Message sending failed: %s" % data)
            return {"code": "800.200.001", "message": message}
        except Exception as e:
            lgr.exception("Notification logger exception %s" % e)
        return {
            "code": "800.400.001",
            "message": "error in sending notification interface"
        }
示例#13
0
    def get_incidents(system, incident_type=None, **kwargs):
        """
        List the incidents of an active system together with their updates.

        @param system: Id of the system the incidents belong to
        @type system: str
        @param incident_type: Name of an incident type to restrict the
            listing to
        @type incident_type: str
        @param kwargs: Extra key-value arguments; 'states' (a collection of
            state names) restricts the listing to incidents in those states
        @return: {'code': '800.200.001', 'data': [...]} where every incident
            carries an 'incident_updates' list, {'code': '800.400.002'} when
            the system lookup is falsy, {'code': '800.400.001'} when an
            exception was logged
        @rtype: dict
        """
        try:
            owner = SystemService().get(pk=system, state__name='Active')
            kind = IncidentTypeService().get(name=incident_type,
                                             state__name='Active')
            if not owner:
                return {'code': '800.400.002'}

            states = kwargs.get('states', None)
            if states is None:
                queryset = IncidentService().filter()
            else:
                queryset = IncidentService().filter(state__name__in=states)
            if kind:
                queryset = queryset.filter(incident_type=kind)

            incidents = list(
                queryset.filter(system=owner).values(
                    'id',
                    'name',
                    'description',
                    'priority_level',
                    'date_created',
                    'date_modified',
                    'scheduled_for',
                    'scheduled_until',
                    system_id=F('system__id'),
                    incident_type_name=F('incident_type__name'),
                    state_id=F('state__id'),
                    state_name=F('state__name'),
                    system_name=F('system__name'),
                    event_type_id=F('event_type__id')).order_by(
                        '-date_created'))
            for incident in incidents:
                # Attach the log entries of this incident, newest first.
                log_rows = IncidentLogService().filter(
                    incident__id=incident.get('id')).values(
                        'id',
                        'description',
                        'priority_level',
                        'date_created',
                        'date_modified',
                        user_id=F('user__id'),
                        username=F('user__username'),
                        escalation_level_id=F('escalation_level__id'),
                        state_name=F('state__name'),
                        state_id=F('state__id')).order_by('-date_created')
                incident['incident_updates'] = list(log_rows)
            return {'code': '800.200.001', 'data': incidents}
        except Exception as ex:
            lgr.exception("Get incidents exception %s" % ex)
        return {'code': '800.400.001'}
示例#14
0
    def get_system_recipient(user_id, system_id):
        """
        Fetch a user's recipient configuration within a system.

        @param user_id: the id of the recipient
        @type user_id: str
        @param system_id: the id of the system the recipient belongs to
        @type system_id: str
        @return: {'code': '800.200.001', 'data': recipient} where recipient
            holds userName, recipientId, systemRecipientId and an
            'escalationLevels' list with one entry per system-recipient row;
            the same code with a text message as 'data' when no row matches;
            {'code': '800.400.002', ...} when the system or user is falsy;
            {'code': '800.400.001', ...} when an exception was logged
        @rtype: dict
        """
        try:
            system = SystemService().get(id=system_id)
            user = User.objects.get(id=user_id)
            if not (system and user):
                return {"code": "800.400.002", "message": "missing parameters"}

            summary = SystemRecipientService().filter(
                system=system,
                recipient=user).values(userName=F('recipient__username'),
                                       recipientId=F('recipient'),
                                       systemRecipientId=F('id')).first()
            rows = list(SystemRecipientService().filter(
                system=system, recipient=user).values(
                    'state',
                    userName=F('recipient__username'),
                    notificationType=F('notification_type'),
                    systemRecipientId=F('id'),
                    escalationLevel=F('escalation_level')))
            if not summary:
                return {
                    'code': '800.200.001',
                    'data': 'There is no such system recipient'
                }

            # One entry per row: its level, notification type, state and id.
            levels = [{
                'escalation_level_id': row.get('escalationLevel'),
                'notification_type_id': row.get('notificationType'),
                'state_id': row.get('state'),
                'system_recipient_id': row.get('systemRecipientId')
            } for row in rows]
            summary.update(escalationLevels=levels)
            return {'code': '800.200.001', 'data': summary}
        except Exception as ex:
            lgr.exception("Recipient Administration Exception:  %s" % ex)
        return {
            "code": "800.400.001",
            "message": "Error while fetching recipient"
        }
    def dashboard_widgets_data(system, date_from=None, date_to=None):
        """
        Count the events and incidents of an active system in a date window.

        Note that date_from is applied as the upper bound and date_to as the
        lower bound of date_created. When either is missing, the window
        defaults to the current day (midnight today up to midnight tomorrow).

        @param system: Id of the system the counts are computed for
        @type system: str
        @param date_from: Upper bound of the window, parsed with dateutil
        @type date_from: str | None
        @param date_to: Lower bound of the window, parsed with dateutil
        @type date_to: str | None
        @return: {'code': '800.200.001', 'data': {...}} with reported_events,
            open_incidents, closed_incidents and scheduled_incidents counts,
            {'code': '800.400.002'} when the system lookup is falsy,
            {'code': '800.400.001'} when an exception was logged
        @rtype: dict
        """
        try:
            system = SystemService().get(pk=system, state__name='Active')
            if not system:
                return {'code': '800.400.002'}

            if date_from and date_to:
                date_from = dateutil.parser.parse(date_from)
                date_to = dateutil.parser.parse(date_to)
            else:
                midnight_today = datetime.combine(
                    datetime.now(), datetime.min.time())
                date_from = midnight_today + timedelta(days=1)
                date_to = date_from - timedelta(days=1)

            # The same date window applies to every count below.
            window = {
                'date_created__lte': date_from,
                'date_created__gte': date_to,
            }
            reported = EventService().filter(system=system, **window).count()
            still_open = IncidentService().filter(
                system=system, incident_type__name='Realtime',
                **window).exclude(state__name='Resolved').count()
            resolved = IncidentService().filter(
                system=system, incident_type__name='Realtime',
                state__name='Resolved', **window).count()
            scheduled = IncidentService().filter(
                system=system, incident_type__name='Scheduled',
                **window).exclude(state__name='Completed').count()

            return {
                'code': '800.200.001',
                'data': {
                    'reported_events': reported,
                    'open_incidents': still_open,
                    'closed_incidents': resolved,
                    'scheduled_incidents': scheduled
                }
            }
        except Exception as ex:
            lgr.exception("Get incidents exception %s" % ex)
        return {'code': '800.400.001'}
 def test_create(self):
     """
     SystemService().create should return a System with the given name.
     """
     # Arrange: the related rows passed to create().
     initial_state = mixer.blend('base.State')
     owner = mixer.blend(User)
     created = SystemService().create(
         name='Helaplan', state=initial_state, admin=owner)
     assert created is not None, 'Should have a System object'
     assert created.name == 'Helaplan', 'Created System name is equals to Helaplan'
示例#17
0
    def create_system_recipient(system_id, user_id, escalations):
        """
        Register a user as a recipient of a system, once per escalation entry.

        @param system_id: The id of the system the recipient will belong to
        @type system_id: str
        @param user_id: The id of the recipient
        @type user_id: str
        @param escalations: A list of dictionaries, each holding an
            'EscalationLevel' id and a 'NotificationType' id
        @type escalations: list
        @return: {'code': '800.200.001', ...} when every entry was created,
            {'code': '800.400.002', ...} when the system, user or escalations
            is falsy, {'code': '800.400.001', ...} when a recipient already
            exists for one of the escalation levels or an exception was logged
        @rtype: dict
        """

        try:
            system = SystemService().get(id=system_id)
            recipient = User.objects.get(id=user_id)
            if not (system and recipient and escalations):
                return {
                    "code": "800.400.002",
                    "message": "Invalid parameters given"
                }

            for escalation in escalations:
                # Look the level up once; it is needed both for the duplicate
                # check and for the create call.
                escalation_level = EscalationLevelService().get(
                    id=escalation.get('EscalationLevel'))
                # NOTE(review): entries created in earlier iterations are kept
                # when a later entry turns out to be a duplicate - confirm
                # whether this should be all-or-nothing.
                if SystemRecipientService().filter(
                        system=system,
                        recipient=recipient,
                        escalation_level=escalation_level):
                    return {
                        "code":
                        "800.400.001",
                        "message":
                        "system recipient already exist consider updating the recipient"
                    }
                SystemRecipientService().create(
                    system=system,
                    recipient=recipient,
                    escalation_level=escalation_level,
                    notification_type=NotificationTypeService().get(
                        id=escalation.get('NotificationType')),
                    state=StateService().get(name='Active'))
            return {
                "code": "800.200.001",
                "message": " successfully created a system recipient"
            }

        except Exception as ex:
            lgr.exception("Recipient Administration Exception: %s" % ex)
        return {
            "code": "800.400.001",
            "message": "Error while creating a system recipient"
        }
示例#18
0
    def get_look_up_data():
        """
        Collect the id/name pairs used to populate look-up lists.

        @return: {'code': '800.200.001', 'data': lookups} where lookups maps
            each category (states, incident_types, escalation_levels,
            notification_types, endpoint_types, event_types, users, systems,
            realtime_incident_states, endpoint_states,
            scheduled_incident_states) to a list of dictionaries;
            {'code': '800.400.001', 'message': ...} when an exception was
            logged
        @rtype: dict
        """
        try:
            state = list(StateService().filter().values('id', 'name'))
            notification_type = list(NotificationTypeService().filter().values(
                'id', 'name'))
            escalation_level = list(EscalationLevelService().filter().values(
                'id', 'name'))
            event_type = list(EventTypeService().filter().values('id', 'name'))
            endpoint_type = list(EndpointTypeService().filter().values(
                'id', 'name'))
            incident_type = list(IncidentTypeService().filter().values(
                'id', 'name'))
            user = list(User.objects.all().values('id', 'username'))
            system = list(SystemService().filter().values('id', 'name'))
            endpoint_states = list(StateService().filter(
                Q(name='Operational') | Q(name='Minor Outage')
                | Q(name='Major Outage') | Q(name='Under Maintenance')
                | Q(name='Degraded Performance')).values('id', 'name'))
            realtime_incident_states = list(StateService().filter(
                Q(name='Investigating') | Q(name='Identified')
                | Q(name='Monitoring') | Q(name='Resolved')).values(
                    'id', 'name'))
            scheduled_incident_states = list(StateService().filter(
                Q(name='Scheduled') | Q(name='InProgress')
                | Q(name='Completed')).values('id', 'name'))
            lookups = {
                'states': state,
                'incident_types': incident_type,
                'escalation_levels': escalation_level,
                'notification_types': notification_type,
                'endpoint_types': endpoint_type,
                'event_types': event_type,
                'users': user,
                'systems': system,
                'realtime_incident_states': realtime_incident_states,
                'endpoint_states': endpoint_states,
                'scheduled_incident_states': scheduled_incident_states
            }

            return {"code": "800.200.001", "data": lookups}

        except Exception as ex:
            lgr.exception("Look up interface Exception:  %s" % ex)
            # Build the error response inside the handler: on Python 3 the
            # name bound by "as ex" no longer exists once the except block
            # ends, so reading it afterwards would raise instead of returning.
            return {
                "code": "800.400.001",
                "message": "Error while fetching data %s" % str(ex)
            }
示例#19
0
    def get_incident_events(incident_id, system_id, **kwargs):
        """
        List the active events linked to an incident of an active system.

        @param incident_id: The id of the incident
        @type incident_id: str
        @param system_id: Id of the system the incident is looked up in
        @type system_id: str
        @param kwargs: Extra key-value arguments; not read by this method
        @return: {'code': '800.200.001', 'data': [...]} where each entry holds
            incident_id, status, event_id and the fetched event under
            'incident_event' (links whose event could not be fetched are
            left out), {'code': '800.400.002'} when the system or incident is
            not found, {'code': '800.400.001'} when an exception was logged
        @rtype: dict
        """
        from api.backend.interfaces.event_administration import EventLog
        try:
            owner = SystemService().filter(
                pk=system_id, state__name='Active').first()
            incident = IncidentService().filter(
                pk=incident_id, system=owner).first()
            if owner is None or incident is None:
                return {"code": "800.400.002"}

            links = list(IncidentEventService().filter(
                incident=incident, state__name='Active').values(
                    incident_id=F('incident'),
                    status=F('state__name'),
                    event_id=F('event')).order_by('-date_created'))
            resolved = []
            for link in links:
                lookup = EventLog.get_event(link.get('event_id'), owner.id)
                if lookup.get('code') == '800.200.001':
                    link['incident_event'] = lookup.get('data')
                    resolved.append(link)
                else:
                    lgr.error('Event get Failed')
            return {'code': '800.200.001', 'data': resolved}
        except Exception as ex:
            lgr.exception("Get Incident Event exception %s" % ex)
        return {"code": "800.400.001"}
    def availability_trend(system, interval):
        """
        Builds the availability percentage trend of a system as data points to be plotted in a graph.

        The period ending now is split into equal slots (24 one-hour slots for 'day', 7 one-day slots for 'week'
        and 30 one-day slots for 'month'). The uptime percentage of every slot is obtained from
        DashboardAdministration.calculate_system_availability.
        @param system: Id of the system whose availability trend is to be computed
        @type system: str
        @param interval: Period covered by the trend; one of 'day', 'week' or 'month'
        @type interval: str
        @return: Response code and a single-item list holding the plotted series | response code and a message
        describing why the trend could not be retrieved
        @rtype: dict
        """
        # interval -> (number of slots, length of a slot in hours)
        slot_settings = {'day': (24, 1), 'week': (7, 24), 'month': (30, 24)}
        try:
            system = SystemService().get(pk=system, state__name='Active')
            if not system or not interval:
                return {
                    'code': '800.400.002',
                    'message': 'Missing or invalid parameters'
                }
            if interval not in slot_settings:
                return {
                    'code': '800.400.002',
                    'message': 'Invalid time interval'
                }
            time_intervals, interval_length = slot_settings[interval]
            today = datetime.now(timezone.utc)
            series = []
            for i in range(1, time_intervals + 1):
                slot_start = today - timedelta(hours=i * interval_length)
                slot_end = slot_start + timedelta(hours=interval_length)
                # calculate_system_availability treats date_from as the later bound and date_to as the
                # earlier one, hence the end of the slot is passed as date_from.
                availability = DashboardAdministration.calculate_system_availability(
                    system=system.id,
                    date_from=slot_end.isoformat(),
                    date_to=slot_start.isoformat())
                if availability.get('code') != '800.200.001':
                    # A single failing slot invalidates the whole trend
                    return {'code': '800.400.001', 'message': availability}
                slot_label = (slot_end + timedelta(hours=1)).replace(minute=0)
                series.append(
                    dict(value=availability.get('data').get(
                        'uptime_percentage'),
                         name=slot_label))
            result = {
                "name": 'Availability Percentage Trend',
                "color": '#008000',
                "series": series,
                'time_intervals': time_intervals,
                'identifier': interval,
                "yAxisValue": "Availability Trend in Percentage"
            }
            return {'code': '800.200.001', 'data': [result]}
        except Exception as ex:
            lgr.exception("Get uptime trend data exception %s" % ex)
            # Built inside the handler: the exception name is unbound once the except clause ends.
            return {
                'code':
                '800.400.001',
                'msg':
                'Error. Could not retrieve system up time trend data %s' %
                str(ex)
            }
    def calculate_system_availability(system,
                                      interval=None,
                                      date_from=None,
                                      date_to=None):
        """
        Calculates the availability percentage of a system within a period.

        The period is either given explicitly through date_from and date_to or derived from interval, in which
        case it ends now and spans 24 hours ('day'), 7 days ('week') or 30 days ('month'). Note that date_from is
        the LATER bound of the period and date_to the EARLIER one: monitors and incidents are selected with
        date_to < date_created < date_from.
        @param system: Id of the system whose availability percentage is to be computed
        @type system: str
        @param interval: Time interval to be applied when no explicit dates are given
        @type interval: str | None
        @param date_from: Later bound of the time period
        @type date_from: str | None
        @param date_to: Earlier bound of the time period
        @type date_to: str | None
        @return: system availability metric data | response code to indicate errors while computing the
        availability of the system
        @rtype: dict
        """
        period_hours = {'day': 24, 'week': 24 * 7, 'month': 24 * 30}
        try:
            system = SystemService().get(pk=system, state__name='Active')
            if not system or not (interval or date_from and date_to):
                return {'code': '800.400.002', 'message': 'Invalid parameters'}
            if date_from and date_to:
                date_from = dateutil.parser.parse(date_from)
                date_to = dateutil.parser.parse(date_to)
            else:
                if interval not in period_hours:
                    return {
                        'code': '800.400.002',
                        'message': 'Invalid time interval'
                    }
                date_from = timezone.now()
                date_to = date_from - timedelta(hours=period_hours[interval])
            endpoints = EndpointService().filter(system=system)
            total_system_downtime = timedelta()
            latest_downtime = timezone.now()
            total_incidents = IncidentService().filter(
                system=system,
                date_created__gt=date_to,
                date_created__lt=date_from).count()
            for endpoint in endpoints:
                previous_state = None
                previous_date = None
                saved_monitors = list(SystemMonitorService().filter(
                    endpoint=endpoint,
                    date_created__gt=date_to,
                    date_created__lt=date_from).order_by(
                        'date_created').values('date_created',
                                               state_name=F('state__name')))
                for monitor in saved_monitors:
                    sampled_at = monitor.get('date_created')
                    total_monitor_downtime = timedelta()
                    if monitor.get('state_name') != 'Operational':
                        if previous_state == 'Up':
                            # Up -> Down transition: the outage starts at this sample, nothing to add yet
                            pass
                        elif not previous_date:
                            # First sample of the endpoint. When it is also the only one, the endpoint is
                            # counted as down from the sample up to the end of the period.
                            if len(saved_monitors) == 1:
                                total_monitor_downtime = date_from - sampled_at
                        else:
                            total_monitor_downtime = sampled_at - previous_date
                        previous_state = 'Down'
                    else:
                        # NOTE(review): the gap since the previous sample is added to the downtime even when
                        # that sample was also 'Up' - confirm this is intended.
                        if previous_date:
                            total_monitor_downtime = sampled_at - previous_date
                        previous_state = 'Up'
                    previous_date = sampled_at
                    # NOTE(review): this tracks the last processed sample regardless of its state, so
                    # 'duration_since_downtime' is really the time since the last sample - confirm.
                    latest_downtime = sampled_at
                    total_system_downtime += total_monitor_downtime
            total_period = date_from - date_to
            total_uptime = total_period - total_system_downtime
            downtime_seconds = total_system_downtime.total_seconds()
            return {
                'code': '800.200.001',
                'data': {
                    'start_date':
                    date_to.isoformat(),
                    'end_date':
                    date_from.isoformat(),
                    'total_period':
                    str(total_period),
                    'total_uptime':
                    str(total_uptime),
                    'total_downtime':
                    downtime_seconds if downtime_seconds > 0 else 0,
                    'uptime_percentage':
                    round(
                        total_uptime.total_seconds() /
                        total_period.total_seconds() * 100, 2),
                    'downtime_percentage':
                    round(
                        downtime_seconds / total_period.total_seconds() *
                        100, 2),
                    'duration_since_downtime':
                    (timezone.now() - latest_downtime).total_seconds(),
                    'incident_count':
                    total_incidents
                }
            }
        except Exception as ex:
            lgr.exception("Calculate downtime percentage exception %s" % ex)
            # Built inside the handler: the exception name is unbound once the except clause ends.
            return {
                'code':
                '800.400.001',
                'msg':
                'Error. Could not calculate total system availability %s ' %
                str(ex)
            }
    def get_current_status(system, **kwargs):
        """
        Retrieves the current status of a system together with its unresolved incidents.

        The overall state is derived from the state names of the endpoints registered to the system, the most
        severe state found taking precedence.
        @param system: Id of the system whose status is being retrieved
        @type system: str
        @param kwargs: Extra key-value arguments; not used by this method
        @return: Response code and a dictionary with the system details, its current incidents (each with its
        updates) and the overall current state | response code indicating the failure
        @rtype: dict
        """
        # Checked in order; the first state name found among the endpoints decides the overall state.
        state_precedence = (
            ('Major Outage', 'status-major', 'There is a Partial System Outage'),
            ('Minor Outage', 'status-minor', 'There is a Minor System Outage'),
            ('Degraded Performance', 'status-minor',
             'Partially Degraded Service'),
            ('Under Maintenance', 'status-maintenance',
             'A Service is undergoing maintenance'),
        )
        try:
            system = SystemService().get(pk=system, state__name='Active')
            if system is None:
                return {'code': '800.400.002', 'message': 'Invalid parameters'}
            closed_states = Q(state__name='Resolved') | Q(
                state__name='Completed')
            open_incidents = IncidentService().filter(
                system=system).exclude(closed_states)
            current_incidents = list(
                open_incidents.values(
                    'id', 'name', 'description', 'scheduled_for',
                    'scheduled_until', 'priority_level', 'event_type__name',
                    'system__name', 'state__name',
                    'date_created').order_by('-date_created'))
            for open_incident in current_incidents:
                updates = IncidentLogService().filter(
                    incident__id=open_incident.get('id')).values(
                        'description',
                        'priority_level',
                        'date_created',
                        'date_modified',
                        user_name=F('user__username'),
                        status=F('state__name')).order_by('-date_created')
                open_incident.update(incident_updates=list(updates))
            endpoint_states = [
                str(state_name) for state_name in EndpointService().filter(
                    system=system).values_list('state__name', flat=True)
            ]
            if not endpoint_states:
                # No endpoints registered: the system is reported as operational
                current_state = {
                    'state': 'status-operational',
                    'description': 'All systems are operational'
                }
            elif all(name == 'Major Outage' for name in endpoint_states):
                current_state = {
                    'state': 'status-critical',
                    'description': 'There is a Major System Outage'
                }
            else:
                for state_name, css_class, summary in state_precedence:
                    if state_name in endpoint_states:
                        current_state = {
                            'state': css_class,
                            'description': summary
                        }
                        break
                else:
                    current_state = {
                        'state': 'status-operational',
                        'description': 'All Systems Operational'
                    }
            status_data = {
                'system_id': system.id,
                'name': system.name,
                'description': system.description,
                'incidents': current_incidents,
                'current_state': current_state
            }
            return {'code': '800.200.001', 'data': status_data}
        except Exception as ex:
            lgr.exception('Get current system status exception %s' % ex)
        return {'code': '800.400.001'}
    def get_error_rate(system_id, start_date, end_date):
        """
        Calculates the error rate of a system from its logged 'Error' events as graph data.

        The length of the requested period only selects the resolution of the graph; the data points themselves
        are always counted backwards from the current time:
        - up to 1 day, or more than 365 days: 24 hourly points
        - 2 to 7 days: 7 daily points
        - 8 to 31 days: 31 daily points
        - 32 to 365 days: 12 monthly points
        @param system_id: Id of the system
        @type system_id: str
        @param start_date: Start point of the data to be presented
        @type start_date: str
        @param end_date: End date of the period for which the data is to be extracted
        @type end_date: str
        @return: Response code indicating status and, on success, a single-item list holding the error rate
        series
        @rtype: dict
        """
        try:
            system = SystemService().get(pk=system_id, state__name='Active')
            if not system:
                return {'code': '800.400.200'}
            now = timezone.now()
            # abs() keeps the length positive whichever of the two dates is the later one; a negative
            # length would always select the hourly resolution.
            period = abs(
                dateutil.parser.parse(start_date) -
                dateutil.parser.parse(end_date))

            def count_errors(window_start, window_end):
                """Counts the 'Error' events of the system created within the inclusive window."""
                return EventService().filter(
                    system=system,
                    event_type__name='Error',
                    date_created__lte=window_end,
                    date_created__gte=window_start).count()

            series = []
            if 1 < period.days <= 31:
                day_count = 7 if period.days <= 7 else 31
                # NOTE(review): for i == 0 the window is [now, now + 1 day], i.e. entirely in the future -
                # confirm whether the range was meant to start at 1 like the hourly one.
                for i in range(day_count):
                    window_start = now - timedelta(days=i)
                    window_end = window_start + timedelta(days=1)
                    series.append(
                        dict(value=count_errors(window_start, window_end),
                             name=window_end.replace(hour=0, minute=0)))
            elif 31 < period.days <= 365:
                # Midnight of the last day of the current month
                month_end = now.replace(day=1,
                                        hour=0,
                                        minute=0,
                                        second=0,
                                        microsecond=0)
                month_end += timedelta(days=calendar.monthrange(
                    month_end.year, month_end.month)[1] - 1)
                for _ in range(12):
                    days_in_month = calendar.monthrange(
                        month_end.year, month_end.month)[1]
                    month_start = month_end - timedelta(days=days_in_month - 1)
                    # Last day of the preceding month; it labels this point and ends the next window
                    previous_month_end = month_end - timedelta(
                        days=days_in_month)
                    series.append(
                        dict(value=count_errors(month_start, month_end),
                             name=previous_month_end))
                    month_end = previous_month_end
            else:
                for i in range(1, 25):
                    window_start = now - timedelta(hours=i)
                    window_end = window_start + timedelta(hours=1)
                    series.append(
                        dict(value=count_errors(window_start, window_end),
                             name=window_start.replace(minute=0)))
            result = {
                "name": "Number of errors",
                "color": "#E44D25",
                "series": series,
                "yAxisValue": "Number of Errors Occurred"
            }
            return {'code': '800.200.001', 'data': [result]}
        except Exception as ex:
            lgr.exception("Get Error rate Exception %s" % ex)
            # Built inside the handler: the exception name is unbound once the except clause ends. The
            # error text is kept out of 'code' so that the code can be compared by equality.
            return {'code': '800.400.001', 'message': str(ex)}
示例#24
0
    def log_event(event_type,
                  system,
                  interface=None,
                  method=None,
                  response=None,
                  request=None,
                  code=None,
                  description=None,
                  stack_trace=None,
                  **kwargs):
        """
        Logs an event reported by an external system or by a health check and escalates it.

        @param event_type: Name of the type of the event to be logged
        @type event_type: str
        @param system: Id of the system where the event occurred
        @type system: str
        @param interface: Name of the interface within the system where the event occurred
        @type interface: str | None
        @param method: Specific method within an interface where the event occurred
        @type method: str | None
        @param response: Response body, if any, of the reported event occurrence
        @type response: str | None
        @param request: Request body, if any, of the reported event occurrence
        @type request: str | None
        @param code: Response code of the event
        @type code: str | None
        @param description: Detailed information on the event occurrence
        @type description: str | None
        @param stack_trace: Stack trace of the event occurrence
        @type stack_trace: str | None
        @param kwargs: Extra key-value arguments; not used by this method
        @return: Response code indicating whether the event was created and, on success, the created event
        @rtype: dict
        """
        failure = {'code': '800.400.001'}
        try:
            system = SystemService().get(pk=system, state__name="Active")
            event_type = EventTypeService().get(name=event_type,
                                                state__name="Active")
            if system is None or event_type is None:
                return {"code": "800.400.002"}
            active_state = StateService().get(name="Active")
            source_interface = InterfaceService().get(name=interface,
                                                      state__name="Active",
                                                      system=system)
            event = EventService().create(event_type=event_type,
                                          system=system,
                                          method=method,
                                          response=response,
                                          request=request,
                                          code=code,
                                          description=description,
                                          state=active_state,
                                          interface=source_interface,
                                          stack_trace=stack_trace)
            if event is None:
                return failure
            # A failed escalation is only logged; the event itself has been created
            escalation = EventLog.escalate_event(event)
            if escalation.get('code') != '800.200.001':
                lgr.error('%s event escalation Failed' % event_type)
            event_fields = ('id', 'event_type', 'state__id', 'system__id',
                            'method', 'response', 'request', 'code',
                            'description', 'interface__id', 'stack_trace')
            created_event = EventService().filter(
                id=event.id).values(*event_fields).first()
            return {'code': '800.200.001', 'data': created_event}
        except Exception as ex:
            lgr.exception('Event processor exception %s' % ex)
        return failure
    def past_incidents(system, date_from=None, date_to=None):
        """
        Retrieves the resolved and completed incidents of a system grouped by day.

        The days are walked backwards starting at date_from, one entry per whole day between date_to and
        date_from. Unless both dates are given the period covers the 15 days preceding today's midnight.
        @param system: Id of the system the incidents belong to
        @type system: str
        @param date_from: Later bound of the period; the first day reported
        @type date_from: str | None
        @param date_to: Earlier bound of the period
        @type date_to: str | None
        @return: Response code and a list of {'date', 'incidents'} entries | response code to indicate errors
        retrieving the incidents
        @rtype: dict
        """
        try:
            system = SystemService().get(pk=system, state__name='Active')
            if not system:
                return {'code': '800.400.002'}
            if date_from and date_to:
                date_from = dateutil.parser.parse(date_from)
                date_to = dateutil.parser.parse(date_to)
            else:
                midnight = datetime.min.time()
                date_from = datetime.combine(datetime.now(), midnight)
                date_to = date_from - timedelta(days=15)
            one_day = timedelta(1)
            data = []
            for days_back in range((date_from - date_to).days):
                day_start = date_from - timedelta(days_back)
                # Excluding everything that is NOT closed leaves the resolved and completed incidents
                closed_incidents = IncidentService().filter(
                    system=system,
                    date_created__gte=day_start,
                    date_created__lt=day_start + one_day).exclude(
                        ~(Q(state__name='Resolved')
                          | Q(state__name='Completed')))
                incidents = list(
                    closed_incidents.values(
                        'id',
                        'name',
                        'description',
                        'system_id',
                        'priority_level',
                        'date_created',
                        'date_modified',
                        'scheduled_for',
                        'scheduled_until',
                        type=F('incident_type__name'),
                        eventtype=F('event_type__name'),
                        incident_id=F('id'),
                        status=F('state__name'),
                        affected_system=F('system__name')).order_by(
                            '-date_created'))
                for incident in incidents:
                    updates = IncidentLogService().filter(
                        incident__id=incident.get('incident_id')).values(
                            'description',
                            'priority_level',
                            'date_created',
                            'date_modified',
                            user_name=F('user__username'),
                            status=F('state__name')).order_by('-date_created')
                    incident.update(incident_updates=list(updates))
                data.append({'date': day_start, 'incidents': incidents})
            return {'code': '800.200.001', 'data': data}
        except Exception as ex:
            lgr.exception("Get incidents exception %s" % ex)
        return {'code': '800.400.001'}
示例#26
0
 def perform_health_check():
     """
     Queries the monitored endpoints of all active systems and logs their status to the system monitor model.

     Every endpoint of an active system whose endpoint type is queried receives a GET request. The outcome is
     stored as a system monitor record and, when the endpoint answers with a status other than 200 or slower
     than its optimal response time, an event is logged as well. An endpoint whose request fails is logged and
     skipped so that the remaining endpoints are still checked.
     @return: a dictionary containing a success code and a list of dictionaries with the status recorded for
                     each checked endpoint | an error code and message when the check could not be performed
     @rtype: dict
     """
     systems = []
     try:
         for endpoint in EndpointService().filter(
                 system__state__name="Active",
                 endpoint_type__is_queried=True):
             try:
                 health_state = requests.get(endpoint.url)
                 # Start from a healthy endpoint; 'state' always holds a state NAME and the response time
                 # is recorded for every response, whatever its status code.
                 monitor_data = {
                     'system': endpoint.system.name,
                     'endpoint': endpoint.name,
                     'response_body': health_state.content,
                     'response_code': health_state.status_code,
                     'state': 'Operational',
                     'response_time': health_state.elapsed.total_seconds(),
                     'response_time_speed': None,
                     'event_type': None,
                     'description': None,
                 }
                 if health_state.status_code != 200:
                     monitor_data.update({
                         "event_type":
                         EventTypeService().get(name='Critical'),
                         "description":
                         'The system is not accessible',
                         "state":
                         'Major Outage'
                     })
                 elif health_state.elapsed > endpoint.optimal_response_time:
                     monitor_data.update({
                         "response_time_speed":
                         'Slow',
                         "event_type":
                         EventTypeService().get(name='Warning'),
                         "description":
                         'Response time is not within the expected time',
                         "state":
                         'Degraded Performance'
                     })
                 else:
                     monitor_data.update({'response_time_speed': 'Normal'})
                 system_status = SystemMonitorService().create(
                     system=endpoint.system,
                     # Whole seconds only: the fractional part of the response time is dropped
                     response_time=timedelta(
                         seconds=int(monitor_data.get('response_time'))),
                     response_time_speed=monitor_data.get(
                         "response_time_speed"),
                     state=StateService().get(
                         name=monitor_data.get('state')),
                     response_body=monitor_data.get("response_body"),
                     endpoint=endpoint,
                     response_code=monitor_data.get("response_code"))
                 if system_status is not None:
                     systems.append({
                         "system": system_status.system.name,
                         "status": system_status.state.name,
                         "endpoint": endpoint.url
                     })
                 else:
                     # No record was created, so the entry is described from the endpoint itself
                     systems.append({
                         "system": endpoint.system.name,
                         "status": "failed",
                         "endpoint": endpoint.url
                     })
                 if monitor_data.get("event_type") is not None:
                     # NOTE(review): the system NAME is passed here - confirm that EventLog.log_event
                     # does not expect the id of the system instead.
                     event = EventLog.log_event(
                         event_type=monitor_data.get("event_type").name,
                         system=monitor_data.get("system"),
                         description=monitor_data.get("description"),
                         response=monitor_data.get('response_body'),
                         request=health_state.request)
                     if event['code'] != "800.200.001":
                         lgr.warning("Event creation failed %s" % event)
             except requests.RequestException as e:
                 # Covers connection errors as well as timeouts, invalid URLs and redirect loops
                 lgr.exception('Endpoint health check failed:  %s' % e)
         return {"code": "800.200.001", "data": {"systems": systems}}
     except Exception as ex:
         lgr.exception("Health Status exception:  %s" % ex)
     return {
         "code": "800.400.001",
         "message": "Error while performing health check"
     }
示例#27
0
    def get_system_endpoint_response_time(system_id, start_date, end_date):
        """
        Returns the response time of every endpoint for a specific system, grouped per endpoint.

        The span between start_date and end_date only selects the granularity of the look-back; the
        queried windows are always anchored on the current time:
        up to 1 day -> 24 hourly windows, up to 7 days -> 7 daily windows,
        up to 31 days -> 31 daily windows, up to 365 days -> 12 calendar-month windows.
        @param system_id: Id of the system
        @type system_id: str
        @param start_date: Start point of the data to be presented
        @type start_date: str
        @param end_date: End date of the period of which the data is to be extracted
        @type end_date: str
        @return: Response code indicating status and response time graph data. '800.400.200' when the
        system is not found, '800.400.001' on any failure or when the span exceeds 365 days.
        @rtype: dict
        """
        date_format = "%m/%d/%y  %H:%M"

        def hourly_windows(now):
            # The 24 one-hour windows preceding `now`, most recent first.
            for offset in range(1, 25):
                window_start = now - timedelta(hours=offset, minutes=0)
                yield (window_start, window_start + timedelta(hours=1),
                       window_start.replace(minute=0).strftime(date_format))

        def daily_windows(now, days):
            # `days` one-day windows, most recent first, labelled by the day the window ends on.
            # NOTE(review): offset 0 yields the window [now, now + 1 day], i.e. the first window
            # lies in the future and the oldest day of the span is not queried - confirm intent.
            for offset in range(0, days):
                window_start = now - timedelta(days=offset, hours=0, minutes=0)
                window_end = window_start + timedelta(days=1)
                yield (window_start, window_end,
                       window_end.replace(hour=0, minute=0).strftime(date_format))

        def monthly_windows(now):
            # The current calendar month and the 11 before it, most recent first.
            month_end = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
            month_end = month_end + timedelta(
                days=calendar.monthrange(month_end.year, month_end.month)[1] - 1)
            for _ in range(1, 13):
                days_in_month = calendar.monthrange(month_end.year, month_end.month)[1]
                month_start = month_end - timedelta(days=days_in_month - 1)
                # NOTE(review): the upper bound is midnight at the start of the month's last day,
                # so records created later on that day fall outside the window - confirm intent.
                # The year is taken from the window itself so that January is labelled correctly.
                yield (month_start, month_end,
                       '%s, %s' % (calendar.month_name[month_end.month], month_end.year))
                # Stepping back by the month's length lands on the last day of the previous month.
                month_end = month_end - timedelta(days=days_in_month)

        try:
            system = SystemService().get(pk=system_id, state__name='Active')
            if not system:
                return {'code': '800.400.200'}
            now = timezone.now()
            start_date = dateutil.parser.parse(start_date)
            end_date = dateutil.parser.parse(end_date)
            # start - end is negative for a well-ordered range, which made every request fall into
            # the hourly view. The absolute span sizes the range correctly in either argument order.
            period = abs(end_date - start_date)
            if period.days <= 1:
                windows = hourly_windows(now)
            elif period.days <= 7:
                windows = daily_windows(now, 7)
            elif period.days <= 31:
                windows = daily_windows(now, 31)
            elif period.days <= 365:
                windows = monthly_windows(now)
            else:
                # Previously this surfaced as an unbound-variable error swallowed by the handler
                # below; the response code is kept, the failure is now explicit.
                lgr.error(
                    "Response time period of %s days exceeds the supported 365 days" % period.days)
                return {'code': '800.400.001'}

            window_labels = []
            point_labels = []
            seen_point_labels = set()
            datasets = {}
            for window_start, window_end, window_label in windows:
                window_labels.append(window_label)
                rows = SystemMonitorService().filter(
                    system=system,
                    date_created__lte=window_end,
                    date_created__gte=window_start).values(
                        name=F('endpoint__name'),
                        responseTime=F('response_time'),
                        dateCreated=F('date_created'))
                for row in rows:
                    created = row["dateCreated"].strftime(date_format)
                    if created not in seen_point_labels:
                        seen_point_labels.add(created)
                        point_labels.append(created)
                    series = datasets.setdefault(
                        row["name"],
                        {"label": row["name"], "data": [], "dateCreated": []})
                    series["data"].append(
                        timedelta.total_seconds(row.get('responseTime')))
                    series["dateCreated"].append(created)

            if not datasets:
                # Nothing recorded in any window: return the window labels with a flat placeholder.
                return {
                    'code': '800.200.001',
                    'data': {
                        'labels': window_labels,
                        'datasets': {"Initial": {"data": [0]}}
                    }
                }
            return {
                'code': '800.200.001',
                'data': {
                    'labels': point_labels,
                    'datasets': datasets
                }
            }
        except Exception as ex:
            lgr.exception("Get system endpoint response time Exception %s" % ex)
        return {'code': '800.400.001'}
示例#28
0
    def log_incident(incident_type,
                     system,
                     escalation_level,
                     name,
                     description,
                     priority_level,
                     event_type=None,
                     state="Investigating",
                     escalated_events=None,
                     scheduled_for=None,
                     scheduled_until=None,
                     **kwargs):
        """
        Creates a realtime incident based on escalated events or scheduled incident based on user reports.

        For a realtime incident with an event type, an existing incident of the same event type and
        system that is neither Resolved nor Completed is escalated through
        IncidentAdministrator().update_incident instead of creating a new one.
        @param incident_type: Type of the incident to be created
        @type incident_type: str
        @param system: The system which the incident will be associated with
        @type system: str
        @param escalation_level: Level at which an escalation is configured with a set of recipients
        @type escalation_level: str
        @param name: Title of the incident
        @type name: str
        @param description: Details on the incident
        @type description: str
        @param priority_level: The level of importance to be assigned to the incident.
        @type priority_level: str
        @param event_type: Type of the event(s) that triggered creation of the incident, if its event driven.
        @type event_type: str | None
        @param state: Initial resolution state of the incident, given as a state id (UUID string) or a
        state name. Defaults to Investigating if left blank
        @type state: str
        @param escalated_events: One or more events in the escalation if the incident is event driven.
        @type escalated_events: list | None
        @param scheduled_for: Time the scheduled maintenance should begin if the incident is scheduled
        @type scheduled_for: str | None
        @param scheduled_until: Time the scheduled maintenance should end if the incident is scheduled
        @type scheduled_until: str | None
        @param kwargs: Extra key-value arguments to pass for incident logging
        @return: Response code dictionary to indicate if the incident was created or not:
        '800.200.001' on success, '800.400.002' when the system, incident type or escalation level is
        not found, '800.400.001' on any other failure; the update_incident response when escalating.
        @rtype: dict
        """

        def notify(message_type, contact_field, level, logged_incident):
            # Sends the incident description to the active users registered as recipients of the
            # given notification type at this escalation level; returns the notification response.
            recipient_ids = SystemRecipientService().filter(
                escalation_level=level,
                system=logged_incident.system,
                state__name='Active',
                notification_type__name=message_type).values('recipient__id')
            contacts = [
                str(user[contact_field]) for user in User.objects.filter(
                    id__in=recipient_ids, is_active=True).values(contact_field)
            ]
            return NotificationLogger().send_notification(
                message=logged_incident.description,
                message_type=message_type,
                system_id=logged_incident.system.id,
                recipients=contacts)

        try:
            system = SystemService().get(pk=system, state__name="Active")
            incident_type = IncidentTypeService().get(name=incident_type,
                                                      state__name="Active")
            escalation_level = EscalationLevelService().get(
                pk=escalation_level, state__name="Active")
            # Validate before anything dereferences incident_type, so an unknown type is reported
            # with the dedicated code rather than the generic failure code.
            if system is None or incident_type is None or escalation_level is None:
                return {"code": "800.400.002"}
            try:
                state = StateService().get(pk=uuid.UUID(state))
            except ValueError:
                # Not a UUID: treat it as a state name; scheduled incidents always start as Scheduled.
                if incident_type.name == 'Realtime':
                    state = StateService().get(name=state)
                else:
                    state = StateService().get(name='Scheduled')
            if incident_type.name == "Realtime" and event_type is not None:
                # exclude() ANDs its positional arguments, so the two states must be OR-ed to
                # actually filter out finished incidents.
                incident = IncidentService().filter(
                    event_type__name=event_type, system=system).exclude(
                        Q(state__name='Resolved')
                        | Q(state__name='Completed')).order_by(
                            '-date_created').first()
                # NOTE(review): the cap is checked against the requested priority level, not the
                # existing incident's level - confirm this is intended.
                if incident and int(priority_level) < 5:
                    priority_level = incident.priority_level + 1
                    return IncidentAdministrator().update_incident(
                        incident_id=incident.id,
                        escalation_level=escalation_level.name,
                        name=incident.name,
                        state=incident.state.id,
                        priority_level=str(priority_level),
                        description=
                        "Priority level of %s incident changed to %s" %
                        (incident.name, priority_level))
            if incident_type.name == 'Scheduled':
                scheduled_for = dateutil.parser.parse(scheduled_for)
                scheduled_until = dateutil.parser.parse(scheduled_until)
            # `state` is already the resolved state object; it is used directly instead of being
            # passed back through a lookup by name.
            incident = IncidentService().create(
                name=name,
                description=description,
                state=state,
                system=system,
                incident_type=incident_type,
                scheduled_for=scheduled_for,
                scheduled_until=scheduled_until,
                event_type=EventTypeService().get(name=event_type),
                priority_level=int(priority_level))
            incident_log = IncidentLogService().create(
                description=description,
                incident=incident,
                priority_level=priority_level,
                state=state,
                escalation_level=escalation_level)
            if incident is not None and incident_log is not None:
                if escalated_events:
                    active_state = StateService().get(name="Active")
                    for event in escalated_events:
                        incident_event = IncidentEventService().create(
                            event=event,
                            incident=incident,
                            state=active_state)
                        if not incident_event:
                            lgr.error("Error creating incident-events")
                sms_notification = notify("Sms", "phone_number",
                                          escalation_level, incident)
                email_notification = notify("Email", "email",
                                            escalation_level, incident)
                # A failed notification is logged only; the incident itself was created.
                if sms_notification.get(
                        'code') != '800.200.001' or email_notification.get(
                            'code') != '800.200.001':
                    lgr.error("Notification sending failed")
                return {'code': '800.200.001'}
        except Exception as ex:
            lgr.exception("Incident Logger exception %s" % ex)
        return {"code": "800.400.001"}