Exemplo n.º 1
0
    async def __run_alerts(self):
        """Run every periodic alert source that is due and record its alerts.

        One-shot alert sources are skipped. For each remaining source whose
        schedule says it should run, the source is executed locally and the
        resulting alerts are tagged with the local node name.

        When ``system.is_freenas`` is false and ``failover.licensed`` is true,
        the remote node is probed through ``failover.call_remote``. Sources
        flagged ``run_on_backup_node`` are additionally executed on the remote
        node, but only if the remote ``system.version`` equals the local one
        and the remote ``failover.status`` is ``"BACKUP"``.

        Results are stored in ``self.alerts["A"]`` (local run) and
        ``self.alerts["B"]`` (remote run), keyed by source name and then by
        alert key, replacing whatever was stored for that source before.
        """
        master_node = "A"
        backup_node = "B"
        run_on_backup_node = False
        if (not await self.middleware.call("system.is_freenas")
                and await self.middleware.call("failover.licensed")):
            master_node = await self.middleware.call("failover.node")
            try:
                backup_node = await self.middleware.call(
                    "failover.call_remote", "failover.node")
                remote_version = await self.middleware.call(
                    "failover.call_remote", "system.version")
                remote_failover_status = await self.middleware.call(
                    "failover.call_remote", "failover.status")
            except Exception as probe_error:
                # Best effort: if the remote node cannot be queried we simply
                # do not run anything on it, but leave a trace of the reason
                # instead of discarding it silently.
                self.logger.trace(
                    "Unable to query backup node, alert sources will not be "
                    "run on it: %r", probe_error)
            else:
                local_version = await self.middleware.call("system.version")
                if (remote_version == local_version
                        and remote_failover_status == "BACKUP"):
                    run_on_backup_node = True

        for alert_source in ALERT_SOURCES.values():
            if isinstance(alert_source, OneShotAlertSource):
                continue

            # Sample the clock once so the instant checked against the
            # schedule is exactly the instant recorded as the last run.
            now = datetime.utcnow()
            if not alert_source.schedule.should_run(
                    now, self.alert_source_last_run[alert_source.name]):
                continue

            self.alert_source_last_run[alert_source.name] = now

            self.logger.trace("Running alert source: %r", alert_source.name)

            try:
                alerts_a = await self.__run_source(alert_source.name)
            except UnavailableException:
                # The source could not be evaluated right now: keep the alerts
                # recorded by its previous local run.
                alerts_a = list(self.alerts["A"][alert_source.name].values())
            for alert in alerts_a:
                alert.node = master_node

            alerts_b = []
            if run_on_backup_node and alert_source.run_on_backup_node:
                try:
                    try:
                        remote_alerts = await self.middleware.call(
                            "failover.call_remote", "alert.run_source",
                            [alert_source.name])
                    except CallError as call_error:
                        if call_error.errno != CallError.EALERTCHECKERUNAVAILABLE:
                            raise
                        # Remote counterpart of UnavailableException: keep the
                        # alerts recorded by the previous remote run.
                        alerts_b = list(
                            self.alerts["B"][alert_source.name].values())
                    else:
                        # The remote call yields mappings; rebuild Alert
                        # objects and turn a non-None level back into an
                        # AlertLevel member.
                        alerts_b = [
                            Alert(**dict(alert,
                                         level=(AlertLevel(alert["level"])
                                                if alert["level"] is not None
                                                else None)))
                            for alert in remote_alerts
                        ]
                except Exception:
                    # Any other failure (remote call or conversion) is itself
                    # reported as a critical alert carrying the traceback.
                    alerts_b = [
                        Alert(
                            title=
                            "Unable to run alert source %(source_name)r on backup node\n%(traceback)s",
                            args={
                                "source_name": alert_source.name,
                                "traceback": traceback.format_exc(),
                            },
                            key="__remote_call_exception__",
                            level=AlertLevel.CRITICAL)
                    ]
            for alert in alerts_b:
                alert.node = backup_node

            for alert in alerts_a + alerts_b:
                self.__handle_alert(alert_source, alert)

            # Storage slots are fixed: "A" holds the local run, "B" the remote
            # run, regardless of the node names assigned to the alerts above.
            self.alerts["A"][alert_source.name] = {
                alert.key: alert
                for alert in alerts_a
            }
            self.alerts["B"][alert_source.name] = {
                alert.key: alert
                for alert in alerts_b
            }
Exemplo n.º 2
0
    async def __run_alerts(self):
        """Run every alert source that is due and record its alerts.

        For each source whose schedule says it should run, the source is
        executed locally and the resulting alerts are tagged with the local
        node name.

        When ``system.is_freenas`` is false and ``notifier.failover_licensed``
        is true, the remote node is probed through ``failover.call_remote``.
        Sources flagged ``run_on_backup_node`` are additionally executed on
        the remote node, but only if the remote ``system.version`` equals the
        local one and the remote ``notifier.failover_status`` is ``"BACKUP"``.

        Each produced alert gets its source name, falls back to the source's
        level and title when it has none, and inherits ``datetime`` and
        ``dismissed`` from a previously stored alert with the same key.
        Results are stored in ``self.alerts["A"]`` (local run) and
        ``self.alerts["B"]`` (remote run), keyed by source name and then by
        alert key.
        """
        master_node = "A"
        backup_node = "B"
        run_on_backup_node = False
        if (not await self.middleware.call("system.is_freenas")
                and await self.middleware.call("notifier.failover_licensed")):
            master_node = await self.middleware.call("failover.node")
            try:
                backup_node = await self.middleware.call(
                    "failover.call_remote", "failover.node")
                remote_version = await self.middleware.call(
                    "failover.call_remote", "system.version")
                remote_failover_status = await self.middleware.call(
                    "failover.call_remote", "notifier.failover_status")
            except Exception as probe_error:
                # Best effort: if the remote node cannot be queried we simply
                # do not run anything on it, but leave a trace of the reason
                # instead of discarding it silently.
                self.logger.trace(
                    "Unable to query backup node, alert sources will not be "
                    "run on it: %r", probe_error)
            else:
                local_version = await self.middleware.call("system.version")
                if (remote_version == local_version
                        and remote_failover_status == "BACKUP"):
                    run_on_backup_node = True

        for alert_source in ALERT_SOURCES.values():
            # Sample the clock once so the instant checked against the
            # schedule is exactly the instant recorded as the last run.
            now = datetime.utcnow()
            if not alert_source.schedule.should_run(
                    now, self.alert_source_last_run[alert_source.name]):
                continue

            self.alert_source_last_run[alert_source.name] = now

            self.logger.trace("Running alert source: %r", alert_source.name)

            alerts_a = await self.__run_source(alert_source.name)
            for alert in alerts_a:
                alert.node = master_node

            alerts_b = []
            if run_on_backup_node and alert_source.run_on_backup_node:
                try:
                    remote_alerts = await self.middleware.call(
                        "failover.call_remote", "alert.run_source",
                        [alert_source.name])
                    # The remote call yields mappings; rebuild Alert objects
                    # and turn a non-None level back into an AlertLevel member.
                    alerts_b = [
                        Alert(**dict(
                            alert,
                            level=(AlertLevel(alert["level"])
                                   if alert["level"] is not None else None)))
                        for alert in remote_alerts
                    ]
                except Exception:
                    # A failed remote run (call or conversion) is itself
                    # reported as a critical alert carrying the traceback.
                    alerts_b = [
                        Alert(
                            title=
                            "Unable to run alert source %(source_name)r on backup node\n%(traceback)s",
                            args={
                                "source_name": alert_source.name,
                                "traceback": traceback.format_exc(),
                            },
                            key="__remote_call_exception__",
                            level=AlertLevel.CRITICAL)
                    ]
            for alert in alerts_b:
                alert.node = backup_node

            for alert in alerts_a + alerts_b:
                # NOTE(review): the previous alert is looked up under
                # alert.node, while results are stored below under the literal
                # "A"/"B" slots. These agree only if the local node is named
                # "A" and the remote one "B" - confirm what "failover.node"
                # can return and how self.alerts is populated elsewhere.
                existing_alert = self.alerts[alert.node][
                    alert_source.name].get(alert.key)

                alert.source = alert_source.name
                alert.level = alert.level or alert_source.level
                alert.title = alert.title or alert_source.title
                if existing_alert is None:
                    # First time this alert is seen: stamp it now, undismissed.
                    alert.datetime = datetime.utcnow()
                    alert.dismissed = False
                else:
                    # Known alert: keep its original timestamp and the
                    # dismissed flag it already had.
                    alert.datetime = existing_alert.datetime
                    alert.dismissed = existing_alert.dismissed

            self.alerts["A"][alert_source.name] = {
                alert.key: alert
                for alert in alerts_a
            }
            self.alerts["B"][alert_source.name] = {
                alert.key: alert
                for alert in alerts_b
            }