def patch(self, request, pk, agentid):
    """Record the result of a scheduled-task run reported by an agent.

    Validates the posted run data against the task, stamps ``last_run``,
    derives a passing/failing status from the return code, drives alert
    resolution/failure, and writes an audit-log entry.

    Returns an HTTP 200 with the body "ok".
    """
    from alerts.models import Alert
    from logs.models import AuditLog

    # Resolve the reporting agent and the task the results belong to.
    reporting_agent = get_object_or_404(Agent, agent_id=agentid)
    target_task = get_object_or_404(AutomatedTask, pk=pk)

    run_serializer = TaskRunnerPatchSerializer(
        instance=target_task, data=request.data, partial=True
    )
    run_serializer.is_valid(raise_exception=True)
    run_serializer.save(last_run=djangotime.now())

    # A non-zero return code means the run failed.
    if target_task.retcode != 0:
        run_status = "failing"
    else:
        run_status = "passing"

    # Re-fetch so the instance reflects the just-saved run results.
    refreshed: AutomatedTask = AutomatedTask.objects.get(pk=target_task.pk)
    refreshed.status = run_status
    refreshed.save()

    if run_status == "passing":
        # Only resolve when there is actually an open alert for this task.
        has_open_alert = Alert.objects.filter(
            assigned_task=refreshed, resolved=False
        ).exists()
        if has_open_alert:
            Alert.handle_alert_resolve(refreshed)
    else:
        Alert.handle_alert_failure(refreshed)

    AuditLog.objects.create(
        username=reporting_agent.hostname,
        agent=reporting_agent.hostname,
        object_type="agent",
        action="task_run",
        message=f"Scheduled Task {target_task.name} was run on {reporting_agent.hostname}",
        after_value=AutomatedTask.serialize(refreshed),
    )

    return Response("ok")
def agent_outages_task() -> None:
    """Scan every agent and raise an alert for each one that is overdue.

    Intended to run periodically; delegates all alert handling to
    ``Alert.handle_alert_failure``.
    """
    from alerts.models import Alert

    # Load only the columns needed to evaluate agent.status and alert flags.
    candidates = Agent.objects.only(
        "pk",
        "last_seen",
        "offline_time",
        "overdue_time",
        "overdue_email_alert",
        "overdue_text_alert",
        "overdue_dashboard_alert",
    )

    overdue_agents = (agent for agent in candidates if agent.status == "overdue")
    for agent in overdue_agents:
        Alert.handle_alert_failure(agent)
def handle_check(self, data):
    """Process a check result payload from an agent (legacy/v1 format).

    Dispatches on ``self.check_type`` to evaluate the result in ``data``,
    persists the type-specific fields, records a check-history entry, then
    updates fail_count and triggers alert failure/resolution.

    Returns the resulting status string (``self.status``).
    """
    from alerts.models import Alert

    # cpuload or mem checks
    if self.check_type == "cpuload" or self.check_type == "memory":
        # Keep a rolling window of at most the 15 most recent samples.
        self.history.append(data["percent"])
        if len(self.history) > 15:
            self.history = self.history[-15:]
        self.save(update_fields=["history"])

        # Compare the window average (not the single sample) to thresholds.
        avg = int(mean(self.history))
        if self.error_threshold and avg > self.error_threshold:
            self.status = "failing"
            self.alert_severity = "error"
        elif self.warning_threshold and avg > self.warning_threshold:
            self.status = "failing"
            self.alert_severity = "warning"
        else:
            self.status = "passing"

        # add check history
        self.add_check_history(data["percent"])

    # diskspace checks
    elif self.check_type == "diskspace":
        if data["exists"]:
            # Thresholds are expressed as "percent free" (100 - percent used).
            percent_used = round(data["percent_used"])
            if self.error_threshold and (100 - percent_used) < self.error_threshold:
                self.status = "failing"
                self.alert_severity = "error"
            elif (
                self.warning_threshold
                and (100 - percent_used) < self.warning_threshold
            ):
                self.status = "failing"
                self.alert_severity = "warning"
            else:
                self.status = "passing"

            self.more_info = data["more_info"]

            # add check history
            self.add_check_history(100 - percent_used)
        else:
            # Missing disk is always an error-severity failure.
            self.status = "failing"
            self.alert_severity = "error"
            self.more_info = f"Disk {self.disk} does not exist"

        self.save(update_fields=["more_info"])

    # script checks
    elif self.check_type == "script":
        self.stdout = data["stdout"]
        self.stderr = data["stderr"]
        self.retcode = data["retcode"]
        self.execution_time = "{:.4f}".format(data["runtime"])

        # Return-code classification: configured info/warning codes still
        # mark the check failing, just at a lower severity.
        if data["retcode"] in self.info_return_codes:
            self.alert_severity = "info"
            self.status = "failing"
        elif data["retcode"] in self.warning_return_codes:
            self.alert_severity = "warning"
            self.status = "failing"
        elif data["retcode"] != 0:
            self.status = "failing"
            self.alert_severity = "error"
        else:
            self.status = "passing"

        self.save(
            update_fields=[
                "stdout",
                "stderr",
                "retcode",
                "execution_time",
            ]
        )

        # add check history (output truncated to 60 chars for history rows)
        self.add_check_history(
            1 if self.status == "failing" else 0,
            {
                "retcode": data["retcode"],
                "stdout": data["stdout"][:60],
                "stderr": data["stderr"][:60],
                "execution_time": self.execution_time,
            },
        )

    # ping checks — the agent reports status directly in this format
    elif self.check_type == "ping":
        self.status = data["status"]
        self.more_info = data["output"]
        self.save(update_fields=["more_info"])
        self.add_check_history(
            1 if self.status == "failing" else 0, self.more_info[:60]
        )

    # windows service checks — status also reported directly by the agent
    elif self.check_type == "winsvc":
        self.status = data["status"]
        self.more_info = data["more_info"]
        self.save(update_fields=["more_info"])
        self.add_check_history(
            1 if self.status == "failing" else 0, self.more_info[:60]
        )

    elif self.check_type == "eventlog":
        log = data["log"]
        # fail/pass depending on whether enough matching events were found.
        if self.fail_when == "contains":
            if log and len(log) >= self.number_of_events_b4_alert:
                self.status = "failing"
            else:
                self.status = "passing"
        elif self.fail_when == "not_contains":
            if log and len(log) >= self.number_of_events_b4_alert:
                self.status = "passing"
            else:
                self.status = "failing"

        self.extra_details = {"log": log}
        self.save(update_fields=["extra_details"])
        self.add_check_history(
            1 if self.status == "failing" else 0,
            "Events Found:" + str(len(self.extra_details["log"])),
        )

    # handle status: update fail counter and drive alerting
    if self.status == "failing":
        self.fail_count += 1
        self.save(update_fields=["status", "fail_count", "alert_severity"])

        # Only alert once the configured consecutive-failure threshold is hit.
        if self.fail_count >= self.fails_b4_alert:
            Alert.handle_alert_failure(self)

    elif self.status == "passing":
        self.fail_count = 0
        self.save(update_fields=["status", "fail_count", "alert_severity"])
        if Alert.objects.filter(assigned_check=self, resolved=False).exists():
            Alert.handle_alert_resolve(self)

    return self.status
def patch(self, request, pk, agentid):
    """Record the result of a scheduled-task run reported by an agent.

    Validates and saves the posted run data, stamping ``last_run``. If the
    task is a collector (has a custom field), a successful run's last line
    of stdout is stored into the agent's custom-field value; otherwise the
    status comes from the return code. Finishes by driving alert
    resolution/failure and writing an audit-log entry.

    Returns an HTTP 200 with the body "ok".
    """
    from alerts.models import Alert
    from logs.models import AuditLog

    reporting_agent = get_object_or_404(Agent, agent_id=agentid)
    target_task = get_object_or_404(AutomatedTask, pk=pk)

    run_serializer = TaskRunnerPatchSerializer(
        instance=target_task, data=request.data, partial=True
    )
    run_serializer.is_valid(raise_exception=True)
    saved_task = run_serializer.save(last_run=djangotime.now())

    if not target_task.custom_field:
        # Plain task: status follows the return code.
        run_status = "failing" if target_task.retcode != 0 else "passing"
    elif target_task.stderr:
        # Collector task that produced stderr counts as a failure.
        run_status = "failing"
    else:
        # Collector task: persist the output into the agent custom field.
        field_lookup = {
            "field": target_task.custom_field,
            "agent": target_task.agent,
        }
        if AgentCustomField.objects.filter(**field_lookup).exists():
            agent_field = AgentCustomField.objects.get(**field_lookup)
        else:
            agent_field = AgentCustomField.objects.create(**field_lookup)

        # get last line of stdout
        collected = saved_task.stdout.split("\n")[-1].strip()

        field_type = target_task.custom_field.type
        if field_type in ("text", "number", "single", "datetime"):
            agent_field.string_value = collected
            agent_field.save()
        elif field_type == "multiple":
            agent_field.multiple_value = collected.split(",")
            agent_field.save()
        elif field_type == "checkbox":
            # NOTE(review): bool() of any non-empty string (even "False") is
            # True — confirm this truthiness rule is the intended semantics.
            agent_field.bool_value = bool(collected)
            agent_field.save()

        run_status = "passing"

    saved_task.status = run_status
    saved_task.save()

    if run_status == "passing":
        open_alert_exists = Alert.objects.filter(
            assigned_task=saved_task, resolved=False
        ).exists()
        if open_alert_exists:
            Alert.handle_alert_resolve(saved_task)
    else:
        Alert.handle_alert_failure(saved_task)

    AuditLog.objects.create(
        username=reporting_agent.hostname,
        agent=reporting_agent.hostname,
        object_type="agent",
        action="task_run",
        message=f"Scheduled Task {target_task.name} was run on {reporting_agent.hostname}",
        after_value=AutomatedTask.serialize(saved_task),
    )

    return Response("ok")
def handle_checkv2(self, data):
    """Process a check result payload from an agent (v2 format).

    Dispatches on ``self.check_type`` to evaluate the result in ``data``,
    persists the type-specific fields, records a check-history entry, then
    updates fail_count and triggers alert failure/resolution.

    Returns the resulting status string (``self.status``).
    """
    from alerts.models import Alert

    # cpuload or mem checks
    if self.check_type == "cpuload" or self.check_type == "memory":
        # Keep a rolling window of at most the 15 most recent samples.
        self.history.append(data["percent"])
        if len(self.history) > 15:
            self.history = self.history[-15:]
        self.save(update_fields=["history"])

        # Compare the window average (not the single sample) to thresholds.
        avg = int(mean(self.history))
        if self.error_threshold and avg > self.error_threshold:
            self.status = "failing"
            self.alert_severity = "error"
        elif self.warning_threshold and avg > self.warning_threshold:
            self.status = "failing"
            self.alert_severity = "warning"
        else:
            self.status = "passing"

        # add check history
        self.add_check_history(data["percent"])

    # diskspace checks
    elif self.check_type == "diskspace":
        if data["exists"]:
            # Thresholds are expressed as "percent free" (100 - percent used).
            percent_used = round(data["percent_used"])
            total = bytes2human(data["total"])
            free = bytes2human(data["free"])

            if self.error_threshold and (100 - percent_used) < self.error_threshold:
                self.status = "failing"
                self.alert_severity = "error"
            elif (
                self.warning_threshold
                and (100 - percent_used) < self.warning_threshold
            ):
                self.status = "failing"
                self.alert_severity = "warning"
            else:
                self.status = "passing"

            self.more_info = f"Total: {total}B, Free: {free}B"

            # add check history
            self.add_check_history(100 - percent_used)
        else:
            # Missing disk is always an error-severity failure.
            self.status = "failing"
            self.alert_severity = "error"
            self.more_info = f"Disk {self.disk} does not exist"

        self.save(update_fields=["more_info"])

    # script checks
    elif self.check_type == "script":
        self.stdout = data["stdout"]
        self.stderr = data["stderr"]
        self.retcode = data["retcode"]
        try:
            # python agent reports start/stop timestamps
            self.execution_time = "{:.4f}".format(data["stop"] - data["start"])
        except KeyError:
            # golang agent reports a pre-computed runtime instead.
            # (was a bare ``except:`` which hid unrelated errors — only the
            # missing-key case is the expected fallback here)
            self.execution_time = "{:.4f}".format(data["runtime"])

        # Return-code classification: configured info/warning codes still
        # mark the check failing, just at a lower severity.
        if data["retcode"] in self.info_return_codes:
            self.alert_severity = "info"
            self.status = "failing"
        elif data["retcode"] in self.warning_return_codes:
            self.alert_severity = "warning"
            self.status = "failing"
        elif data["retcode"] != 0:
            self.status = "failing"
            self.alert_severity = "error"
        else:
            self.status = "passing"

        self.save(
            update_fields=[
                "stdout",
                "stderr",
                "retcode",
                "execution_time",
            ]
        )

        # add check history (output truncated to 60 chars for history rows)
        self.add_check_history(
            1 if self.status == "failing" else 0,
            {
                "retcode": data["retcode"],
                "stdout": data["stdout"][:60],
                "stderr": data["stderr"][:60],
                "execution_time": self.execution_time,
            },
        )

    # ping checks — passing requires every marker of a successful
    # Windows ping reply to appear in stdout
    elif self.check_type == "ping":
        success = ["Reply", "bytes", "time", "TTL"]
        output = data["output"]

        if data["has_stdout"]:
            if all(x in output for x in success):
                self.status = "passing"
            else:
                self.status = "failing"
        elif data["has_stderr"]:
            self.status = "failing"

        self.more_info = output
        self.save(update_fields=["more_info"])
        self.add_check_history(
            1 if self.status == "failing" else 0, self.more_info[:60]
        )

    # windows service checks
    elif self.check_type == "winsvc":
        svc_stat = data["status"]
        self.more_info = f"Status {svc_stat.upper()}"

        if data["exists"]:
            if svc_stat == "running":
                self.status = "passing"
            elif svc_stat == "start_pending" and self.pass_if_start_pending:
                self.status = "passing"
            else:
                if self.agent and self.restart_if_stopped:
                    # Try to start the stopped service via nats before failing.
                    nats_data = {
                        "func": "winsvcaction",
                        "payload": {"name": self.svc_name, "action": "start"},
                    }
                    r = asyncio.run(self.agent.nats_cmd(nats_data, timeout=32))
                    if r == "timeout" or r == "natsdown":
                        self.status = "failing"
                    elif not r["success"] and r["errormsg"]:
                        self.status = "failing"
                    elif r["success"]:
                        self.status = "passing"
                        # (was f"Status RUNNING" — f-string had no placeholders)
                        self.more_info = "Status RUNNING"
                    else:
                        self.status = "failing"
                else:
                    self.status = "failing"
        else:
            if self.pass_if_svc_not_exist:
                self.status = "passing"
            else:
                self.status = "failing"
            self.more_info = f"Service {self.svc_name} does not exist"

        self.save(update_fields=["more_info"])
        self.add_check_history(
            1 if self.status == "failing" else 0, self.more_info[:60]
        )

    elif self.check_type == "eventlog":
        log = []
        is_wildcard = self.event_id_is_wildcard
        eventType = self.event_type
        eventID = self.event_id
        source = self.event_source
        message = self.event_message
        r = data["log"]

        # Filter the agent-reported events against the configured criteria.
        # An event matches on type, then (unless wildcard) on event ID, then
        # on whichever of source/message filters are configured.
        for i in r:
            if i["eventType"] == eventType:
                if not is_wildcard and not int(i["eventID"]) == eventID:
                    continue

                # No source/message filters: match on type/ID alone.
                if not source and not message:
                    if is_wildcard:
                        log.append(i)
                    elif int(i["eventID"]) == eventID:
                        log.append(i)
                    continue

                # Both filters set: both must match (substring match).
                if source and message:
                    if is_wildcard:
                        if source in i["source"] and message in i["message"]:
                            log.append(i)
                    elif int(i["eventID"]) == eventID:
                        if source in i["source"] and message in i["message"]:
                            log.append(i)
                    continue

                # Exactly one filter set: match on that one.
                if source and source in i["source"]:
                    if is_wildcard:
                        log.append(i)
                    elif int(i["eventID"]) == eventID:
                        log.append(i)

                if message and message in i["message"]:
                    if is_wildcard:
                        log.append(i)
                    elif int(i["eventID"]) == eventID:
                        log.append(i)

        # fail/pass depending on whether matching events were found.
        if self.fail_when == "contains":
            if log:
                self.status = "failing"
            else:
                self.status = "passing"
        elif self.fail_when == "not_contains":
            if log:
                self.status = "passing"
            else:
                self.status = "failing"

        self.extra_details = {"log": log}
        self.save(update_fields=["extra_details"])
        self.add_check_history(
            1 if self.status == "failing" else 0,
            "Events Found:" + str(len(self.extra_details["log"])),
        )

    # handle status: update fail counter and drive alerting
    if self.status == "failing":
        self.fail_count += 1
        self.save(update_fields=["status", "fail_count", "alert_severity"])

        # Only alert once the configured consecutive-failure threshold is hit.
        if self.fail_count >= self.fails_b4_alert:
            Alert.handle_alert_failure(self)

    elif self.status == "passing":
        self.fail_count = 0
        self.save(update_fields=["status", "fail_count", "alert_severity"])
        if Alert.objects.filter(assigned_check=self, resolved=False).exists():
            Alert.handle_alert_resolve(self)

    return self.status